#! /bin/bash
# Last edited on 2024-07-17 09:31:25 by stolfi
#
# Lists the ordinary files under the given directories, one per line, as
# "{CKSUM} {SIZE} {DATE} {PNAME}" on stdout.
#
# The work is delegated to two helper scripts that must be on the PATH:
# "find_all_files_size_date.sh" and "find_all_files_cksum_size.sh".  Their
# outputs are sorted and joined on the file name (third field of each),
# and the two size fields are cross-checked before a line is printed.
#
# Exit status: 1 on a command line error or if the temporary directory
# cannot be created; otherwise the exit status of the final gawk
# consistency check (1 if it found a malformed or inconsistent line).

PROG_NAME=${0##*/}
PROG_DESC="find all files under given directories; print cksum, size, date, name"
PROG_HELP=(
  "${PROG_NAME} [-nsec] [-exclude {NAME}].. {DIR}.."
)
# NOTE(review): "-exclude-path" is accepted below and forwarded to the
# helper scripts, but it is not described here; its exact meaning is
# defined by the helpers.
PROG_INFO=(
  "\nNAME"
  "\n ${PROG_NAME} - ${PROG_DESC}."
  "\n"
  "\nSYNOPSIS"
  "\n ${PROG_HELP[@]}"
  "\n"
  "\nDESCRIPTION"
  "\n Writes to stdout a list of all ordinary files"
  "\n in the specified directories (which usually should be disjoint)."
  "\n"
  "\n For each file, prints: the checksum (as a zero-padded"
  "\n 10-digit decimal integer), the size in bytes, the last-modified time"
  "\n (in the format YYYY-MM-DD-hhmmss, UTC timeone), and the file's pathname."
  "\n"
  "\n Rejects any directory or file names contain blanks, line"
  "\n breaks, backslashes, single or double quotes. Assumes"
  "\n that the full paths found do not contain any double slashes \"//\"."
  "\n"
  "\n Does not list symbolic links, pipes, etc. Excludes a few"
  "\n trash directories that are known to contain many"
  "\n invalid file names, such as {.cache}, {cache}, {.config/chromium}, etc."
  "\n"
  "\nOPTIONS"
  "\n -nsec."
  "\n If this option is present, the time field ends with a"
  "\n fraction of second consisting of a '.' and nine decimal"
  "\n digits (nanoseconds). Otherwise the time field has just"
  "\n whole seconds."
  "\n"
  "\n -exclude {NAME}"
  "\n Each occurrence of this option specifies the name"
  "\n (without slashes) of a file to be excluded from the"
  "\n listing. If it is a directory, also excludes all sub-directories"
  "\n and files therein."
  "\n"
  "\nSEE ALSO"
  "\n find(1), find_all_files_size_date, find_all_files_cksum_size"
  "\nAUTHOR"
  "\n Created 2007-01-17, 2017-05-04, 2022-09-21 by Jorge Stolfi, Unicamp"
)

# ----------------------------------------------------------------------
# COMMAND LINE PARSING

# Options are collected into arrays so they can be forwarded verbatim
# to the helper scripts.
nsecop=( )
exclop=( )
# Only an argument that *begins* with "-" is an option; a directory such
# as "foo/-bar" must not be mistaken for one.
while [[ $# -ge 1 && "$1" == -* ]]; do
  case "$1" in
    -nsec)
      nsecop=( "-nsec" )
      shift
      ;;
    -exclude | -exclude-path)
      if [[ $# -lt 2 ]]; then
        echo "option $1 requires an argument" 1>&2
        printf 'usage:\n %s\n' "${PROG_HELP[*]}" 1>&2
        exit 1
      fi
      exclop+=( "$1" "$2" )
      shift 2
      ;;
    *)
      echo "unknown option $1" 1>&2
      printf 'usage:\n %s\n' "${PROG_HELP[*]}" 1>&2
      exit 1
      ;;
  esac
done
dirs=( "$@" )

# END COMMAND LINE PARSING
# ----------------------------------------------------------------------

echo "nsecop = [" "${nsecop[@]}" "]" 1>&2
echo "exclop = [" "${exclop[@]}" "]" 1>&2

# Regularize directory names: relative names get a "./" prefix, every
# name ends with exactly one "/", and "//" is treated the same as "/".
# Each argument is handled on its own, so the result is never re-split
# on blanks nor glob-expanded.
if [[ ${#dirs[@]} -eq 0 ]]; then
  # Default is current directory:
  dirs=( ./ )
else
  rawdirs=( "${dirs[@]}" )
  dirs=( )
  dslash='//'
  for dir in "${rawdirs[@]}"; do
    # Ignore empty arguments:
    [[ -n "${dir}" ]] || continue
    # Prefix relative names with "./":
    [[ "${dir}" == /* ]] || dir="./${dir}"
    # A name that already began with "./" now begins with "././"; undo that:
    [[ "${dir}" == ././* ]] && dir="${dir#./}"
    # Replace any run of trailing slashes by a single one:
    while [[ "${dir}" == */ ]]; do dir="${dir%/}"; done
    dir="${dir}/"
    # Collapse every remaining run of slashes, not just the first one:
    while [[ "${dir}" == *//* ]]; do dir=${dir//${dslash}//}; done
    dirs+=( "${dir}" )
  done
fi
echo "dirs = [" "${dirs[@]}" "]" 1>&2

# Work files live in a private directory that is removed on any exit,
# instead of predictable, never-deleted names directly under /tmp.
tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/${PROG_NAME}.XXXXXX") || {
  echo "${PROG_NAME}: cannot create temporary directory" 1>&2
  exit 1
}
trap 'rm -rf -- "${tmpdir}"' EXIT

csfile="${tmpdir}/files.csf"
sdfile="${tmpdir}/files.sdf"

# Both lists are sorted on the third field (the file name) so that
# they can be joined on it below.
echo "finding sizes and mod-dates..." 1>&2
find_all_files_size_date.sh "${exclop[@]}" "${nsecop[@]}" "${dirs[@]}" \
  | sort -k3 > "${sdfile}"

echo "finding checksums and sizes..." 1>&2
find_all_files_cksum_size.sh "${exclop[@]}" "${dirs[@]}" \
  | sort -k3 > "${csfile}"

# Merge into file with fields "{SIZE1} {SIZE2} {CKSUM} {DATE} {FNAME}"
# then check that sizes are equal and write "{CKSUM} {SIZE} {DATE} {PNAME}".
# Lines present in only one list are kept ("-a1 -a2") with "??" in the
# missing fields, so that the checks below can report them.
join \
    -a1 -a2 -e '??' \
    -j3 -o 1.1,2.2,2.1,1.2,0 \
    "${sdfile}" \
    "${csfile}" \
  | gawk '
      (NF != 5) { printf "** bug NF\n[[%s]]\n", $0 > "/dev/stderr"; exit(1); }
      ($1 == "??") { printf "** bug unp 1\n[[%s]]\n", $0 > "/dev/stderr"; }
      ($2 == "??") { printf "** bug unp 2\n[[%s]]\n", $0 > "/dev/stderr"; }
      ($1 != $2) { printf "** bug size\n[[%s]]\n", $0 > "/dev/stderr"; exit(1); }
      /^[0-9]+ [0-9]+ / {
        # Zero-pad the checksum to 10 characters by hand: the "0" flag
        # combined with "%s" is not defined by POSIX.
        cksum = $3;
        while (length(cksum) < 10) { cksum = "0" cksum; }
        printf "%s %14s %s %s\n", cksum, $1, $4, $5;
        next;
      }
      // { printf "** bug\n[[%s]]\n", $0 > "/dev/stderr"; exit(1); }
    '
# Propagate the result of the consistency check (second pipeline stage)
# instead of always reporting success.
status=${PIPESTATUS[1]}

exit "${status}"