Poormens desktop search engine.sh
Version vom 13. September 2007, 15:33 Uhr von Fries43 (Diskussion | Beiträge) (→Archivtypen: +mehr archive)
Was
- Umsetzung von Rexxbot auf Linux
- Es wird ein Listing beliebiger Verzeichnisse in beliebiger Tiefe erstellt
- Zu jeder Datei werden neben Dateidatum und -größe auch MIME- und Dateityp gespeichert
- Archive werden temporär entpackt und ebenfalls indexiert
- ebenso Archive in Archiven in Archiven in Archiven [*n]
Archivtypen
- ist-Zustand:
- RAR, ZIP, TAR, GZIP, BZIP2
- soll:
- ISO, LHA, LZX, ARJ, 7ZIP, gesplittete RARs
Systemanforderungen
- Linux
- Aktuelle /etc/magic für das Programm file zur Dateityperkennung
- apt-get install sqlite
Programmcode
#!/bin/bash
#
# Poor man's desktop search engine.
#
# Builds an SQLite index of every regular file below a directory. For each
# file the size, the access time (find's %A@), directory, name, file(1) type
# description, MIME string and MD5 hash are stored. Recognised archives
# (RAR, ZIP, TAR, BZIP2, GZIP) are unpacked into a temporary directory below
# the working directory and indexed recursively.
#
# Usage: <script> /directory/to/crawl/ /working/dir/
#
# Requires bash (substring expansion and `read -d` are used), GNU find
# (-printf), file, md5sum, awk and sqlite or sqlite3.
#
# Globals shared between the functions:
#   WORKING_DIR  directory for the database and temporary unpack directories,
#                always used with a trailing slash
#   DB           path of the SQLite database file
#   DEEP         current archive nesting depth (0 = real file system)
#   SQLITE_BIN   name of the SQLite command line client (default: sqlite)
#   FPAT FNAM TYPE MIME HASH  attributes of the file currently being indexed
#                (raw values; SQL escaping happens only when inserting)

# Prints $1 with every single quote doubled, i.e. escaped for use inside a
# single-quoted SQL string literal.
fkt_sql_escape () {
  local q="'"
  printf '%s' "${1//$q/$q$q}"
}

# Creates an empty database for the crawl root $1 and stores its path in DB.
# The database name is the MD5 of the crawl root, so every root gets its own
# file inside WORKING_DIR. An existing database for the same root is replaced.
fkt_create_db () {
  DB="${WORKING_DIR}$(printf '%s\n' "$1" | md5sum -b | cut -d" " -f1).db"
  rm -f -- "$DB"
  "${SQLITE_BIN:-sqlite}" "$DB" "create table n (id INTEGER PRIMARY KEY,deep INTEGER,size INTEGER,time INTEGER,fpat TEXT,fnam TEXT,type TEXT,mime TEXT,hash TEXT);"
}

# Unpacks the current file (FPAT/FNAM) with the unpacker named by $1
# (rar, zip, tar, bzip2 or gzip) into ${WORKING_DIR}${HASH}/, indexes the
# result one level deeper and removes the temporary directory again.
# Returns 1 without touching anything if the unpack directory cannot be
# created or HASH is empty.
fkt_decompress () {
  local src="$FPAT/$FNAM"
  local name="$FNAM"
  local unpack_dir="${WORKING_DIR}${HASH}/"

  echo "DECOMPRESS ($1): file \"$src\""

  # Without a hash the unpack directory would be WORKING_DIR itself and the
  # cleanup below would delete the database.
  if [ -z "$HASH" ]; then
    echo "DECOMPRESS ($1): no hash for \"$src\", skipping" >&2
    return 1
  fi

  # The unpacker runs after a cd, so both paths have to be absolute.
  case "$src" in
    /*) ;;
    *) src="$PWD/$src" ;;
  esac
  case "$unpack_dir" in
    /*) ;;
    *) unpack_dir="$PWD/$unpack_dir" ;;
  esac

  # An existing directory means the same content is already being unpacked
  # further up (e.g. an archive containing itself); do not recurse again.
  if ! mkdir -- "$unpack_dir"; then
    echo "DECOMPRESS ($1): cannot create \"$unpack_dir\", skipping" >&2
    return 1
  fi

  DEEP=$((DEEP + 1))
  # Subshell: the caller's working directory stays untouched, so relative
  # crawl paths keep working. stdin is detached because the caller's loop
  # reads its records from stdin and an unpacker prompt must not eat them.
  (
    cd -- "$unpack_dir" || exit 1
    "fkt_decompress_$1" "$src" "${unpack_dir}${name}"
  ) </dev/null
  fkt_gen_index "$unpack_dir"
  rm -fR -- "${unpack_dir:?}"
  DEEP=$((DEEP - 1))
}

# Unpackers. $1 = absolute path of the archive, $2 = absolute path the
# archive would have inside the unpack directory. All of them extract into
# the current directory.
fkt_decompress_rar () {
  unrar -inul -p- e "$1"
}

fkt_decompress_zip () {
  unzip -qq "$1"
}

fkt_decompress_tar () {
  tar -xf "$1"
}

fkt_decompress_bzip2 () {
  # bzip2 replaces its input file, so work on a copy.
  cp -- "$1" "$2"
  bzip2 -d -- "$2"
}

fkt_decompress_gzip () {
  # gzip insists on a .gz suffix and replaces its input file.
  cp -- "$1" "$2.gz"
  gzip -d -- "$2.gz"
}

# Decides from MIME (and TYPE for gzip) whether the current file is a
# supported archive and, if so, hands it to fkt_decompress.
fkt_test_for_archiv () {
  # Compare only the media type: depending on the file(1) version the MIME
  # string carries a suffix such as "; charset=binary" or ", POSIX".
  local mime_type="${MIME%%[;,]*}"

  case "$mime_type" in
    application/x-rar | application/x-rar-compressed | application/vnd.rar)
      fkt_decompress rar
      ;;
    application/x-zip | application/zip)
      fkt_decompress zip
      ;;
    application/x-tar)
      fkt_decompress tar
      ;;
    application/x-bzip2)
      fkt_decompress bzip2
      ;;
    *)
      if [ "${TYPE:0:20}" = "gzip compressed data" ]; then
        fkt_decompress gzip
      fi
      ;;
  esac
}

# Indexes every regular file below directory $1 into DB with depth DEEP and
# descends into archives via fkt_test_for_archiv.
fkt_gen_index () {
  local record rest size atime path values

  echo "MAKE_INDEX: updating database \"$DB\" with index of \"$1\", deep = $DEEP"

  # Records are NUL terminated and split by hand: size and time never contain
  # "|", so everything after the second "|" is the path, whatever characters
  # it contains. The loop runs in the pipeline's subshell, which also keeps
  # the per-file globals of nested calls away from the caller.
  find "$1" -type f -printf '%s|%A@|%p\0' |
    while IFS= read -r -d '' record; do
      size=${record%%|*}
      rest=${record#*|}
      atime=${rest%%|*}
      path=${rest#*|}

      case "$path" in
        */*)
          FPAT=${path%/*}
          FNAM=${path##*/}
          ;;
        *)
          FPAT=.
          FNAM=$path
          ;;
      esac

      TYPE="$(file -b -- "$path")"
      MIME="$(file -ib -- "$path")"
      # Hash via stdin: with a file name argument md5sum prefixes the hash
      # with "\" when the name contains a backslash or newline.
      HASH="$(md5sum -b <"$path" | cut -d" " -f1)"

      values="'$DEEP','$size','$atime'"
      values="$values,'$(fkt_sql_escape "$FPAT")','$(fkt_sql_escape "$FNAM")'"
      values="$values,'$(fkt_sql_escape "$TYPE")','$(fkt_sql_escape "$MIME")'"
      values="$values,'$HASH'"
      "${SQLITE_BIN:-sqlite}" "$DB" "insert into n (deep,size,time,fpat,fnam,type,mime,hash) values ($values);" </dev/null

      fkt_test_for_archiv
    done

  echo "leaving directory \"$1\", deep = $DEEP"
}

# Prints the total size and the number of all indexed files.
fkt_report () {
  # printf keeps large totals out of scientific notation and prints zeros
  # instead of blanks for an empty index.
  "${SQLITE_BIN:-sqlite}" "$DB" "select size from n" |
    awk '{s=s+$0;c++}END{printf "%.0f bytes in %d files\n", s, c}'
}

# Entry point. $1 = directory to crawl, $2 = existing working directory.
# Returns 2 on wrong usage and 1 if a prerequisite is missing.
main () {
  if [ -z "${1:-}" ] || [ -z "${2:-}" ]; then
    echo "Usage: $0 /directory/to/crawl/ /working/dir/" >&2
    return 2
  fi

  # Accept either the legacy "sqlite" client or "sqlite3".
  if [ -z "${SQLITE_BIN:-}" ]; then
    if command -v sqlite >/dev/null 2>&1; then
      SQLITE_BIN=sqlite
    elif command -v sqlite3 >/dev/null 2>&1; then
      SQLITE_BIN=sqlite3
    else
      echo "error: neither sqlite nor sqlite3 found in PATH" >&2
      return 1
    fi
  fi

  # Absolute path with exactly one trailing slash: the other functions
  # concatenate file names directly onto WORKING_DIR.
  if ! WORKING_DIR="$(cd -- "$2" && pwd)"; then
    echo "error: working directory \"$2\" is not accessible" >&2
    return 1
  fi
  WORKING_DIR="${WORKING_DIR%/}/"

  DEEP=0
  fkt_create_db "$1"
  fkt_gen_index "$1"
  fkt_report
}

main "$@"