Poormens desktop search engine.sh

Aus Weimarnetz Wiki
Zur Navigation springen Zur Suche springen

Was

  • Umsetzung von Rexxbot auf Linux
  • Es wird ein Listing beliebiger Verzeichnisse in beliebiger Tiefe erstellt
  • Zu jeder Datei werden neben Dateidatum und -größe auch MIME- und Dateityp gespeichert
  • Archive werden temporär entpackt und ebenfalls indexiert
  • ebenso Archive in Archiven in Archiven in Archiven [*n]

Archivtypen

  • Ist-Zustand:
    • RAR, ZIP, TAR, GZIP, BZIP2
  • Soll:
    • ISO, LHA, LZX, ARJ, 7ZIP, gesplittete RARs

Systemanforderungen

  • Linux
  • Aktuelle /etc/magic für das Programm file (zur Dateityperkennung)
  • apt-get install sqlite

Programmcode

#!/bin/sh

# Create a fresh, empty index database for one crawl root.
#
# Globals:   WORKING_DIR (read)  - prefix the database file name is appended to
#            DB          (write) - path of the database file
# Arguments: $1 - crawl root; its md5 digest (of the string plus the newline
#                 that echo appends) becomes the database file name
# Any existing database with the same name is deleted first.
fkt_create_db () {
        local digest schema

        digest="$(echo "$1" | md5sum -b | cut -d" " -f1)"
        schema="create table n (id INTEGER PRIMARY KEY,deep INTEGER,size INTEGER,time INTEGER,fpat TEXT,fnam TEXT,type TEXT,mime TEXT,hash TEXT);"

        DB="${WORKING_DIR}${digest}.db"

        rm -f  "$DB"
        sqlite "$DB" "$schema"
}

# Unpack the archive currently being indexed into a scratch directory,
# index the extracted files one nesting level deeper, then remove the
# scratch directory again.
#
# Globals:   WORKING_DIR (read) - prefix for the scratch directory
#            HASH        (read) - digest of the archive; names the scratch dir
#            FPAT, FNAM  (read) - directory and file name of the archive
#            DEEP        (read/write) - nesting depth, +1 while indexing
# Arguments: $1 - archive kind; selects the helper fkt_decompress_$1
# Returns:   1 without doing anything if HASH is empty, 0 otherwise
fkt_decompress () {
        # An empty HASH would make the scratch path equal to WORKING_DIR
        # itself, and the cleanup below would then delete the whole
        # working directory.
        if [ -z "$HASH" ]; then
                echo "DECOMPRESS ($1): no hash for \"$FPAT/$FNAM\", skipping" >&2
                return 1
        fi

        DEEP=$((DEEP + 1))
        echo "DECOMPRESS ($1): file \"$FPAT/$FNAM\""

        if mkdir -p "${WORKING_DIR}${HASH}/"; then
                # Extract inside a subshell: the helpers unpack into the
                # current directory, but the caller's working directory must
                # stay untouched so that relative crawl/working/database
                # paths keep resolving after this function returns.
                (
                        # Make the archive path absolute before changing
                        # directory, otherwise a relative path would dangle.
                        case "$FPAT" in
                                /*) archive="$FPAT/$FNAM" ;;
                                *)  archive="$PWD/$FPAT/$FNAM" ;;
                        esac
                        cd "${WORKING_DIR}${HASH}/" || exit 1
                        "fkt_decompress_$1" "$archive" "$PWD/$FNAM"
                )
                # Index whatever was extracted, even after a partial failure.
                fkt_gen_index "${WORKING_DIR}${HASH}/"
                rm -fR "${WORKING_DIR}${HASH:?}/"
        else
                echo "DECOMPRESS ($1): cannot create \"${WORKING_DIR}${HASH}/\"" >&2
        fi

        DEEP=$((DEEP - 1))
}

# Unpack a RAR archive into the current directory.
# Arguments: $1 - path of the archive
# Returns:   exit status of unrar
fkt_decompress_rar () {
        local archive="$1"

        # Flags as understood from unrar's usage text: "e" extracts,
        # -inul silences messages, -p- disables password prompting.
        unrar -inul -p- e "$archive"
}

# Unpack a ZIP archive into the current directory.
# Arguments: $1 - path of the archive
# Returns:   exit status of unzip
fkt_decompress_zip () {
        local archive="$1"

        unzip -qq "$archive"
}

# Unpack a tar archive into the current directory.
# Arguments: $1 - path of the archive
# Returns:   exit status of tar
fkt_decompress_tar () {
        local archive="$1"

        tar -xf "$archive"
}

# Decompress a bzip2 file by working on a copy, leaving the source untouched.
# Arguments: $1 - path of the compressed source file
#            $2 - path the copy is written to before decompressing it
# Returns:   exit status of bzip2 (a failed copy is not checked)
fkt_decompress_bzip2 () {
        local source_file="$1"
        local scratch_copy="$2"

        cp "$source_file" "$scratch_copy"
        bzip2 -d "$scratch_copy"
}

# Decompress a gzip file by working on a copy, leaving the source untouched.
# The copy always gets an extra ".gz" suffix before gzip is run on it.
# Arguments: $1 - path of the compressed source file
#            $2 - target path; the copy is written to "$2.gz"
# Returns:   exit status of gzip (a failed copy is not checked)
fkt_decompress_gzip () {
        local source_file="$1"
        local scratch_copy="$2.gz"

        cp "$source_file" "$scratch_copy"
        gzip -d "$scratch_copy"
}

# Decide whether the file currently being indexed is a supported archive
# and, if so, hand it to fkt_decompress with the matching archive kind.
#
# Globals:   MIME (read) - MIME string reported for the file
#            TYPE (read) - textual type description reported for the file
# Returns:   status of fkt_decompress, or 0 when the file is not an archive
fkt_test_for_archiv () {
        # Compare only the bare media type: anything from the first ";" or
        # "," on is dropped, so "application/x-tar, POSIX" as well as
        # "application/x-rar; charset=binary" are recognised.
        case "${MIME%%[;,]*}" in
                application/x-rar)                  fkt_decompress rar ;;
                application/x-zip|application/zip)  fkt_decompress zip ;;
                application/x-tar)                  fkt_decompress tar ;;
                application/x-bzip2)                fkt_decompress bzip2 ;;
                *)
                        # gzip is detected by the type description instead
                        # of the MIME string.
                        case "$TYPE" in
                                "gzip compressed data"*) fkt_decompress gzip ;;
                        esac
                        ;;
        esac
}

fkt_gen_index () {
        echo "MAKE_INDEX: updating database \"$DB\" with index of \"$1\", deep = $DEEP"

        find "$1" -type f -printf "%s|%A@|%h|%f\n" |
         while read LINE; do
                let LINES+=1
                IFS="|"
                set $LINE

                FPAT="$(echo      "$3"    | sed "s/'/''/g")"; test "$DEEP" -gt 0 && a=a
                FNAM="$(echo         "$4" | sed "s/'/''/g")"
                TYPE="$(file -b   "$3/$4" | sed "s/'/''/g")"
                MIME="$(file -ib  "$3/$4")"
                HASH="$(md5sum -b "$3/$4" | cut -d" " -f1)"
                DATA="'$DEEP','$1','$2','$FPAT','$FNAM','$TYPE','$MIME','$HASH'"
        
                sqlite "$DB" "insert into n (deep,size,time,fpat,fnam,type,mime,hash) values ($DATA);"

                fkt_test_for_archiv
        done

        echo "leaving directory \"$1\", deep = $DEEP"
        unset IFS
}

# Print a one-line summary of the index: total size and number of files.
#
# Globals: DB (read) - database file to summarise
# Outputs: "<bytes> bytes in <count> files" on stdout; an empty index is
#          reported as "0 bytes in 0 files"
fkt_report () {
        # printf forces numeric output, so uninitialised (empty) totals are
        # printed as 0 and large sums are not shown in exponent notation.
        sqlite "$DB" "select size from n" |
         awk '{ total += $0; count++ } END { printf "%.0f bytes in %d files\n", total, count }'
}

# Entry point: crawl directory $1, using $2 as scratch and database directory.
if [ -z "$1" ] || [ -z "$2" ]; then
        echo "Usage: $0 /directory/to/crawl/ /working/dir/" >&2
elif [ ! -d "$2" ]; then
        echo "$0: working directory \"$2\" does not exist" >&2
else
        DEEP=0
        # All other functions append file names directly to WORKING_DIR, so
        # make sure it ends in exactly one "/" even if the caller omitted it.
        WORKING_DIR="${2%/}/"
        fkt_create_db   "$1"
        fkt_gen_index   "$1"
        fkt_report
fi