Skip to content
Snippets Groups Projects
Select Git revision
  • a39f6ac0abc45fef780d46ab8e56d5cf55039846
  • master default protected
2 results

sql-backup.py

Blame
  • validate_workflow.sh 16.14 KiB
    #!/usr/bin/env bash
    
    ### META
    # AUTHORS:
    #  - Andreas Romeyke (<Andreas.Romeyke@slub-dresden.de>)
    
    # HINT for developers
    #  - indent code with 4 spaces
    #  - use UTF8 without BOM
    #  - use 'newline' as line ending
    # in IntelliJ idea the settings are:
    #  Indent case statements
    #  Use Unix line separators
    #  Tab size: 4
    #  Indent: 4
    #  Shfmt formatter: ~/.local/share/JetBrains/IdeaIC2022.1/Shell Script/shfmt
    # in vim use:
    #   set tabstop=4
    #   set shiftwidth=4
    #   set expandtab
    
    #hh A cli tool which uses different validators to validate SLUB workflows.
    #hh
    #hh Usage: validate_workflow.sh [-h] | [-s] | -w watchfolder -r resultfolder [-d] [...]
    #hh
    #hh Options:
    #hh
    #hh      -h, --help
    #hh                 help output
    #hh      -w, --watch-folder <DIR>
    #hh                 watches folder for files which should be evaluated
    #hh      -r, --result-folder <DIR>
    #hh                 target folder to store validation results
    #hh      -f, --files-mode [sort|delete|nothing]
    #hh                 mode=sort sorts files to valid- and invalid-folder,
    #hh                 mode=delete deletes already checked files from watch-folder
    #hh                 mode=nothing files in watch-folder remain untouched
    #hh                 The mode=nothing is default.
    #hh      -v, --valid-folder <DIR> 
    #hh                 only needed if files-mode=sort, moves valid files from
    #hh                 watch-folder to valid-folder
    #hh      -i, --invalid-folder <DIR>
    #hh                 only needed if files-mode=sort, moves invalid files from
    #hh                 watch-folder to invalid folder
    #hh      -s, --statistics
    #hh                 print a statistic
    #hh      -m, --mode [auto, mediathek, fotothek, save, kitodo, lfulg]
    #hh                the mode 'auto' tries to check files based on file mime-types.
    #hh                The other modes are actual workflow names.
    #hh      -d, --daemon
    #hh                starts a daemon, works only with --files-mode=delete or --files-mode=nothing
    #hh      -t, --stage [current,upcoming,any]
    #hh                validates with current or upcoming profile/validator
    #hh                or any if any is valid
    #hh      -p, --pipe
    #hh                validates a single filestream from STDIN, writes result to STDOUT
    #hh                no daemon, no folder nor filemode params needed
    #hh      -G, --gen-bash-complete
    #hh                prints a script for bash autocompletion, should be copied
    #hh                to /etc/bash_completion.d/validate_workflow
    #hh
    
    # expected programs:
    # file, ...
    
    # DEFAULTS
    # Global settings. get_cli_args overwrites most of them from the command
    # line; the remaining ones are fixed for the whole run.
    # 1 = keep running and watch WATCH_FOLDER via inotifywait (-d/--daemon)
    WITH_DAEMON=0
    # 1 = debug() writes its messages to stderr (-D/--debug)
    WITH_DEBUG=0
    # 1 = validate a single stream read from stdin (-p/--pipe)
    WITH_PIPE=0
    # stage to validate against: current, upcoming or any (-t/--stage)
    STAGE=any
    # workflow name, or 'auto' to guess it from the mime type (-m/--mode)
    MODE=auto
    # what to do with a file after validation: sort, delete or nothing (-f/--files-mode)
    FILES_MODE=nothing
    # CSV file to which update_statistics appends one record per validator run
    STATFILE=${HOME}/.cache/validate_workflows/statistics.cnt
    # lock file handed to flock(1) whenever STATFILE is read, appended or trimmed
    LOCKFILE=/var/lock/validate_workflows.lock
    # folders given on the command line (-w, -r, -v, -i)
    WATCH_FOLDER=""
    RESULT_FOLDER=""
    VALID_FOLDER=""
    INVALID_FOLDER=""
    # get_cli_args triggers trim_statistics once STATFILE has more lines than this
    MAX_STAT_LINES=100000
    # number of most recent lines that trim_statistics keeps in STATFILE
    MIN_STAT_LINES=10000
    
    # PREDEFINED VALIDATORS
    # Lookup table from "workflow + filetype + stage" to the validator command
    # line. prepare_cmd builds the key with printf "%11s%4s%9s" and turns the
    # padding blanks into underscores, hence the fixed-width, right-aligned,
    # underscore-padded keys below.
    # The literal word FILE in a command is a placeholder; scan_file replaces it
    # with the name of the file to check.
    declare -A validators
    #validators[workflow][filetype][stage]
    # workflow has max 11 chars
    # filetype has max 4 chars
    # stage has max 9 chars
    # each validator should return true if file was valid
    # NOTE(review): the mediathek mka 'upcoming' entry uses the same profile as
    # 'current' -- confirm that this is intended.
    validators[__mediathek_mka__current]="/usr/bin/mediaconch -ft -p /etc/mediaconch/mediathek_retroaudio.xml FILE"
    validators[__mediathek_mka_upcoming]="/usr/bin/mediaconch -ft -p /etc/mediaconch/mediathek_retroaudio.xml FILE"
    validators[_______save_mkv__current]="/usr/bin/mediaconch -ft -p /etc/mediaconch/save_retrovideofilm_current.xml FILE" 
    validators[_______save_mkv_upcoming]="/usr/bin/mediaconch -ft -p /etc/mediaconch/save_retrovideofilm_upcoming.xml FILE"
    validators[_______save_mka__current]="/usr/bin/mediaconch -ft -p /etc/mediaconch/save_retroaudio_current.xml FILE"
    # NOTE(review): the save mka 'upcoming' entry points to the *_current.xml
    # profile although the mkv pair has a separate *_upcoming.xml -- looks like
    # a copy-and-paste leftover, TODO confirm.
    validators[_______save_mka_upcoming]="/usr/bin/mediaconch -ft -p /etc/mediaconch/save_retroaudio_current.xml FILE"
    validators[_____kitodo_tif__current]="/usr/bin/checkit_tiff_current /etc/checkit_tiff/retromono_current FILE"
    validators[_____kitodo_tif_upcoming]="/usr/bin/checkit_tiff_upcoming /etc/checkit_tiff/retromono_upcoming FILE"
    validators[______lfulg_tif__current]="/usr/bin/checkit_tiff_current /etc/checkit_tiff/retrogeomono_current FILE"
    validators[______lfulg_tif_upcoming]="/usr/bin/checkit_tiff_upcoming /etc/checkit_tiff/retrogeomono_upcoming FILE"
    validators[___fotothek_tif__current]="/usr/bin/checkit_tiff_current /etc/checkit_tiff/retrofoto_current FILE"
    validators[___fotothek_tif_upcoming]="/usr/bin/checkit_tiff_upcoming /etc/checkit_tiff/retrofoto_upcoming FILE"
    # The icc entries start with a PATH=... word; ${PATH} is expanded right here
    # at definition time. Both ./IccProfLib/ and Tools/IccDumpProfile/... are
    # relative paths, i.e. they depend on the current working directory.
    validators[_____kitodo_icc__current]="PATH=${PATH}:./IccProfLib/ Tools/IccDumpProfile/iccDumpProfile -v FILE"
    validators[_____kitodo_icc_upcoming]="PATH=${PATH}:./IccProfLib/ Tools/IccDumpProfile/iccDumpProfile -v FILE"
    validators[______lfulg_icc__current]="PATH=${PATH}:./IccProfLib/ Tools/IccDumpProfile/iccDumpProfile -v FILE"
    validators[______lfulg_icc_upcoming]="PATH=${PATH}:./IccProfLib/ Tools/IccDumpProfile/iccDumpProfile -v FILE"
    validators[___fotothek_icc__current]="PATH=${PATH}:./IccProfLib/ Tools/IccDumpProfile/iccDumpProfile -v FILE"
    validators[___fotothek_icc_upcoming]="PATH=${PATH}:./IccProfLib/ Tools/IccDumpProfile/iccDumpProfile -v FILE"
    
    
    # From here on every expansion of an unset variable aborts the script.
    set -o nounset                              # Treat unset variables as an error
    
    # Don't just call this function "help()", as that's a reserved command in Bash. 
    # Print the usage text embedded in this script: each line that begins with
    # the help marker is written to stdout with the marker and one optional
    # following blank removed; every other line is suppressed.
    comment_help() {
        sed -n -E -e 's/^#hh ?//p' "$0"
    }
    
    # Summarise the statistics file.
    # Globals:  LOCKFILE (read), STATFILE (read)
    # Outputs:  one line "<total> <valid>" on stdout, where <total> is the
    #           number of records and <valid> the number of records whose
    #           second CSV column (the validator exit code written by
    #           update_statistics) is 0. print_statistics reads the second
    #           field as the count of valid files.
    # The "+ 0" forces numeric output, so an empty file yields "0 0".
    calc_statistics() {
        flock -x "$LOCKFILE" cat "$STATFILE" \
            | awk -F "," '$2 == 0 { valid++ } { total++ } END { print total + 0, valid + 0 }'
    }
    
    # Print a short human readable summary of the validation statistics.
    # Reads the line produced by calc_statistics and treats its first field as
    # the total number of records and its second field as the number of valid
    # files.
    # Outputs:  four lines on stdout (headline, valid, invalid, ratio)
    print_statistics() {
        local stat
        local cnt_total
        local cnt_valid
        local cnt_invalid
        local ratio
        stat=$(calc_statistics)
        # split the two counters without forking awk twice
        read -r cnt_total cnt_valid _ <<< "$stat"
        # an empty or missing statistics file must not break the arithmetic below
        cnt_total=${cnt_total:-0}
        cnt_valid=${cnt_valid:-0}
        cnt_invalid=$((cnt_total - cnt_valid))
        if [ "$cnt_total" -gt 0 ]; then
            ratio=$((100 * cnt_valid / cnt_total))
        else
            # nothing recorded yet, avoid a division by zero
            ratio=0
        fi
        echo "Validation Statistics"
        echo "valid files:   $cnt_valid"
        echo "invalid files: $cnt_invalid"
        echo "ratio:         $ratio% valid"
    }
    
    # Append one record to the statistics file.
    # Globals:   LOCKFILE (read), STATFILE (appended)
    # Arguments: $1 - exit code of the validator (0 means valid)
    #            $2 - duration of the validator run in seconds
    #            $3 - file type
    #            $4 - workflow
    #            $5 - stage
    # The record is "<date>,<is_valid>,<duration>,<ftype>,<workflow>,<stage>",
    # with the date in the format %F%T (date and time without a separator).
    update_statistics() {
        local is_valid=$1
        local duration=$2
        local ftype=$3
        local workflow=$4
        local stage=$5
        local date
        date=$(date +"%F%T")
        debug "date=$date"
        # LOCKFILE is quoted so that a lock path containing blanks or glob
        # characters is passed to flock as a single word
        flock -x "$LOCKFILE" echo "$date,$is_valid,$duration,$ftype,$workflow,$stage" >> "$STATFILE"
    }
    
    # Shrink the statistics file to its newest MIN_STAT_LINES lines.
    # Globals:  STATFILE (rewritten), MIN_STAT_LINES (read)
    # The tail of the file is written to "$STATFILE.new", which then replaces
    # STATFILE. On any failure an error is reported and the shell calling this
    # function exits with status 1.
    # The exit must not be wrapped in "( ... )": a parenthesised exit only
    # leaves that subshell, the function would carry on and move an empty or
    # truncated "$STATFILE.new" over the real statistics.
    trim_statistics() {
        debug "trim_statistics"
        if ! tail -n "$MIN_STAT_LINES" "$STATFILE" > "$STATFILE.new"; then
            error "could not trim $STATFILE to $STATFILE.new"
            # do not leave a partial copy behind
            rm -f "$STATFILE.new"
            exit 1
        fi
        if ! mv "$STATFILE.new" "$STATFILE"; then
            error "count not trim $STATFILE,because could not mv $STATFILE.new to $STATFILE"
            exit 1
        fi
    }
    
    # Write "DEBUG: <message>" to stderr, but only if WITH_DEBUG is 1.
    # Arguments: $1 - message
    # Returns 0 when debugging is switched off.
    debug() {
        [ "$WITH_DEBUG" -eq 1 ] || return 0
        echo "DEBUG: $1" >&2
    }
    
    # Write "ERROR: <message>" to stderr.
    # Arguments: $1 - message
    error() {
        printf 'ERROR: %s\n' "$1" >&2
    }
    
    # Print the mime type of a file as reported by file(1).
    # Arguments: $1 - path of the file
    # Outputs:   the bare mime type (e.g. "image/tiff") on stdout
    # --brief makes file(1) omit the "<name>: " prefix, so no post-processing is
    # needed; "--" keeps a file name starting with "-" from being parsed as an
    # option.
    get_mimetype() {
        local filename=$1
        file --brief --mime-type -- "$filename"
    }
    
    
    # Parse the command line, store the options in the global settings and
    # validate their combination.
    # Globals written: WITH_DEBUG, WITH_DAEMON, WITH_PIPE, WATCH_FOLDER,
    #                  RESULT_FOLDER, VALID_FOLDER, INVALID_FOLDER, MODE, STAGE,
    #                  FILES_MODE
    # Globals read:    STATFILE, LOCKFILE, MAX_STAT_LINES
    # Arguments:       the script's command line ("$@")
    # Exits 0 after --help, --statistics and --gen-bash-complete, exits 1 on any
    # invalid parameter or parameter combination. Otherwise it makes sure the
    # directory of STATFILE exists and trims STATFILE if it has grown beyond
    # MAX_STAT_LINES.
    get_cli_args() {
        local lines
        local cachedir
        while [[ $# -gt 0 ]]; do
            # options that take a value must not be the last word, otherwise
            # "${2}" below would be unset and abort with an obscure message
            case ${1} in
                -w | --watch-folder | -r | --result-folder | -v | --valid-folder | \
                -i | --invalid-folder | -m | --mode | -t | --stage | -f | --files-mode)
                    if [[ $# -lt 2 ]]; then
                        error "param '${1}' needs a value. Please, give '$(basename "${0}") --help' a chance!"
                        exit 1
                    fi
                    ;;
            esac
            case ${1} in
                -h | --help)
                    comment_help
                    exit 0
                    ;;
                # the help text documents the dashed spelling; the underscore
                # spelling is kept for backward compatibility
                -G | --gen-bash-complete | --gen_bash_complete)
                    printf 'complete -F _validate_workflow_completion %s\n' "$(basename "${0}")"
                    exit 0
                    ;;
                -D | --debug)
                    WITH_DEBUG=1
                    shift
                    ;;
                -s | --statistics)
                    print_statistics
                    exit 0
                    ;;
                -w | --watch-folder)
                    WATCH_FOLDER="${2}"
                    shift 2
                    ;;
                -r | --result-folder)
                    RESULT_FOLDER="${2}"
                    shift 2
                    ;;
                -v | --valid-folder)
                    VALID_FOLDER="${2}"
                    shift 2
                    ;;
                -i | --invalid-folder)
                    INVALID_FOLDER="${2}"
                    shift 2
                    ;;
                -m | --mode)
                    MODE="${2}"
                    shift 2
                    ;;
                -t | --stage)
                    STAGE="${2}"
                    shift 2
                    ;;
                -d | --daemon)
                    WITH_DAEMON=1
                    shift
                    ;;
                -f | --files-mode)
                    FILES_MODE="${2}"
                    shift 2
                    ;;
                -p | --pipe)
                    WITH_PIPE=1
                    shift
                    ;;
                *)
                    error "'${1}' is invalid param. Please, give '$(basename "${0}") --help' a chance!"
                    exit 1
                    ;;
            esac
        done
        if [ "$FILES_MODE" != "sort" ] && [ "$FILES_MODE" != "delete" ] && [ "$FILES_MODE" != "nothing" ]; then
            error "param --files-mode must be 'sort', 'delete' or 'nothing'!"
            exit 1
        fi
        if [ "$MODE" != "auto" ] \
            && [ "$MODE" != "mediathek" ] \
            && [ "$MODE" != "fotothek" ] \
            && [ "$MODE" != "save" ] \
            && [ "$MODE" != "kitodo" ] \
            && [ "$MODE" != "lfulg" ]; then
            error "param --mode must be 'auto', 'mediathek', 'fotothek', 'save', 'kitodo' or 'lfulg'!"
            exit 1
        fi
        # the stage is used in pipe mode as well, so it is checked for both modes
        if [ "$STAGE" != "current" ] && [ "$STAGE" != "upcoming" ] && [ "$STAGE" != "any" ]; then
            error "--param stage must be 'any', 'current' or 'upcoming'!"
            exit 1
        fi
        if [ "$WITH_PIPE" -eq 1 ]; then
            if
                [ "$WITH_DAEMON" -eq 1 ] \
                || [ -n "$WATCH_FOLDER" ] \
                || [ -n "$RESULT_FOLDER" ] \
                || [ -n "$VALID_FOLDER" ] \
                || [ -n "$INVALID_FOLDER" ] \
                || [ "$FILES_MODE" = "sort" ] \
                ; then
                error "param --pipe not combineable with params --daemon, --result-folder, --watch-folder, --valid-folder, --invalid-folder, --files-mode"
                exit 1
            fi
        else
            if [ "$WITH_DAEMON" -eq 1 ] && [ "$FILES_MODE" = "sort" ]; then
                error "param --daemon does only work with param --files-mode='delete' or --files-mode='nothing'!"
                exit 1
            fi
            if [ ! -d "$WATCH_FOLDER" ]; then
                error "watch folder '$WATCH_FOLDER' does not exist!"
                exit 1
            fi
            if [ ! -d "$RESULT_FOLDER" ]; then
                error "result folder '$RESULT_FOLDER' does not exist!"
                exit 1
            fi
            if [ "$FILES_MODE" = "sort" ]; then
                if [ ! -d "$VALID_FOLDER" ]; then
                    error "valid folder '$VALID_FOLDER' does not exist!"
                    exit 1
                fi
                if [ ! -d "$INVALID_FOLDER" ]; then
                    error "invalid folder '$INVALID_FOLDER' does not exist!"
                    exit 1
                fi
            fi
        fi
        cachedir=$(dirname "$STATFILE")
        if [ ! -d "$cachedir" ]; then
            mkdir -p "$cachedir" || error "Could not create dir $cachedir, $?"
        fi
        if [ -e "$STATFILE" ]; then
            lines=$(flock -x "$LOCKFILE" wc -l "$STATFILE" | cut -d " " -f 1)
            debug "found $lines lines in $STATFILE)"
            if [ "$lines" -gt "$MAX_STAT_LINES" ]; then
                # trim under a non-blocking lock on file descriptor 9; if the
                # lock is busy the trimming is simply skipped this time
                (
                    flock -n 9 || exit 1
                    trim_statistics
                ) 9> "$LOCKFILE"
            fi
        fi
    }
    
    # Look up the validator command line for a workflow/filetype/stage triple.
    # Globals:   validators (read)
    # Arguments: $1 - workflow (mode), $2 - file type, $3 - stage
    # Outputs:   the command template on stdout, nothing if the table has no
    #            non-empty entry for the triple
    # The key is the three values right-aligned in fields of 11, 4 and 9
    # characters with every blank turned into an underscore.
    prepare_cmd() {
        local mode=$1
        local ftype=$2
        local stage=$3
        local lookup
        local template
        printf -v lookup "%11s%4s%9s" "$mode" "$ftype" "$stage"
        lookup=${lookup// /_}
        debug "prepare_cmd, key=$lookup"
        if [[ -z ${validators[$lookup]:+1} ]]; then
            debug "no valid command found using key $lookup"
            return
        fi
        template=${validators[$lookup]}
        debug "prepare_cmd, cmd=$template"
        echo "$template"
    }
    
    # Translate a mime type into the short file type used in the validator keys.
    # Arguments: $1 - mime type
    # Outputs:   tif, mkv, icc or pdf on stdout
    # An unknown mime type is reported via error() and ends the current
    # (sub)shell with status 1.
    prepare_ftype() {
        local mimetype=$1
        local ftype
        debug "prepare_ftype, using mimetype: $mimetype"
        if [ "$mimetype" = "image/tiff" ]; then
            ftype="tif"
        elif [ "$mimetype" = "video/x-matroska" ]; then
            ftype="mkv"
        elif [ "$mimetype" = "application/vnd.iccprofile" ]; then
            ftype="icc"
        elif [ "$mimetype" = "application/pdf" ]; then
            ftype="pdf"
        else
            error "unknown file format '$mimetype'"
            exit 1
        fi
        debug "prepare_ftype, detect ftype: $ftype"
        echo "$ftype"
    }
    
    # Guess the workflow from the mime type of a file.
    # Globals:   MODE (written)
    # Arguments: $1 - mime type
    # Outputs:   the detected workflow on stdout ("kitodo" for image/tiff,
    #            "save" for video/x-matroska)
    # Any other mime type is reported via error() and ends the current
    # (sub)shell with status 1.
    estimate_mode() {
        local mimetype=$1
        debug "estimate_mode, using mimetype: $mimetype"
        if [ "$mimetype" = "image/tiff" ]; then
            MODE="kitodo"
        elif [ "$mimetype" = "video/x-matroska" ]; then
            MODE="save"
        else
            error "workflow not detectable"
            exit 1
        fi
        debug "estimate_mode, detected mode: $MODE"
        echo "$MODE"
    }
    
    # Run one validator command, log its output and record the result.
    # Arguments: $1 - complete validator command line (FILE already replaced)
    #            $2 - file type, $3 - workflow, $4 - stage (for the statistics)
    #            $5 - log file that receives stdout and stderr of the validator
    # Outputs:   the exit code of the validator on stdout (0 means valid);
    #            "1" if there is no validator command at all
    exec_cmd() {
        local cmd=$1
        local ftype=$2
        local workflow=$3
        local stage=$4
        local log=$5
        local start_t
        local stop_t
        local is_valid
        local duration
        # files from sub folders get a log in a sub folder which may not exist
        # yet; without it the redirection below fails before the validator runs
        mkdir -p -- "$(dirname -- "$log")"
        # An empty command would "succeed" with status 0 and the file would be
        # treated as valid although nothing checked it.
        if [[ -z ${cmd//[[:space:]]/} ]]; then
            error "no validator available for workflow '$workflow', filetype '$ftype', stage '$stage'"
            echo "no validator available for workflow '$workflow', filetype '$ftype', stage '$stage'" >> "$log"
            echo "1"
            return 0
        fi
        start_t=$(date +"%s")
        debug "scan_file, calling cmd='$cmd'"
        # The command is split into words on purpose. It is started through
        # env(1) so that a leading VAR=value word (as used by the icc
        # validators) becomes an environment assignment instead of being
        # looked up as a command name.
        # shellcheck disable=SC2086
        env $cmd >> "$log" 2>&1
        is_valid=$?
        stop_t=$(date +"%s")
        duration=$((stop_t - start_t))
        debug "scan_file, duration=$duration is_valid=$is_valid log=$log"
        update_statistics "$is_valid" "$duration" "$ftype" "$workflow" "$stage"
        echo "$is_valid"
    }
    
    # Dispose of a checked file according to FILES_MODE.
    # Globals:   FILES_MODE, VALID_FOLDER, INVALID_FOLDER, WATCH_FOLDER (read)
    # Arguments: $1 - path of the checked file
    #            $2 - validation result, 0 means valid
    # sort:   move the file to VALID_FOLDER or INVALID_FOLDER
    # delete: remove the file
    # any other mode leaves the file untouched
    handle_input_if_requested() {
        local filename=$1
        local is_valid=$2
        local target
        local verdict
        debug "handle_input_if_requested, filename=$filename is_valid=$is_valid"
        case "$FILES_MODE" in
            sort)
                if [ "$is_valid" -eq 0 ]; then
                    target=$VALID_FOLDER
                    verdict="valid"
                else
                    target=$INVALID_FOLDER
                    verdict="invalid"
                fi
                debug "handle_input_if_requested, mv $filename to $target, because $verdict"
                mv "$filename" "$target"
                ;;
            delete)
                debug "handle_input_if_requested, rm $filename from watchfolder $WATCH_FOLDER"
                rm -f "$filename"
                ;;
        esac
    }
    
    # Derive the name of the log file that belongs to a checked file.
    # Globals:   WATCH_FOLDER, RESULT_FOLDER (read)
    # Arguments: $1 - path of the checked file
    # Outputs:   the path with a leading WATCH_FOLDER replaced by RESULT_FOLDER
    #            and ".log" appended; a path outside WATCH_FOLDER only gets the
    #            ".log" suffix
    # Plain string operations are used instead of sed, so folder names may
    # contain characters such as '#', '&' or '.' without being misread as
    # part of a sed program or regular expression.
    get_logfile() {
        local filename=$1
        local logname
        if [[ "$filename" == "$WATCH_FOLDER"* ]]; then
            logname="${RESULT_FOLDER}${filename#"$WATCH_FOLDER"}.log"
        else
            logname="${filename}.log"
        fi
        debug "get_logfile, logname=$logname (filename=$filename)"
        echo "$logname"
    }
    
    
    # Validate a single file and dispose of it according to FILES_MODE.
    # Globals:   MODE, STAGE (read)
    # Arguments: $1 - path of the file to check
    # Returns:   1 if the file type or (in auto mode) the workflow cannot be
    #            determined -- the file is then left untouched --, 0 otherwise
    # With STAGE=any the stages "current" and "upcoming" are tried in this
    # order and the first successful one wins. SIGINT is ignored while the
    # validator runs and the file is moved or deleted.
    scan_file() {
        local filename="$1"
        local mimetype
        local ftype
        local logname
        local cmd
        local is_valid
        local mode
        local stage
        local -a stages
        debug "scan_file, using filename: $filename"
        mimetype=$(get_mimetype "$filename")
        # prepare_ftype reports unknown formats itself; do not go on with an
        # empty file type
        if ! ftype=$(prepare_ftype "$mimetype"); then
            return 1
        fi
        logname=$(get_logfile "$filename")
        # work on a copy: overwriting the global MODE would switch off the
        # auto detection for every file scanned after this one
        mode=$MODE
        if [ "$mode" = "auto" ]; then
            # try best guess
            if ! mode=$(estimate_mode "$mimetype"); then
                return 1
            fi
        fi
        if [ "$STAGE" = "any" ]; then
            stages=(current upcoming)
        else
            stages=("$STAGE")
        fi
        trap "" SIGINT
        debug "scan_file, === entering protected area ==="
        is_valid=1
        for stage in "${stages[@]}"; do
            cmd=$(prepare_cmd "$mode" "$ftype" "$stage")
            # literal replacement of the placeholder; the quoted replacement
            # keeps characters like '&' or '#' in the file name as they are
            cmd=${cmd/FILE/"$filename"}
            is_valid=$(exec_cmd "$cmd" "$ftype" "$mode" "$stage" "$logname")
            if [ "$is_valid" -eq 0 ]; then
                break
            fi
        done
        handle_input_if_requested "$filename" "$is_valid"
        debug "scan_file, === leaving protected area ==="
        trap - SIGINT
        debug "---"
    }
    
    # Run scan_file on every regular file found below a directory.
    # Arguments: $1 - directory to search (recursively)
    # The names are passed NUL-separated, so blanks and newlines in file names
    # survive unchanged.
    scan_dir() {
        find "$1" -type f -print0 |
            while IFS= read -r -d '' entry; do
                scan_file "$entry"
            done
    }
    
    #### MAIN
    
    get_cli_args "$@"
    #trap signalhandler SIGINT SIGABRT #sigint
    if [ "$WITH_PIPE" -eq 1 ]; then
        # pipe mode: store stdin in a temporary file, validate it and write the
        # validator output to stdout as promised by the help text
        debug "checking stream"
        if ! filename=$(mktemp --tmpdir validate_wrg.XXXX); then
            error "could not create temporary file"
            exit 1
        fi
        logname=$(get_logfile "$filename")
        cat - > "$filename"
        scan_file "$filename"
        if [ -f "$logname" ]; then
            cat "$logname"
            rm -f "$logname" || error "could not remove temporary file '$logname'"
        fi
        rm -f "$filename" || error "could not remove temporary file '$filename'"
    elif [ "$WITH_DAEMON" -eq 1 ]; then
        # TODO: protect DAEMON from STRG-C for clean shutdown
        # daemon mode, use inotify to watch changes
        debug "starting daemon"
        scan_dir "$WATCH_FOLDER" # to clean up existing files
        debug "calling /usr/bin/inotifywait --monitor --recursive --event create --event attrib --event moved_to --format '%w%f' $WATCH_FOLDER"
        /usr/bin/inotifywait --monitor --recursive --event create \
            --event attrib --event moved_to --format "%w%f" "$WATCH_FOLDER" \
            | while IFS= read -r filename; do
                # events are also raised for directories and for files that
                # have been moved or deleted in the meantime
                if [ ! -f "$filename" ]; then
                    debug "skipping '$filename', not a regular file"
                    continue
                fi
                scan_file "$filename"
            done
        debug "stopping daemon"
    else
        # cli mode, scan watch folder once
        debug "checking dir $WATCH_FOLDER"
        scan_dir "$WATCH_FOLDER"
    fi