diff --git a/validate_workflow.sh b/validate_workflow.sh
index 42ca7930c896665d435e5b027c34b2dc11e914a6..6a94bc47c4f187dd5a02d0343931bdd365d6fc2e 100755
--- a/validate_workflow.sh
+++ b/validate_workflow.sh
@@ -73,6 +73,9 @@ WATCH_FOLDER=""
 RESULT_FOLDER=""
 VALID_FOLDER=""
 INVALID_FOLDER=""
+# The statistics file is cut back to MIN_STAT_LINES once it exceeds MAX_STAT_LINES.
+MAX_STAT_LINES=100000
+MIN_STAT_LINES=10000
 
 # PREDEFINED VALIDATORS
 declare -A validators
@@ -102,8 +105,67 @@ comment_help() {
     sed -rn 's/^#hh ?//;T;p' "$0"
 }
 
+# Read $STATFILE under an exclusive lock on $LOCKFILE and count its records.
+# Field 2 of every record is the validator exit status (0 = valid).
+# Outputs: "<total> <invalid>" on stdout; "0 0" when there are no records.
+calc_statistics() {
+    flock -x "$LOCKFILE" cat "$STATFILE" \
+        | awk -F "," '{total++} $2 != 0 {invalid++} END {print total+0, invalid+0}'
+}
+
+# Print how many recorded validations were valid or invalid, and the valid share.
 print_statistics() {
-    echo "Not implemented yet"
+    local stat cnt_total cnt_valid cnt_invalid ratio
+    stat=$(calc_statistics)
+    read -r cnt_total cnt_invalid <<< "$stat"
+    cnt_total=${cnt_total:-0}
+    cnt_invalid=${cnt_invalid:-0}
+    cnt_valid=$((cnt_total - cnt_invalid))
+    echo "Validation Statistics"
+    echo "valid files: $cnt_valid"
+    echo "invalid files: $cnt_invalid"
+    # Without records there is nothing to divide by.
+    if [ "$cnt_total" -gt 0 ]; then
+        ratio=$((100 * cnt_valid / cnt_total))
+        echo "ratio: $ratio% valid"
+    else
+        echo "ratio: n/a (no records yet)"
+    fi
+}
+
+# Append one CSV record (date,status,duration,ftype,workflow,stage) to $STATFILE.
+# Arguments: $1 validator exit status (0 = valid), $2 duration in seconds,
+#            $3 file type, $4 workflow, $5 stage
+update_statistics() {
+    local is_valid duration ftype workflow stage date
+    is_valid=$1
+    duration=$2
+    ftype=$3
+    workflow=$4
+    stage=$5
+    date=$(date +"%FT%T")
+    debug "date=$date"
+    # Lock fd 9 first and open $STATFILE afterwards: a redirection written on a
+    # "flock FILE CMD" line is opened by the shell before the lock is held.
+    (
+        flock -x 9 || exit 1
+        printf '%s\n' "$date,$is_valid,$duration,$ftype,$workflow,$stage" >> "$STATFILE"
+    ) 9>"$LOCKFILE"
+}
+
+# Keep only the last $MIN_STAT_LINES records of $STATFILE.
+# Returns: 0 on success, 1 if the trimmed copy could not be written or moved.
+trim_statistics() {
+    debug "trim_statistics"
+    # Stop at the first failure so a broken copy never replaces $STATFILE.
+    if ! tail -n "$MIN_STAT_LINES" "$STATFILE" > "$STATFILE.new"; then
+        error "could not trim $STATFILE to $STATFILE.new"
+        return 1
+    fi
+    if ! mv "$STATFILE.new" "$STATFILE"; then
+        error "could not trim $STATFILE, because could not mv $STATFILE.new to $STATFILE"
+        return 1
+    fi
 }
 
 debug() {
@@ -118,10 +180,11 @@ error() {
 
 get_mimetype() {
     filename=$1
-    res=$(file --mime-type "$filename")
+    res=$(file --mime-type "$filename" | sed -e "s/^.*: //")
     echo "$res"
 }
 
+
 get_cli_args() {
     while [[ $# -gt 0 ]]; do
         case ${1} in
@@ -129,6 +192,10 @@ get_cli_args() {
                 comment_help
                 exit 0
                 ;;
+            -D | --debug)
+                WITH_DEBUG=1
+                shift
+                ;;
             -s | --statistics)
                 print_statistics
                 exit 0
@@ -176,10 +243,6 @@ get_cli_args() {
                 WITH_PIPE=1
                 shift
                 ;;
-            -D | --debug)
-                WITH_DEBUG=1
-                shift
-                ;;
             *)
                 error "'${1}' is invalid param. Please, give '$(basename "${0}") --help' a chance!"
                 exit 1
@@ -239,6 +302,20 @@ get_cli_args() {
             fi
         fi
     fi
+    cachedir=$(dirname "$STATFILE")
+    if [ ! -d "$cachedir" ]; then
+        mkdir -p "$cachedir" || error "Could not create dir $cachedir, $?"
+    fi
+    if [ -e "$STATFILE" ]; then
+        lines=$( flock -x "$LOCKFILE" wc -l "$STATFILE" | cut -d " " -f 1)
+        debug "found $lines lines in $STATFILE)"
+        if [ "$lines" -gt $MAX_STAT_LINES ]; then
+            (
+                flock -n 9 || exit 1
+                trim_statistics
+            ) 9>"$LOCKFILE"
+        fi
+    fi
 }
 
 prepare_cmd() {
@@ -247,9 +324,13 @@ prepare_cmd() {
     stage=$3
     key=$(printf "%11s%4s%9s" "$mode" "$ftype" "$stage"|sed -e "y/ /_/")
     debug "prepare_cmd, key=$key"
-    cmd=${validators[$key]};
-    debug "prepare_cmd, cmd=$cmd"
-    echo "$cmd"
+    if [[ ${validators[$key]:+1} ]]; then
+        cmd=${validators[$key]};
+        debug "prepare_cmd, cmd=$cmd"
+        echo "$cmd"
+    else
+        debug "no valid command found using key $key"
+    fi
 }
 
 prepare_ftype() {
@@ -290,10 +371,39 @@ estimate_mode() {
     echo $MODE
 }
 
+# Run one validator command, time it and record the outcome in the statistics.
+# Arguments: $1 command line (executed unquoted, i.e. word-split by the shell),
+#            $2 file type, $3 workflow, $4 stage
+exec_cmd() {
+    local cmd ftype workflow stage start_t stop_t duration is_valid
+    cmd=$1
+    ftype=$2
+    workflow=$3
+    stage=$4
+    # prepare_cmd prints nothing for an unknown key; nothing runs, so record nothing.
+    if [ -z "$cmd" ]; then
+        debug "scan_file, no command for ftype='$ftype' workflow='$workflow' stage='$stage'"
+        return 0
+    fi
+    start_t=$(date +"%s")
+    debug "scan_file, calling cmd='$cmd'"
+    # Capture the real exit status (0 = valid); it is stored as field 2 of the record.
+    is_valid=0
+    $cmd || is_valid=$?
+    stop_t=$(date +"%s")
+    duration=$((stop_t - start_t))
+    if [ "$is_valid" -ne 0 ]; then
+        error "failed call of '$cmd', $is_valid"
+    fi
+    debug "scan_file, duration=$duration is_valid=$is_valid"
+    update_statistics "$is_valid" "$duration" "$ftype" "$workflow" "$stage"
+}
+
+
 scan_file() {
     filename="$1"
     debug "scan_file, using filename: $filename"
-    mimetype=$(get_mimetype "$filename" | cut -d " " -f 2)
+    mimetype=$(get_mimetype "$filename")
     ftype=$(prepare_ftype "$mimetype")
     if [ "$MODE" = "auto" ]; then
         # try best guess
@@ -301,16 +411,13 @@ scan_file() {
     fi
     if [ "$STAGE" = "any" ]; then
         for stage in current upcoming; do
-            debug "scan_file, using stage: $stage (STAGE mode '$STAGE')"
             cmd=$(prepare_cmd "$MODE" "$ftype" "$stage" | sed -e "s#FILE#'$filename'#")
-            debug "scan_file, calling cmd='$cmd'"
-            $cmd || ( error "failed call of '$cmd', $?"; exit 1 )
+            exec_cmd "$cmd" "$ftype" "$MODE" "$stage"
         done
     else
-        debug "scan_file, using stage: $STAGE"
         cmd=$(prepare_cmd "$MODE" "$ftype" "$STAGE" | sed -e "s#FILE#'$filename'#")
-        debug "scan_file, calling cmd='$cmd'"
-        $cmd || ( error "failed call of '$cmd', $?"; exit 1 )
+        exec_cmd "$cmd" "$ftype" "$MODE" "$STAGE"
+
     fi
 }
 