From b48286537ba1ba3efe5ca407791c76bfe061562f Mon Sep 17 00:00:00 2001
From: Andreas Romeyke <art1@andreas-romeyke.de>
Date: Thu, 8 Oct 2020 11:36:30 +0200
Subject: [PATCH] - fixed progressbars - improved error reporting - add
 commandline option to correct file paths if mount path differs from remote
 mount path - added options check - fixed fixity calculation - added stats

---
 deep_fixitycheck.pl | 106 ++++++++++++++++++++++++++++++++------------
 1 file changed, 78 insertions(+), 28 deletions(-)

diff --git a/deep_fixitycheck.pl b/deep_fixitycheck.pl
index b379021..03fdc06 100644
--- a/deep_fixitycheck.pl
+++ b/deep_fixitycheck.pl
@@ -51,7 +51,7 @@ sub searching_ie_files ($$) {
     if (-d $_) { $first_two_levels_of_dirs++;}
   };
   find( $wanted_twolevel_dircount, $dir);
-  my $progressbar=Time::Progress->new(min => 0, max => $first_two_levels_of_dirs, smoothing => 1);
+  my $progressbar=Time::Progress->new(min => 0, max => $first_two_levels_of_dirs-1, smoothing => 1);
   my $dircount = 0;
   my $wanted_process_sip = sub  {
     if (-f && m/V(\d+)-IE\d+\.xml$/) {
@@ -182,29 +182,78 @@ sub parse_iexml ($$) {
   return \%ret;
 }
 
+sub map_file {
+  # Map a file path from the target system to the local system by placing
+  # it below the local mount point $map_path; returns the path as a string.
+  # Example:
+  #   map_path = "/mnt/remote_rosetta/"
+  #   file     = "/permanent/2020/01/01/IE10000/FL1.tif"
+  #   result   = "/mnt/remote_rosetta/permanent/2020/01/01/IE10000/FL1.tif"
+  my ($map_path, $file) = @_;
+  my $local_path = path($map_path)->child($file);
+  return $local_path->stringify;
+}
 
-
+sub add_error ($$$$) {
+  # Count one more error in $result and append a description naming the
+  # mapped file path and the original path; returns the same $result ref.
+  my ($filepath, $fileorig, $result, $errormessage) = @_;
+  my $description = "file $filepath (original: $fileorig) $errormessage";
+  ++$result->{errors};
+  push @{ $result->{error_description} }, $description;
+  return $result;
+}
 ###############################################################################
 ###############################################################################
 ############# main ############################################################
 ###############################################################################
 ###############################################################################
-my $report_file = shift @ARGV;
+
+my @ARGV_tail;
 my $recovery = 1;
-my $dir = shift @ARGV;
-if (defined $dir && -d "$dir") {
+my $map_path;
+my $search_dir;
+my $report_file;
+# Parse the commandline; abort on unknown or malformed options instead of
+# continuing. "recovery!" is negatable so --no-recovery can clear the default.
+GetOptions(
+    "recovery!"    => \$recovery,
+    "map_path=s"   => \$map_path,
+    "search_dir=s" => \$search_dir,
+    "report=s"     => \$report_file,
+) or die "invalid commandline options!";
+# All three string options are mandatory and must be non-empty.
+if (!defined $map_path || length($map_path) < 1) {
+  die "map path is empty!";
+}
+if (!defined $search_dir || length($search_dir) < 1) {
+  die "search dir is empty!";
+}
+if (!defined $report_file || length($report_file) < 1) {
+  die "report file is empty!";
+}
+# Both directories must exist before the scan starts.
+die "map path $map_path does not exist!" if !path($map_path)->is_dir;
+die "search dir $search_dir does not exist!" if !path($search_dir)->is_dir;
+# \Q...\E matches map_path literally; paths may hold regex metacharacters.
+die "map_path $map_path should be part of search dir $search_dir!"
+  if $search_dir !~ m/^\Q$map_path\E/;
+if (defined $search_dir && -d "$search_dir") {
     say "Preparing scan";
     say "searching IE files";
-    my $tmp_ies_dir = Path::Tiny->tempdir( TEMPLATE => "deep_fixitycheck_XXXXXXXXXXX", CLEANUP => 0);
+    my $tmp_ies_dir = Path::Tiny->tempdir( TEMPLATE => "deep_fixitycheck_XXXXXXXXXXX", CLEANUP => 1);
     my $tmp_ies_unsorted_file = $tmp_ies_dir->child("unsorted_ies");
     $tmp_ies_unsorted_file->touch();
-    my $cnt_unsorted_files = searching_ie_files($dir, $tmp_ies_unsorted_file);
+    my $cnt_unsorted_files = searching_ie_files($search_dir, $tmp_ies_unsorted_file);
     say "checking IEs";
-    say $tmp_ies_unsorted_file->absolute()->stringify;
+    #say $tmp_ies_unsorted_file->absolute()->stringify;
     my $fh_unsorted_file = $tmp_ies_unsorted_file->openr();
     my $count = 0;
-    my $progressbar = Time::Progress->new(min=>0, max=>$cnt_unsorted_files, smoothing => 1);
+    my $progressbar = Time::Progress->new(min=>0, max=>$cnt_unsorted_files-1, smoothing => 1);
     my $stat;
+    $stat->{IEs} = 0;
+    $stat->{files} = 0;
+    $stat->{errors} = 0;
     while ( <$fh_unsorted_file>) {
       $stat->{IEs}++;
       chomp;
@@ -212,32 +261,33 @@ if (defined $dir && -d "$dir") {
       use Data::Printer;
       #p( $_);
       my $ret = parse_iexml( $_, $recovery);
-      p( $ret);
       foreach my $fileobj (@{ $ret->{files} }) {
         $stat->{files}++;
         my $result;
         # check size
-        my $filepath = path($fileobj->{filepath});
+        my $filepath = path(map_file($map_path, $fileobj->{filepath}));
+        my $fileorig = path($fileobj->{filepath});
         $result->{errors} = 0;
         $result->{exist} = $filepath->is_file();
         if (!$result->{exist}) {
-          $result->{errors}++;
-          push @{$result->{error_description}}, "file $filepath does not exist nor readable";
+          add_error($filepath, $fileorig, $result, "does not exist nor readable");
         }
-        $result->{size} = -s $filepath->visit(sub{ my ($path, $state) = @_;
-                return if $path->is_dir;
-                $state->{$path} = -s $path || "";
-}, {recurse => 0});
-        use Data::Printer; p($result);
-        if ($result->{size} ne $ret->{size}) {
-          $result->{errors}++;
-          push @{$result->{error_description}}, "file $filepath has size $result->{size} but $ret->{size} was expected";
+        $result->{size} = -s $filepath->stringify;
+        if ($result->{size} ne $fileobj->{size}) {
+          add_error($filepath, $fileorig, $result, "has size $result->{size} but $fileobj->{size} was expected");
         }
         foreach my $fixity_algorithm ( @algorithms) {
-          $result->{fixity}->{$fixity_algorithm} = $filepath->digest($fixity_algorithm);
-          if ($result->{fixity}->{$fixity_algorithm} ne $ret->{fixity}->{$fixity_algorithm}) {
-            $result->{errors}++;
-            push @{$result->{error_description}}, "file $filepath has fixity $result->{fixity}->{$fixity_algorithm} for algorithm $fixity_algorithm, but $ret->{fixity}->{$fixity_algorithm} was expected";
+          (my $digest_name = $fixity_algorithm) =~ s/CRC32/CRC-32/; # hyphenated name is passed to digest()
+          $digest_name =~ s/SHA1/SHA-1/;
+          $digest_name =~ s/SHA256/SHA-256/;
+          $digest_name =~ s/SHA512/SHA-512/;
+          # digest() dies on a missing file, which was already reported above
+          if ($result->{exist} && $fileobj->{fixity}->{$fixity_algorithm}) {
+            $result->{fixity}->{$fixity_algorithm} = $filepath->digest($digest_name);
+            if ($result->{fixity}->{$fixity_algorithm} ne $fileobj->{fixity}->{$fixity_algorithm}) {
+              say "$filepath has ", $result->{fixity}->{$fixity_algorithm}, " but ", $fileobj->{fixity}->{$fixity_algorithm}, " was expected";
+              add_error($filepath, $fileorig, $result, "has fixity $result->{fixity}->{$fixity_algorithm} for algorithm $fixity_algorithm, but $fileobj->{fixity}->{$fixity_algorithm} was expected");
+            }
           }
         }
         if ($result->{errors} > 0) {
@@ -246,14 +296,14 @@ if (defined $dir && -d "$dir") {
             path($report_file)->append_utf8("\t$errors\n");
           }
           $stat->{errors} += $result->{errors};
+        } else {
+          path($report_file)->append_utf8("IE $_ is fine! :)\n");
         }
       }
       # write report
     }
   say "";
-  use Data::Printer;
-  p( $stat);
-
+  say "Scanned $stat->{IEs} IEs with $stat->{files} files, found $stat->{errors} errors";
 } else {
   die "no directory given on commandline";
 }
-- 
GitLab