From 7a19bb10c67de4887c1d761943e15a54fede7282 Mon Sep 17 00:00:00 2001
From: Andreas Romeyke <art1@andreas-romeyke.de>
Date: Thu, 8 Oct 2020 12:16:37 +0200
Subject: [PATCH] - refactoring, extracting checks in functions - improved
 add_error() - added help - added POD

---
 deep_fixitycheck.pl | 124 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 94 insertions(+), 30 deletions(-)

diff --git a/deep_fixitycheck.pl b/deep_fixitycheck.pl
index 03fdc06..b2be16a 100644
--- a/deep_fixitycheck.pl
+++ b/deep_fixitycheck.pl
@@ -29,7 +29,7 @@ use Time::Progress;
 use XML::LibXML::XPathContext;
 use Getopt::Long;
 use constant DEBUG => 0; # no debug
-
+use Pod::Usage;
 use IO::Handle;
 STDOUT->autoflush(1);
 # guarantee, that output will be UTF8
@@ -194,32 +194,74 @@ sub map_file {
   return path($map_path)->child($file)->stringify;
 }
 
-sub add_error ($$$$) {
-  my $filepath = shift;
-  my $fileorig = shift;
+sub add_error ($$$) { # add_error($fileobj, $result, $errormessage): records one error on $result
+  my $fileobj = shift; # hashref; its {file_mounted} and {filepath} entries are used in the message
   my $result = shift;
   my $errormessage = shift;
   $result->{errors}++;
-  push @{$result->{error_description}}, "file $filepath (original: $fileorig) $errormessage";
+  push @{$result->{error_description}}, "file $fileobj->{file_mounted} (original: $fileobj->{filepath}) $errormessage";
+  return $result; # the same hashref that was passed in, now carrying the new error
+}
+
+# Records in $result->{exist} whether the mounted file is a plain file; adds an error if not. Returns $result.
+sub check_if_file_exist($$) {
+  my ($fileobj, $result) = @_;
+  $result->{errors} //= 0; # initialise only when unset, so errors from earlier checks are never wiped
+  $result->{exist} = path($fileobj->{file_mounted})->is_file();
+  if (!$result->{exist}) {
+    add_error($fileobj, $result, "does not exist nor readable");
+  }
+  return $result;
+}
+
+# Stores the on-disk size in $result->{size}; adds an error if it differs from $fileobj->{size}. Returns $result.
+sub check_file_size($$) {
+  my ($fileobj, $result) = @_;
+  my $size = -s path($fileobj->{file_mounted})->stringify; # undef if stat fails, '' for a zero-length file
+  $result->{size} = defined $size ? $size + 0 : undef;     # numify so an empty file is recorded as 0, not ''
+  my $shown = $result->{size} // 'unknown';                # printable stand-in: no undef in compare or message
+  add_error($fileobj, $result, "has size $shown but $fileobj->{size} was expected") if $shown ne $fileobj->{size};
+  return $result;
+}
+
+# Computes the digest of the mounted file for $fixity_algorithm, stores it in $result->{fixity},
+# and adds an error if it differs from the expected value in $fileobj->{fixity}. Returns $result.
+sub check_file_fixity($$$) {
+  my ($fileobj, $result, $fixity_algorithm) = @_;
+  my $digest_mapping = {
+      'CRC32'  => 'CRC-32',
+      'MD5'    => 'MD5',
+      'SHA1'   => 'SHA-1',
+      'SHA256' => 'SHA-256',
+      'SHA512' => 'SHA-512'
+  };
+  my $digest_name = $digest_mapping->{$fixity_algorithm} // $fixity_algorithm; # unmapped names pass through unchanged, never undef
+  $result->{fixity}->{$fixity_algorithm} = path($fileobj->{file_mounted})->digest($digest_name);
+  if ($result->{fixity}->{$fixity_algorithm} ne $fileobj->{fixity}->{$fixity_algorithm}) {
+    add_error($fileobj, $result, "has fixity $result->{fixity}->{$fixity_algorithm} for algorithm $fixity_algorithm, but $fileobj->{fixity}->{$fixity_algorithm} was expected");
+  }
   return $result;
 }
+
 ###############################################################################
 ###############################################################################
 ############# main ############################################################
 ###############################################################################
 ###############################################################################
 
-my @ARGV_tail;
 my $recovery = 1;
 my $map_path;
 my $search_dir;
 my $report_file;
+my $help;
 GetOptions(
     "recovery"     => \$recovery,
     "map_path=s"   => \$map_path,
     "search_dir=s" => \$search_dir,
     "report=s"     => \$report_file,
-);
+    "help|?"       => \$help
+) or die "Try --help for usage information";
+pod2usage(1) if $help;
 if (!defined $map_path || length($map_path) < 1) {
   die "map path is empty!";
 }
@@ -258,36 +300,17 @@ if (defined $search_dir && -d "$search_dir") {
       $stat->{IEs}++;
       chomp;
       print $progressbar->report("parse IE files:       %40b  ETA: %E   \r", $count++);
-      use Data::Printer;
-      #p( $_);
       my $ret = parse_iexml( $_, $recovery);
       foreach my $fileobj (@{ $ret->{files} }) {
+        $fileobj->{file_mounted} = map_file($map_path, $fileobj->{filepath});
         $stat->{files}++;
         my $result;
-        # check size
-        my $filepath = path(map_file($map_path, $fileobj->{filepath}));
-        my $fileorig = path($fileobj->{filepath});
         $result->{errors} = 0;
-        $result->{exist} = $filepath->is_file();
-        if (!$result->{exist}) {
-          add_error($filepath, $fileorig, $result, "does not exist nor readable");
-        }
-        $result->{size} = -s $filepath->stringify;
-        if ($result->{size} ne $fileobj->{size}) {
-          add_error($filepath, $fileorig, $result, "has size $result->{size} but $fileobj->{size} was expected");
-        }
+        $result = check_if_file_exist($fileobj, $result);
+        $result = check_file_size($fileobj, $result);
         foreach my $fixity_algorithm ( @algorithms) {
-          my $digest_name = $fixity_algorithm;
-          $digest_name =~ s/CRC32/CRC-32/;
-          $digest_name =~ s/SHA1/SHA-1/;
-          $digest_name =~ s/SHA256/SHA-256/;
-          $digest_name =~ s/SHA512/SHA-512/;
           if ($fileobj->{fixity}->{$fixity_algorithm}) {
-            $result->{fixity}->{$fixity_algorithm} = $filepath->digest($digest_name);
-            if ($result->{fixity}->{$fixity_algorithm} ne $fileobj->{fixity}->{$fixity_algorithm}) {
-              say "$filepath has ", $result->{fixity}->{$fixity_algorithm}, " but ", $fileobj->{fixity}->{$fixity_algorithm}, " was expected";
-              add_error($filepath, $fileorig, $result, "has fixity $result->{fixity}->{$fixity_algorithm} for algorithm $fixity_algorithm, but $fileobj->{fixity}->{$fixity_algorithm} was expected");
-            }
+            $result = check_file_fixity($fileobj, $result, $fixity_algorithm);
           }
         }
         if ($result->{errors} > 0) {
@@ -310,3 +333,44 @@ if (defined $search_dir && -d "$search_dir") {
 say "";
 1;
 
+__END__
+
+=head1 NAME
+
+deep_fixitycheck.pl
+
+=head1 SYNOPSIS
+
+perl ./deep_fixitycheck.pl --map_path=~/sdvrosetta.test/ --search_dir=~/sdvrosetta.test/permanent_storage/2020/09/28 --report=report.txt
+
+=head1 DESCRIPTION
+
+Deeply scans a Rosetta repository (or parts of it) to detect fixity problems.
+
+=head1 OPTIONS
+
+=over 8
+
+=item B<--help>
+
+This help.
+
+=item B<--map_path>
+
+
+If the F</permanent> directory of the remote Rosetta system is mounted locally at another mountpoint, this parameter is used to map the
+original filename within Rosetta to the locally mounted one.
+
+Example: F</permanent/> is mounted to F</home/foo/permanent>, then the B<--map_path> is F</home/foo/>
+
+=item B<--search_dir>
+
+The search_dir is the directory where the search starts. The search_dir could be any subdirectory of B<--map_path>.
+
+=item B<--report>
+
+The file where the report is stored. If the file exists, the report will be appended to it without warning.
+
+=back
+
+=cut
-- 
GitLab