From d3ed0af3978688b2bc631213ec9745a9751c2287 Mon Sep 17 00:00:00 2001
From: Andreas Romeyke <art1@andreas-romeyke.de>
Date: Fri, 16 Oct 2020 10:10:43 +0200
Subject: [PATCH] - improved fixity calculation, now all checksums are built at
 once to avoid multiple reads

---
 deep_fixitycheck.pl | 47 ++++++++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/deep_fixitycheck.pl b/deep_fixitycheck.pl
index 62c6b15..6095308 100644
--- a/deep_fixitycheck.pl
+++ b/deep_fixitycheck.pl
@@ -28,6 +28,9 @@ use Time::Progress;
 use XML::LibXML::XPathContext;
 use Getopt::Long;
 use constant DEBUG => 0; # no debug
+use Digest::CRC;
+use Digest::MD5;
+use Digest::SHA;
 use Pod::Usage;
 use IO::Handle;
 use Fcntl qw(SEEK_END SEEK_SET);
@@ -249,21 +252,33 @@ sub check_file_seekable($$) {
 }
 
 # check if referenced file has correct checksum for given algorithm
-sub check_file_fixity($$$) {
+sub check_file_fixities($$) {
   my $fileobj = shift;
   my $result = shift;
-  my $fixity_algorithm = shift;
-  my $digest_mapping = { # maps Rosetta fixity algorithm names to Perl fixity algorithm names
-      'CRC32'  => 'CRC-32',
-      'MD5'    => 'MD5',
-      'SHA1'   => 'SHA-1',
-      'SHA256' => 'SHA-256',
-      'SHA512' => 'SHA-512'
-  };
-  my $digest_name = $digest_mapping->{$fixity_algorithm};
-  $result->{fixity}->{$fixity_algorithm} = path($fileobj->{file_mounted})->digest($digest_name);
-  if ($result->{fixity}->{$fixity_algorithm} ne $fileobj->{fixity}->{$fixity_algorithm}) {
-    add_error($fileobj, $result, "has fixity $result->{fixity}->{$fixity_algorithm} for algorithm $fixity_algorithm, but $fileobj->{fixity}->{$fixity_algorithm} was expected");
+  my %digest_mapping = ( # maps Rosetta fixity algorithm names to Perl digest objects
+      'CRC32'  => Digest::CRC->new(type=>"crc32"),
+      'MD5'    => Digest::MD5->new(),
+      'SHA1'   => Digest::SHA->new(1),
+      'SHA256' => Digest::SHA->new(256),
+      'SHA512' => Digest::SHA->new(512)
+  );
+  my @wanted = grep { defined $fileobj->{fixity}->{$_} && length($fileobj->{fixity}->{$_}) > 0 } sort keys %digest_mapping; # only algorithms with an expected value, sorted for a stable error order
+  my $fh = path($fileobj->{file_mounted})->openr();
+  binmode($fh);
+  my ($buffer, $bytes_read);
+  while ($bytes_read = read($fh, $buffer, 128*1024)) { # 128kB blocks, each block feeds every wanted digest
+    $_->add($buffer) foreach @digest_mapping{@wanted};
+  }
+  my $read_error = defined $bytes_read ? undef : "$!"; # read() yields undef on I/O error, 0 at EOF; save $! before close() can clobber it
+  close ($fh);
+  if (defined $read_error) { # digests of a partial read would only yield misleading mismatches
+    add_error($fileobj, $result, "could not be read completely: $read_error");
+    return $result;
+  }
+  foreach my $fixity_algorithm (@wanted) {
+    my $actual = $result->{fixity}->{$fixity_algorithm} = $digest_mapping{$fixity_algorithm}->hexdigest();
+    next if $actual eq $fileobj->{fixity}->{$fixity_algorithm};
+    add_error($fileobj, $result, "has fixity $actual for algorithm $fixity_algorithm, but $fileobj->{fixity}->{$fixity_algorithm} was expected"); # record and continue: one mismatch must not abort the whole scan
   }
   return $result;
 }
@@ -342,11 +357,7 @@ if (defined $search_dir && -d "$search_dir") {
             $stat->{scansize} += $result->{size};
             $result = check_file_seekable($fileobj, $result);
             if ($result->{seekable}) {
-              foreach my $fixity_algorithm (@algorithms) {
-                if ($fileobj->{fixity}->{$fixity_algorithm}) {
-                  $result = check_file_fixity($fileobj, $result, $fixity_algorithm);
-                }
-              }
+              $result = check_file_fixities($fileobj, $result);
             }
           }
         }
-- 
GitLab