Skip to content
Snippets Groups Projects
Commit e461d73b authored by Andreas Romeyke
Browse files

Merge branch 'master' of git.slub-dresden.de:digital-preservation/rosettadeepfixity

parents fa1ff208 79637e99
No related branches found
No related tags found
No related merge requests found
......@@ -28,7 +28,7 @@ use XML::LibXML;
use Time::Progress;
use XML::LibXML::XPathContext;
use Getopt::Long::Complete qw(GetOptionsWithCompletion);
use Digest::CRC;
use Crypt::Checksum::CRC32;
use Digest::MD5;
use Digest::SHA;
use Bloom::Filter;
......@@ -263,7 +263,7 @@ sub check_file_fixities($$) {
my $fileobj = shift;
my $result = shift;
my %digest_mapping = ( # maps Rosetta fixity algorithm names to Perl fixity algorithm names
'CRC32' => Digest::CRC->new(type=>"crc32"),
'CRC32' => Crypt::Checksum::CRC32->new(),
'MD5' => Digest::MD5->new(),
'SHA1' => Digest::SHA->new(1),
'SHA256' => Digest::SHA->new(256),
......@@ -370,9 +370,12 @@ sub stage2_for_ie {
}
}
if ($ie_errors == 0) {
$report_path->append_utf8(" none\n");
$report_path->append_utf8(" no fixity errors\n");
}
# write report
# return current bloom filter
return $bf;
}
sub stage2 ($$$$) {
......@@ -400,7 +403,6 @@ sub stage2 ($$$$) {
$stat->{errors} = 0;
$stat->{scansize} = 0;
$stat->{begin} = time;
my $prev_ie = "";
$fh_unsorted_file = $tmp_ies_unsorted_path->openr();
while (<$fh_unsorted_file>) {
chomp;
......@@ -411,7 +413,7 @@ sub stage2 ($$$$) {
my $bfusage = int($bf->key_count() * 100 / $capacity);
print $progressbar->report("parse IE files: %40b running: %L ETA: %E ($count/$cnt_unsorted_files IEs, tfr=$transferrate_in_MBs MB/s, bfu=$bfusage%) \r", ++$count);
my $timestamp = strftime("%Y-%m-%d %H:%M:%S %z (%Z)", localtime(time));
$report_path->append_utf8("$timestamp, IE $actual_ie with following errors:\n");
$report_path->append_utf8("$timestamp, IE $actual_ie:\n");
if (! -f $actual_ie ) {
$report_path->append_utf8("IE file does not exist (anymore)!");
$stat->{errors}++;
......@@ -430,10 +432,16 @@ sub stage2 ($$$$) {
}
my $ret = parse_iexml($actual_ie, $recovery);
my $unseen = bloomfilter_to_unseen($bf, $ret->{files});
if (scalar @{$unseen} == 0) {$report_path->append_utf8("skipped because files already checked using IE $prev_ie\n");}
$prev_ie = $actual_ie;
my ($fcount_all, $fcount_unseen) = (scalar @{$ret->{files}}, scalar @{$unseen});
if ($fcount_unseen == 0) {
$report_path->append_utf8(" all files skipped because already checked\n");
}
elsif ($fcount_all > $fcount_unseen) {
my $fdiff = $fcount_all - $fcount_unseen;
$report_path->append_utf8(" $fdiff of $fcount_all files skipped because already checked\n");
}
###
stage2_for_ie($unseen, $bf, $map_path, $report_path, $stat);
$bf = stage2_for_ie($unseen, $bf, $map_path, $report_path, $stat);
}
say "";
$stat->{end} = time;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment