Report "none" once per IE and skip unusable IE files in stage2.

Review notes:
- stage2_for_ie sums the errors of all files of one IE in $ie_errors and
  writes " none" once after the loop instead of once per error-free file.
- stage2 chomps the input line before copying it to $actual_ie and skips IE
  files that are missing, empty, or whose first line lacks an XML declaration;
  each of these cases increments $stat->{errors}.
- The three skip messages end with "\n" like the other report lines, so the
  next timestamp line starts on a fresh line.
- The first line of the IE file is read with chomp => 1 so that its line
  terminator does not end up inside the quoted value of the report message.
- The map_path prefix test quotes $map_path with \Q...\E so that regex
  metacharacters in the directory name are matched literally.
- NOTE(review): indentation and the position of the blank context line in the
  last hunk were reconstructed from a whitespace-collapsed copy of this patch;
  regenerate the diff from the working tree before applying it.

diff --git a/deep_fixitycheck.pl b/deep_fixitycheck.pl
index a2fcb738facb1d2ee8347d088372c8e256031e58..80b1096e20bc6a842eaa7cc621a28ba7de73c780 100644
--- a/deep_fixitycheck.pl
+++ b/deep_fixitycheck.pl
@@ -334,6 +334,7 @@ sub stage2_for_ie {
     my $map_path = shift;
     my $report_path = shift;
     my $stat = shift;
+    my $ie_errors = 0; # errors accumulated over all files of this IE
     foreach my $fileobj (@{ $unseen }) {
         if ($bf->key_count() >= (0.8 * $capacity)) { # reset Bloom filter if 80% filled
             #print "reset bloomfilter\n";
@@ -364,10 +365,13 @@ sub stage2_for_ie {
         if ($result->{errors} > 0) {
             $report_path->append_utf8("-" x 60, "\n");
             $stat->{errors} += $result->{errors};
+            $ie_errors += $result->{errors};
         } else {
             # no errors
-            $report_path->append_utf8(" none\n");
         }
     }
+    if ($ie_errors == 0) { # written once per IE, not once per file
+        $report_path->append_utf8(" none\n");
+    }
     # write report
 }
@@ -399,15 +403,31 @@ sub stage2 ($$$$) {
     my $prev_ie = "";
     $fh_unsorted_file = $tmp_ies_unsorted_path->openr();
     while (<$fh_unsorted_file>) {
+        chomp; # strip the line terminator before $_ is copied into $actual_ie
         my $actual_ie = $_;
         # scan each IE
         $stat->{IEs}++;
-        chomp;
         my $transferrate_in_MBs = sprintf("%0.2f", $stat->{scansize} / (time - $stat->{begin} + 1) / 1024 / 1024);
         my $bfusage = int($bf->key_count() * 100 / $capacity);
         print $progressbar->report("parse IE files: %40b running: %L ETA: %E ($count/$cnt_unsorted_files IEs, tfr=$transferrate_in_MBs MB/s, bfu=$bfusage%) \r", ++$count);
         my $timestamp = strftime("%Y-%m-%d %H:%M:%S %z (%Z)", localtime(time));
         $report_path->append_utf8("$timestamp, IE $actual_ie with following errors:\n");
+        if (! -f $actual_ie ) { # listed IE is not a plain file
+            $report_path->append_utf8("IE file does not exist (anymore)!\n");
+            $stat->{errors}++;
+            next;
+        }
+        if (-s $actual_ie == 0) { # zero-byte file, nothing to parse
+            $report_path->append_utf8("IE file is empty!\n");
+            $stat->{errors}++;
+            next;
+        }
+        my ($head_of_actual_ie) = path($actual_ie)->lines( {count=>1, chomp=>1}); # first line only, without its terminator
+        if ($head_of_actual_ie !~ m#<\?xml#) { # no XML declaration on the first line
+            $stat->{errors}++;
+            $report_path->append_utf8("IE file is not a xml file, start with '$head_of_actual_ie'\n");
+            next;
+        }
         my $ret = parse_iexml($actual_ie, $recovery);
         my $unseen = bloomfilter_to_unseen($bf, $ret->{files});
         if (scalar @{$unseen} == 0) {$report_path->append_utf8("skipped because files already checked using IE $prev_ie\n");}
@@ -493,13 +513,13 @@ if (!defined $report_file || length($report_file) < 1) {
     die "report file is empty!";
 };
 if (!path($map_path)->is_dir) {
-    die "map path $map_path does not exist!";
+    die "map path $map_path does not exist! Please double check if map path is correct!";
 }
 if (!path($search_dir)->is_dir) {
     die "search dir $search_dir does not exist!";
 }
-if ($search_dir !~ m/^$map_path/) {
-    die "map_path $map_path should be part of search dir $search_dir!";
+if ($search_dir !~ m/^\Q$map_path\E/) { # \Q..\E: match the directory name literally, not as a pattern
+    die "map_path $map_path should be part of search dir $search_dir! Please double check if map path is correct!";
 }
 
 if ($cache_str eq "") {