Skip to content
Snippets Groups Projects
Commit 1c50ed62 authored by Andreas Romeyke
Browse files

- bugfix, chomp should be called before access

- added existence and read checks for the IE file itself
- improved report
parent 95ceff6e
No related branches found
No related tags found
No related merge requests found
...@@ -334,6 +334,7 @@ sub stage2_for_ie { ...@@ -334,6 +334,7 @@ sub stage2_for_ie {
my $map_path = shift; my $map_path = shift;
my $report_path = shift; my $report_path = shift;
my $stat = shift; my $stat = shift;
my $ie_errors = 0;
foreach my $fileobj (@{ $unseen }) { foreach my $fileobj (@{ $unseen }) {
if ($bf->key_count() >= (0.8 * $capacity)) { # reset Bloom filter if 80% filled if ($bf->key_count() >= (0.8 * $capacity)) { # reset Bloom filter if 80% filled
#print "reset bloomfilter\n"; #print "reset bloomfilter\n";
...@@ -364,10 +365,13 @@ sub stage2_for_ie { ...@@ -364,10 +365,13 @@ sub stage2_for_ie {
if ($result->{errors} > 0) { if ($result->{errors} > 0) {
$report_path->append_utf8("-" x 60, "\n"); $report_path->append_utf8("-" x 60, "\n");
$stat->{errors} += $result->{errors}; $stat->{errors} += $result->{errors};
$ie_errors += $result->{errors};
} else { # no errors } else { # no errors
$report_path->append_utf8(" none\n");
} }
} }
if ($ie_errors == 0) {
$report_path->append_utf8(" none\n");
}
# write report # write report
} }
...@@ -399,15 +403,31 @@ sub stage2 ($$$$) { ...@@ -399,15 +403,31 @@ sub stage2 ($$$$) {
my $prev_ie = ""; my $prev_ie = "";
$fh_unsorted_file = $tmp_ies_unsorted_path->openr(); $fh_unsorted_file = $tmp_ies_unsorted_path->openr();
while (<$fh_unsorted_file>) { while (<$fh_unsorted_file>) {
chomp;
my $actual_ie = $_; my $actual_ie = $_;
# scan each IE # scan each IE
$stat->{IEs}++; $stat->{IEs}++;
chomp;
my $transferrate_in_MBs = sprintf("%0.2f", $stat->{scansize} / (time - $stat->{begin} + 1) / 1024 / 1024); my $transferrate_in_MBs = sprintf("%0.2f", $stat->{scansize} / (time - $stat->{begin} + 1) / 1024 / 1024);
my $bfusage = int($bf->key_count() * 100 / $capacity); my $bfusage = int($bf->key_count() * 100 / $capacity);
print $progressbar->report("parse IE files: %40b running: %L ETA: %E ($count/$cnt_unsorted_files IEs, tfr=$transferrate_in_MBs MB/s, bfu=$bfusage%) \r", ++$count); print $progressbar->report("parse IE files: %40b running: %L ETA: %E ($count/$cnt_unsorted_files IEs, tfr=$transferrate_in_MBs MB/s, bfu=$bfusage%) \r", ++$count);
my $timestamp = strftime("%Y-%m-%d %H:%M:%S %z (%Z)", localtime(time)); my $timestamp = strftime("%Y-%m-%d %H:%M:%S %z (%Z)", localtime(time));
$report_path->append_utf8("$timestamp, IE $actual_ie with following errors:\n"); $report_path->append_utf8("$timestamp, IE $actual_ie with following errors:\n");
if (! -f $actual_ie ) {
$report_path->append_utf8("IE file does not exist (anymore)!");
$stat->{errors}++;
next;
}
if (-s $actual_ie == 0) {
$report_path->append_utf8("IE file is empty!");
$stat->{errors}++;
next;
}
my ($head_of_actual_ie) = path($actual_ie)->lines( {count=>1});
if ($head_of_actual_ie !~ m#<\?xml#) {
$stat->{errors}++;
$report_path->append_utf8("IE file is not a xml file, start with '$head_of_actual_ie'");
next;
}
my $ret = parse_iexml($actual_ie, $recovery); my $ret = parse_iexml($actual_ie, $recovery);
my $unseen = bloomfilter_to_unseen($bf, $ret->{files}); my $unseen = bloomfilter_to_unseen($bf, $ret->{files});
if (scalar @{$unseen} == 0) {$report_path->append_utf8("skipped because files already checked using IE $prev_ie\n");} if (scalar @{$unseen} == 0) {$report_path->append_utf8("skipped because files already checked using IE $prev_ie\n");}
...@@ -493,13 +513,13 @@ if (!defined $report_file || length($report_file) < 1) { ...@@ -493,13 +513,13 @@ if (!defined $report_file || length($report_file) < 1) {
die "report file is empty!"; die "report file is empty!";
}; };
if (!path($map_path)->is_dir) { if (!path($map_path)->is_dir) {
die "map path $map_path does not exist!"; die "map path $map_path does not exist! Please double check if map path is correct!";
} }
if (!path($search_dir)->is_dir) { if (!path($search_dir)->is_dir) {
die "search dir $search_dir does not exist!"; die "search dir $search_dir does not exist!";
} }
if ($search_dir !~ m/^$map_path/) { if ($search_dir !~ m/^$map_path/) {
die "map_path $map_path should be part of search dir $search_dir!"; die "map_path $map_path should be part of search dir $search_dir! Please double check if map path is correct!";
} }
if ($cache_str eq "") { if ($cache_str eq "") {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment