From 1518ead1f718d51f72885cc7ae8490ac3b457073 Mon Sep 17 00:00:00 2001
From: Andreas Romeyke <andreas.romeyke@slub-dresden.de>
Date: Mon, 15 Apr 2024 10:54:44 +0200
Subject: [PATCH] - added check_if_file_is_readable() as a workaround to check if NFS read errors

---
 perl/exit_strategy.pl | 57 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 49 insertions(+), 8 deletions(-)

diff --git a/perl/exit_strategy.pl b/perl/exit_strategy.pl
index 8fb86d1..5158773 100644
--- a/perl/exit_strategy.pl
+++ b/perl/exit_strategy.pl
@@ -532,7 +532,45 @@ XPATH
     return \@auth;
 }
 
- sub parse_iexml($filename, $recovery_flag) {
+sub check_if_file_is_readable($filename) {
+    # Workaround to detect NFS read errors: probe the first and last 4 bytes.
+    # Returns 1 if both reads succeed, otherwise warns and returns nothing.
+    open(my $FH, '<', $filename) or do {
+        warn "file '$filename' could not be opened, $!\n";
+        return;
+    };
+    binmode($FH);
+    my $buf;
+    my $readc = sysread $FH, $buf, 4;
+    unless (defined($readc) && 4 == $readc) {
+        warn "file '$filename' could not read 4 bytes from (beginning file), $!\n";
+        return;
+    }
+    unless (defined sysseek($FH, -4, 2)) { # whence 2 == SEEK_END
+        warn "file '$filename' could not seek 4 bytes before end, $!\n";
+        return;
+    }
+    $readc = sysread $FH, $buf, 4; # read the tail; never re-test the first count
+    unless (defined($readc) && 4 == $readc) {
+        warn "file '$filename' could not read 4 bytes from (end file), $!\n";
+        return;
+    }
+    close $FH;
+    return 1;
+}
+
+sub parse_iexml($filename, $recovery_flag) {
+    my $retry = 3;
+    while ($retry > 0) {
+
+        if (!check_if_file_is_readable($filename)) {
+            $retry--;
+            my $wait = int(rand(60));
+            warn "detected non-readable file $filename, retrying after $wait seconds ... 
($retry)"; + sleep $wait; + next; + } + if ($recovery_flag) { $recovery_flag = 2; # avoid warnings, see XML::LibXML::Parser POD about 'recovery' } @@ -543,7 +581,7 @@ XPATH $xp->setContextNode($dom); my $dmdsec = $xp->findnodes($compiled_xpath_dmdSec)->[0]; my $amdsec = $xp->findnodes($compiled_xpath_amdSec_rep)->[0]; - my $filesec= $xp->findnodes($compiled_xpath_fileSec)->[0]; + my $filesec = $xp->findnodes($compiled_xpath_fileSec)->[0]; my $ret; $ret->{"filename"} = $filename; ############################################ @@ -555,7 +593,7 @@ XPATH ############################################ my $is_deleted = has_purged_entries($xp, $amdsec); if ($is_deleted) { - my @purged_states = @{ get_purged_states($xp, $amdsec) }; + my @purged_states = @{get_purged_states($xp, $amdsec)}; foreach my $idx (0 .. $#purged_states) { $ret->{'purged'}->[$idx]->{state} = $purged_states[$idx]; $ret->{'purged'}->[$idx]->{reason} = get_purged_reasons($xp, $amdsec)->[$idx]; @@ -570,7 +608,8 @@ XPATH say STDERR "No repid found in file $filename, is IE intentionally purged?"; if ($is_deleted) { say STDERR " Yes, a corresponding purge event is found."; - } else { + } + else { say STDERR " No, a corresponding purge event was missed. 
This indicates an error in archive."; } return $ret; @@ -579,7 +618,7 @@ XPATH ############################################ # get all files of LOCAL representation $ret->{"filepids"} = get_filepids_ref($xp, $filesec, $repid); - $ret->{"files"} = get_files_ref($xp, $filesec, $repid); + $ret->{"files"} = get_files_ref($xp, $filesec, $repid); my @loc_and_size = map { my $fpid = $_; my $location = get_file_path($xp, $filesec, $fpid); @@ -588,15 +627,17 @@ XPATH my $size = get_filesize($xp, $fpid); #my $size = get_filesize_rx($slurp, $fpid); #say "size=$size"; - [$location, $size]; + [ $location, $size ]; } @{$ret->{"filepids"}}; foreach my $entry (@loc_and_size) { - my ($location, $size) = @{ $entry }; - $ret->{"sizes"}->{$location} = $size ; + my ($location, $size) = @{$entry}; + $ret->{"sizes"}->{$location} = $size; } return $ret; } +} + # returns count of subdirs of $dir sub searching_relevant_subdirs ($dir) { -- GitLab