From 1518ead1f718d51f72885cc7ae8490ac3b457073 Mon Sep 17 00:00:00 2001
From: Andreas Romeyke <andreas.romeyke@slub-dresden.de>
Date: Mon, 15 Apr 2024 10:54:44 +0200
Subject: [PATCH] - added check_if_file_is_readable() as a workaround to check
 for NFS read errors

---
 perl/exit_strategy.pl | 57 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 49 insertions(+), 8 deletions(-)

diff --git a/perl/exit_strategy.pl b/perl/exit_strategy.pl
index 8fb86d1..5158773 100644
--- a/perl/exit_strategy.pl
+++ b/perl/exit_strategy.pl
@@ -532,7 +532,45 @@ XPATH
     return \@auth;
   }
 
-  sub parse_iexml($filename, $recovery_flag) {
+sub check_if_file_is_readable($filename) {
+  # Workaround to detect NFS read errors: opens $filename and really reads its
+  # first and its last 4 bytes. Returns 1 on success; otherwise warns and returns
+  # empty (false). Files shorter than 4 bytes are reported as not readable.
+  open(my $FH, '<', $filename) or do {
+    warn "file '$filename' could not be opened, $!\n";
+    return;
+  };
+  binmode($FH);
+  my $readc = sysread($FH, my $buf, 4); # undef on error, byte count otherwise
+  unless (defined $readc && 4 == $readc) {
+    warn "file '$filename' could not read 4 bytes from (beginning file), $!\n";
+    return;
+  }
+  unless (defined(sysseek($FH, -4, 2))) { # whence 2 == SEEK_END
+    warn "file '$filename' could not seek 4 bytes before end, $!\n";
+    return;
+  }
+  $readc = sysread($FH, $buf, 4); # re-read after the seek so the tail is what gets tested
+  unless (defined $readc && 4 == $readc) {
+    warn "file '$filename' could not read 4 bytes from (end file), $!\n";
+    return;
+  }
+  close $FH;
+  return 1;
+}
+
+sub parse_iexml($filename, $recovery_flag) {
+  my $retry = 3;
+  while ($retry > 0) {
+
+    if (!check_if_file_is_readable($filename)) {
+      $retry--;
+      my $wait = int(rand(60));
+      warn "detected non-readable file $filename, retrying after $wait seconds ... ($retry)";
+      sleep $wait;
+      next;
+    }
+
     if ($recovery_flag) {
       $recovery_flag = 2; # avoid warnings, see XML::LibXML::Parser POD about 'recovery'
     }
@@ -543,7 +581,7 @@ XPATH
     $xp->setContextNode($dom);
     my $dmdsec = $xp->findnodes($compiled_xpath_dmdSec)->[0];
     my $amdsec = $xp->findnodes($compiled_xpath_amdSec_rep)->[0];
-    my $filesec= $xp->findnodes($compiled_xpath_fileSec)->[0];
+    my $filesec = $xp->findnodes($compiled_xpath_fileSec)->[0];
     my $ret;
     $ret->{"filename"} = $filename;
     ############################################
@@ -555,7 +593,7 @@ XPATH
     ############################################
     my $is_deleted = has_purged_entries($xp, $amdsec);
     if ($is_deleted) {
-      my @purged_states = @{ get_purged_states($xp, $amdsec) };
+      my @purged_states = @{get_purged_states($xp, $amdsec)};
       foreach my $idx (0 .. $#purged_states) {
         $ret->{'purged'}->[$idx]->{state} = $purged_states[$idx];
         $ret->{'purged'}->[$idx]->{reason} = get_purged_reasons($xp, $amdsec)->[$idx];
@@ -570,7 +608,8 @@ XPATH
       say STDERR "No repid found in file $filename, is IE intentionally purged?";
       if ($is_deleted) {
         say STDERR "   Yes, a corresponding purge event is found.";
-      } else {
+      }
+      else {
         say STDERR "   No, a corresponding purge event was missed. This indicates an error in archive.";
       }
       return $ret;
@@ -579,7 +618,7 @@ XPATH
     ############################################
     # get all files of LOCAL representation
     $ret->{"filepids"} = get_filepids_ref($xp, $filesec, $repid);
-    $ret->{"files"}  = get_files_ref($xp, $filesec, $repid);
+    $ret->{"files"} = get_files_ref($xp, $filesec, $repid);
     my @loc_and_size = map {
       my $fpid = $_;
       my $location = get_file_path($xp, $filesec, $fpid);
@@ -588,15 +627,17 @@ XPATH
       my $size = get_filesize($xp, $fpid);
       #my $size = get_filesize_rx($slurp, $fpid);
       #say "size=$size";
-      [$location, $size];
+      [ $location, $size ];
     } @{$ret->{"filepids"}};
 
     foreach my $entry (@loc_and_size) {
-      my ($location, $size) = @{ $entry };
-      $ret->{"sizes"}->{$location} = $size ;
+      my ($location, $size) = @{$entry};
+      $ret->{"sizes"}->{$location} = $size;
     }
     return $ret;
   }
+}
+
 
 # returns count of subdirs of $dir
 sub searching_relevant_subdirs ($dir) {
-- 
GitLab