From 718b2e1927b6deb5e83724b21840ac0bfbaeead9 Mon Sep 17 00:00:00 2001
From: Andreas Romeyke <art1@andreas-romeyke.de>
Date: Thu, 15 Jul 2021 11:42:39 +0200
Subject: [PATCH] - refactoring, split parse_iexml() into smaller subs

---
 perl/exit_strategy.pl | 93 ++++++++++++++++++++++++++++---------------
 1 file changed, 61 insertions(+), 32 deletions(-)

diff --git a/perl/exit_strategy.pl b/perl/exit_strategy.pl
index d584095..7dfaacd 100644
--- a/perl/exit_strategy.pl
+++ b/perl/exit_strategy.pl
@@ -280,29 +280,20 @@ sub check_if_db_conform ($$) {
   my $compiled_xpath_admid = XML::LibXML::XPathExpression->new('@ADMID');
   my $compiled_xpath_xlinhref = XML::LibXML::XPathExpression->new('@xlin:href');
   my $compiled_xpath_dot = XML::LibXML::XPathExpression->new('.');
-  sub parse_iexml($$) {
+  ############################################################
+  sub get_title {
+    my $xp = shift;
     my $filename = shift;
-    my $recovery_flag = shift;
-    if ($recovery_flag) {
-      $recovery_flag = 2; # avoid warnings, see XML::LibXML::Parser POD about 'recovery'
-    }
-    my $dom = XML::LibXML->load_xml(
-        location  => $filename,
-        recover   => $recovery_flag,
-        no_blanks => 1,
-        compact   => 1,
-        no_network    => 1,
-    );
-    my $xp = get_xpath_context();
-    $xp->setContextNode($dom);
-    ############################################
     # get title
     my $title = $xp->findvalue($compiled_xpath_titles);
     check_if_db_conform($title, $filename);
-    ############################################
-    # get dc-records
-    my @dcrecords;
+    return $title;
+  }
 
+  sub get_dcrecords {
+    my $xp = shift;
+    my $filename = shift;
+    my @dcrecords;
     my $dcnodes = $xp->find($compiled_xpath_dcrecords);
     foreach my $dcnode ($dcnodes->get_nodelist) {
       #my $ref = ref $dcnode; use Data::Printer; p( $ref);
@@ -319,10 +310,14 @@ sub check_if_db_conform ($$) {
         push @dcrecords, \@pair;
       }
     }
-    ############################################
-    # get right representation ID (has a dnx-section with <key id=label>LOCAL</key>)
-    my $repids = $xp->find($compiled_xpath_amdsecs); #/mets:mets/mets:amdSec
+    return @dcrecords;
+  }
+
+  sub get_repid {
+    my $xp = shift;
+    my $filename = shift;
     my $repid;
+    my $repids = $xp->find($compiled_xpath_amdsecs); #/mets:mets/mets:amdSec
     my @repnodes = $repids->get_nodelist;
     my $found = scalar @repnodes;
     if (1 == $found) {
@@ -343,18 +338,15 @@ sub check_if_db_conform ($$) {
         }
         #print XML::XPath::XMLParser::as_string($node), "\n\n";
       }
-    } elsif (0 == $found) {
-        say STDERR "No reppid found in file $filename, is IE purged?";
-        my %tmp;
-        $tmp{"filename"}=$filename;
-        $tmp{"purged"}=1;
-        return \%tmp;
     }
-    if (!defined $repid) {
-      say "No repid found in file $filename";
-    }
-    ############################################
-    # get all files of LOCAL representation
+    # if $found == 0, $repid stays undef; the caller checks for that
+    return $repid;
+  }
+
+  sub get_files {
+    my $xp = shift;
+    my $filename = shift;
+    my $repid = shift;
     my @files;
     my $filegrpnodes = $xp->find($compiled_xpath_filegrps);
     foreach my $filegrpnode ($filegrpnodes->get_nodelist) {
@@ -367,6 +359,43 @@ sub check_if_db_conform ($$) {
         }
       }
     }
+    return @files;
+  }
+
+  sub parse_iexml($$) {
+    my $filename = shift;
+    my $recovery_flag = shift;
+    if ($recovery_flag) {
+      $recovery_flag = 2; # avoid warnings, see XML::LibXML::Parser POD about 'recovery'
+    }
+    my $dom = XML::LibXML->load_xml(
+        location  => $filename,
+        recover   => $recovery_flag,
+        no_blanks => 1,
+        compact   => 1,
+        no_network    => 1,
+    );
+    my $xp = get_xpath_context();
+    $xp->setContextNode($dom);
+    ############################################
+    # get title
+    my $title = get_title($xp, $filename);
+    ############################################
+    # get dc-records
+    my @dcrecords = get_dcrecords($xp, $filename);
+    ############################################
+    # get right representation ID (has a dnx-section with <key id=label>LOCAL</key>)
+    my $repid = get_repid($xp, $filename);
+    if (!defined $repid) {
+      say STDERR "No repid found in file $filename, is IE purged?";
+      my %tmp;
+      $tmp{"filename"}=$filename;
+      $tmp{"purged"}=1;
+      return \%tmp;
+    }
+    ############################################
+    # get all files of LOCAL representation
+    my @files = get_files ($xp, $filename, $repid);
     my %ret;
     $ret{"filename" } = $filename;
     $ret{"title"} = $title;
-- 
GitLab