From 718b2e1927b6deb5e83724b21840ac0bfbaeead9 Mon Sep 17 00:00:00 2001 From: Andreas Romeyke <art1@andreas-romeyke.de> Date: Thu, 15 Jul 2021 11:42:39 +0200 Subject: [PATCH] - refactoring, splitted parse_ie_xm() --- perl/exit_strategy.pl | 93 ++++++++++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 32 deletions(-) diff --git a/perl/exit_strategy.pl b/perl/exit_strategy.pl index d584095..7dfaacd 100644 --- a/perl/exit_strategy.pl +++ b/perl/exit_strategy.pl @@ -280,29 +280,20 @@ sub check_if_db_conform ($$) { my $compiled_xpath_admid = XML::LibXML::XPathExpression->new('@ADMID'); my $compiled_xpath_xlinhref = XML::LibXML::XPathExpression->new('@xlin:href'); my $compiled_xpath_dot = XML::LibXML::XPathExpression->new('.'); - sub parse_iexml($$) { + ############################################################ + sub get_title { + my $xp = shift; my $filename = shift; - my $recovery_flag = shift; - if ($recovery_flag) { - $recovery_flag = 2; # avoid warnings, see XML::LibXML::Parser POD about 'recovery' - } - my $dom = XML::LibXML->load_xml( - location => $filename, - recover => $recovery_flag, - no_blanks => 1, - compact => 1, - no_network => 1, - ); - my $xp = get_xpath_context(); - $xp->setContextNode($dom); - ############################################ # get title my $title = $xp->findvalue($compiled_xpath_titles); check_if_db_conform($title, $filename); - ############################################ - # get dc-records - my @dcrecords; + return $title; + } + sub get_dcrecords { + my $xp = shift; + my $filename = shift; + my @dcrecords; my $dcnodes = $xp->find($compiled_xpath_dcrecords); foreach my $dcnode ($dcnodes->get_nodelist) { #my $ref = ref $dcnode; use Data::Printer; p( $ref); @@ -319,10 +310,14 @@ sub check_if_db_conform ($$) { push @dcrecords, \@pair; } } - ############################################ - # get right representation ID (has a dnx-section with <key id=label>LOCAL</key>) - my $repids = $xp->find($compiled_xpath_amdsecs); #/mets:mets/mets:amdSec + return @dcrecords; + } + + sub get_repid { + my $xp = shift; + my $filename = shift; my $repid; + my $repids = $xp->find($compiled_xpath_amdsecs); #/mets:mets/mets:amdSec my @repnodes = $repids->get_nodelist; my $found = scalar @repnodes; if (1 == $found) { @@ -343,18 +338,15 @@ sub check_if_db_conform ($$) { } #print XML::XPath::XMLParser::as_string($node), "\n\n"; } - } elsif (0 == $found) { - say STDERR "No reppid found in file $filename, is IE purged?"; - my %tmp; - $tmp{"filename"}=$filename; - $tmp{"purged"}=1; - return \%tmp; } - if (!defined $repid) { - say "No repid found in file $filename"; - } - ############################################ - # get all files of LOCAL representation + # if $found == 0 do nothing + return $repid; + } + + sub get_files { + my $xp = shift; + my $filename = shift; + my $repid = shift; my @files; my $filegrpnodes = $xp->find($compiled_xpath_filegrps); foreach my $filegrpnode ($filegrpnodes->get_nodelist) { @@ -367,6 +359,43 @@ sub check_if_db_conform ($$) { } } } + return @files; + } + + sub parse_iexml($$) { + my $filename = shift; + my $recovery_flag = shift; + if ($recovery_flag) { + $recovery_flag = 2; # avoid warnings, see XML::LibXML::Parser POD about 'recovery' + } + my $dom = XML::LibXML->load_xml( + location => $filename, + recover => $recovery_flag, + no_blanks => 1, + compact => 1, + no_network => 1, + ); + my $xp = get_xpath_context(); + $xp->setContextNode($dom); + ############################################ + # get title + my $title = get_title($xp, $filename); + ############################################ + # get dc-records + my @dcrecords = get_dcrecords($xp, $filename); + ############################################ + # get right representation ID (has a dnx-section with <key id=label>LOCAL</key>) + my $repid = get_repid($xp, $filename); + if (!defined $repid) { + say STDERR "No repid found in file $filename, is IE purged?"; + my %tmp; + $tmp{"filename"}=$filename; + $tmp{"purged"}=1; + return \%tmp; + } + ############################################ + # get all files of LOCAL representation + my @files = get_files ($xp, $filename, $repid); my %ret; $ret{"filename" } = $filename; $ret{"title"} = $title; -- GitLab