diff --git a/perl/exit_strategy.pl b/perl/exit_strategy.pl index dc6409e97fec7e39348e86a243723d53c9244bfa..966f5832a3da1cc5ceaf7cadf8fe301cb7d02f75 100644 --- a/perl/exit_strategy.pl +++ b/perl/exit_strategy.pl @@ -437,8 +437,10 @@ sub check_if_db_conform ($string, $filename) { # ############################################################################### my $compiled_xpath_dmdSec = XML::LibXML::XPathExpression->new('/mets:mets/mets:dmdSec'); + my $compiled_xpath_amdSec = XML::LibXML::XPathExpression->new('/mets:mets/mets:amdSec'); my $compiled_xpath_amdSec_rep = XML::LibXML::XPathExpression->new('/mets:mets/mets:amdSec[starts-with(@ID, \'REP\')]'); my $compiled_xpath_amdSec_fl = XML::LibXML::XPathExpression->new('/mets:mets/mets:amdSec[starts-with(@ID, \'FL\')]'); + my $compiled_xpath_amdSec_ie = XML::LibXML::XPathExpression->new('/mets:mets/mets:amdSec[@ID="ie-amd"]'); my $compiled_xpath_fileSec = XML::LibXML::XPathExpression->new('/mets:mets/mets:fileSec'); my $compiled_xpath_titles = XML::LibXML::XPathExpression->new('mets:mdWrap[1]/mets:xmlData[1]/dc:record/dc:title[1]'); my $compiled_xpath_fileGrp = XML::LibXML::XPathExpression->new('/mets:mets/mets:fileSec/mets:fileGrp'); @@ -451,7 +453,7 @@ sub check_if_db_conform ($string, $filename) { my $str_repid_old = "/mets:mets/mets:amdSec[starts-with(\@ID, \'REP\') and $str_local_reps]/\@ID"; my $compiled_xpath_repid_old = XML::LibXML::XPathExpression->new($str_repid_old); # only event 272 or 274 should be used, next line uses a multiple-predicate hack for XPATH 1.0 - my $xpath_if_purged_expr = '/mets:mets/mets:amdSec[@ID="ie-amd"]/mets:digiprovMD[@ID="ie-amd-digiprov"]/mets:mdWrap/mets:xmlData/dnx:dnx/dnx:section[@id="event"]/dnx:record[dnx:key[@id="eventIdentifierValue"][. >= 272][. != 273][. <= 274]]/'; + my $xpath_if_purged_expr = 'mets:digiprovMD[@ID="ie-amd-digiprov"]/mets:mdWrap/mets:xmlData/dnx:dnx/dnx:section[@id="event"]/dnx:record[dnx:key[@id="eventIdentifierValue"][. >= 272][. != 273][. <= 274]]/'; my $compiled_xpath_ifpurged_event = XML::LibXML::XPathExpression->new($xpath_if_purged_expr . 'dnx:key[@id="eventDescription"]/text()'); my $compiled_xpath_purged_event_date = XML::LibXML::XPathExpression->new($xpath_if_purged_expr .'dnx:key[@id="eventDateTime"]/text()'); my $compiled_xpath_purged_event_authorized_by = XML::LibXML::XPathExpression->new($xpath_if_purged_expr . 'dnx:key[@id="linkingAgentIdentifierValue1"]/text()'); @@ -493,22 +495,20 @@ sub check_if_db_conform ($string, $filename) { # if $found == 0 do nothing return $repid; } - sub get_filepids_ref ($xp, $fsp, $repid) { - my $filegrp = $xp->findnodes("mets:fileGrp[\@ADMID='$repid']", $fsp)->[0]; + sub get_filepids_ref ($xp, $filegrp) { my @files_nodes = $xp->findnodes($compiled_xpath_filepids, $filegrp); my @filepids = map { $_->nodeValue; } @files_nodes; return \@filepids; } - sub get_files_ref ($xp, $fsp, $repid){ - my $filegrp = $xp->findnodes("mets:fileGrp[\@ADMID='$repid']", $fsp)->[0]; + sub get_files_ref ($xp, $filegrp){ my @files_nodes = $xp->findnodes($compiled_xpath_files, $filegrp); my @files = map { $_->nodeValue; } @files_nodes; return \@files; } - sub get_file_path($xp, $fsp, $filepid) { - return $xp->findvalue("mets:fileGrp/mets:file[\@ID=\"$filepid\"]/mets:FLocat/\@xlin:href", $fsp); + sub get_file_path($xp, $filegrp, $filepid) { + return $xp->findvalue("mets:file[\@ID=\"$filepid\"]/mets:FLocat/\@xlin:href", $filegrp); } sub get_file_path_rx($xml, $filepid) { my $nt = qr{[^>]*}; @@ -551,8 +551,7 @@ XPATH # the eventDescription should be # a) IE has been deleted # b) IE has been purged - my @del_nodes = $xp->findnodes($compiled_xpath_ifpurged_event, $amd); - return (scalar @del_nodes > 0); + return $xp->exists($compiled_xpath_ifpurged_event, $amd); } sub get_purged_dates($xp, $amd) { @@ -634,7 +633,8 @@ sub parse_iexml($filename, $recovery_flag) { my $xp = get_xpath_context(); $xp->setContextNode($dom); my $dmdsec = $xp->findnodes($compiled_xpath_dmdSec)->[0]; - my $amdsec = $xp->findnodes($compiled_xpath_amdSec_rep)->[0]; + #my $amdsec_rep = $xp->findnodes($compiled_xpath_amdSec_rep)->[0]; + my $amdsec_ie = $xp->findnodes($compiled_xpath_amdSec_ie)->[0]; my $filesec = $xp->findnodes($compiled_xpath_fileSec)->[0]; my $ret; $ret->{"filename"} = $filename; @@ -684,11 +684,12 @@ sub parse_iexml($filename, $recovery_flag) { $ret->{"repid"} = $repid; ############################################ # get all files of LOCAL representation - $ret->{"filepids"} = get_filepids_ref($xp, $filesec, $repid); - $ret->{"files"} = get_files_ref($xp, $filesec, $repid); + my $filegrp = $xp->findnodes("mets:fileGrp[\@ADMID='$repid']", $filesec)->[0]; + $ret->{"filepids"} = get_filepids_ref($xp, $filegrp); + $ret->{"files"} = get_files_ref($xp, $filegrp); my @loc_and_size = map { my $fpid = $_; - my $location = get_file_path($xp, $filesec, $fpid); + my $location = get_file_path($xp, $filegrp, $fpid); #my $location = get_file_path_rx($slurp, $fpid); #say "location=$location"; my $size = get_filesize($xp, $fpid); @@ -701,6 +702,7 @@ sub parse_iexml($filename, $recovery_flag) { my ($location, $size) = @{$entry}; $ret->{"sizes"}->{$location} = $size; } + return $ret; } }