From ff4dea9d0a9c265d6a82886b542fa538e299cb9a Mon Sep 17 00:00:00 2001 From: Andreas Romeyke <andreas.romeyke@slub-dresden.de> Date: Thu, 19 Sep 2024 20:38:06 +0200 Subject: [PATCH] - rewritten and simplified part handling purged / deleted entries in parse_iexml() --- perl/exit_strategy.pl | 48 +++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/perl/exit_strategy.pl b/perl/exit_strategy.pl index 218934c..8caa461 100644 --- a/perl/exit_strategy.pl +++ b/perl/exit_strategy.pl @@ -453,7 +453,8 @@ sub check_if_db_conform ($string, $filename) { my $str_repid_old = "/mets:mets/mets:amdSec[starts-with(\@ID, \'REP\') and $str_local_reps]/\@ID"; my $compiled_xpath_repid_old = XML::LibXML::XPathExpression->new($str_repid_old); # only event 272 or 274 should be used, next line uses a multiple-predicate hack for XPATH 1.0 - my $xpath_if_purged_expr = 'mets:digiprovMD[@ID="ie-amd-digiprov"]/mets:mdWrap/mets:xmlData/dnx:dnx/dnx:section[@id="event"]/dnx:record[dnx:key[@id="eventIdentifierValue"][. >= 272][. != 273][. <= 274]]'; + my $xpath_if_deleted_expr = 'mets:digiprovMD[@ID="ie-amd-digiprov"]/mets:mdWrap/mets:xmlData/dnx:dnx/dnx:section[@id="event"]/dnx:record[dnx:key[@id="eventIdentifierValue"][. = 272]]'; + my $xpath_if_purged_expr = 'mets:digiprovMD[@ID="ie-amd-digiprov"]/mets:mdWrap/mets:xmlData/dnx:dnx/dnx:section[@id="event"]/dnx:record[dnx:key[@id="eventIdentifierValue"][. = 274]]'; my $compiled_xpath_ifpurged_event = XML::LibXML::XPathExpression->new('dnx:key[@id="eventDescription"]/text()'); my $compiled_xpath_purged_event_date = XML::LibXML::XPathExpression->new('dnx:key[@id="eventDateTime"]/text()'); my $compiled_xpath_purged_event_authorized_by = XML::LibXML::XPathExpression->new('dnx:key[@id="linkingAgentIdentifierValue1"]/text()'); @@ -536,14 +537,14 @@ XPATH } } - sub get_purged_states($xp, $dnx) { + sub get_purged_state($xp, $dnx) { # we need to earch for eventIdentifierValue 272 or 274. # the eventDescription should be # a) IE has been deleted # b) IE has been purged my @del_nodes = map {$_->nodeValue} $xp->findnodes($compiled_xpath_ifpurged_event, $dnx); my @states = map {$_ =~ s/^IE has been (deleted|purged)$/$1/; $_} @del_nodes; - return \@states; + return $states[0]; } sub has_purged_entries($xp, $dnx) { @@ -645,30 +646,27 @@ sub parse_iexml($filename, $recovery_flag) { # get dc-records $ret->{"dcrecords"} = get_dcrecords_ref($xp, $dmdsec); ############################################ - my $dnx_ctx = $xp->findnodes($xpath_if_purged_expr, $amdsec_ie)->[0]; - my $is_deleted = has_purged_entries($xp, $dnx_ctx); - if ($is_deleted) { - my @purged_states = @{get_purged_states($xp, $dnx_ctx)}; - my $idx_purged; - my $idx_deleted; - for (my $i = 0; $i <= $#purged_states; $i++) { - if ($purged_states[$i] eq 'purged') {$idx_purged = $i; last;} - elsif ($purged_states[$i] eq 'deleted') {$idx_deleted = $i;} + my $is_deleted; + foreach my $state (qw(purged deleted)) { + my $dnx_ctx; + if ($state eq 'purged') { + $dnx_ctx = $xp->findnodes($xpath_if_purged_expr, $amdsec_ie)->[0]; } - if (defined $idx_purged) { - $ret->{purged}->{state} = "purged"; - $ret->{purged}->{reason} = get_purged_reasons($xp, $dnx_ctx)->[$idx_purged]; - $ret->{purged}->{note} = get_purged_notes($xp, $dnx_ctx)->[$idx_purged]; - $ret->{purged}->{date} = get_purged_dates($xp, $dnx_ctx)->[$idx_purged]; - $ret->{purged}->{authorized_by} = get_purged_authorized_by($xp, $dnx_ctx)->[$idx_purged]; - } else { - $ret->{deleted}->{state} = "deleted"; - $ret->{deleted}->{reason} = get_purged_reasons($xp, $dnx_ctx)->[$idx_deleted]; - $ret->{deleted}->{note} = get_purged_notes($xp, $dnx_ctx)->[$idx_deleted]; - $ret->{deleted}->{date} = get_purged_dates($xp, $dnx_ctx)->[$idx_deleted]; - $ret->{deleted}->{authorized_by} = get_purged_authorized_by($xp, $dnx_ctx)->[$idx_deleted]; + else { + $dnx_ctx = $xp->findnodes($xpath_if_deleted_expr, $amdsec_ie)->[0]; + } + $is_deleted = has_purged_entries($xp, $dnx_ctx); + if ($is_deleted) { + my $purged_state = get_purged_state($xp, $dnx_ctx); + $ret->{$state}->{state} = $purged_state; + $ret->{$state}->{reason} = get_purged_reasons($xp, $dnx_ctx)->[0]; + $ret->{$state}->{note} = get_purged_notes($xp, $dnx_ctx)->[0]; + $ret->{$state}->{date} = get_purged_dates($xp, $dnx_ctx)->[0]; + $ret->{$state}->{authorized_by} = get_purged_authorized_by($xp, $dnx_ctx)->[0]; + if ($state eq "purged") {last;} } } + ############################################ # get right representation ID (has a dnx-section with <key id=label>LOCAL</key>) my $repid = get_repid($xp); @@ -682,7 +680,9 @@ sub parse_iexml($filename, $recovery_flag) { } return $ret; } + $ret->{"repid"} = $repid; + ############################################ # get all files of LOCAL representation my $filegrp = $xp->findnodes("mets:fileGrp[\@ADMID='$repid']", $filesec)->[0]; -- GitLab