diff --git a/perl/exit_strategy.pl b/perl/exit_strategy.pl index 45cffbaae3b211de494e3b7ddf839d77b7b47efb..9c3fc8e47efca6e56f0fd47593be70b83f28a57e 100644 --- a/perl/exit_strategy.pl +++ b/perl/exit_strategy.pl @@ -238,8 +238,6 @@ SQL_DELETE_PLAN # $ret{"dcrecords"} = \@dcrecords; ############################################################################### sub write_addsql ($dbh, $plans, $refhash) { - use Data::Printer; - p( $refhash); my $iefile = basename($refhash->{"filename"}); my ($ieid,$ieversion); if ($iefile =~ m/^V(\d+)-(IE\d*)\.xml$/) { @@ -264,6 +262,23 @@ sub write_addsql ($dbh, $plans, $refhash) { $dcvalue=~tr/'/"/; $plans->{dc}->execute($ieid, $ieversion, $dckey, $dcvalue) or die "sql problem detected", $dbh->errstr; } + if ($refhash->{'purged'}) { + my @purged = @{$refhash->{'purged'}}; + foreach my $purge (@purged) { + # TODO: call plan plans->{deleted} + my $state = $purge->{'state'}; + my $reason = $purge->{'reason'}; + my $date = $purge->{'date'}; + my $authorized_by = $purge->{'authorized_by'}; + if (! defined $reason or length($reason) < 2) { + use Data::Printer; + p( $purge ); + p($iefile); + die; + } + $plans->{deleted}->execute($ieid, $ieversion, $state, $reason, $date, $authorized_by) or die "sql problem detected", $dbh->errstr; + } + } return 1; } @@ -356,7 +371,11 @@ sub check_if_db_conform ($string, $filename) { my $str_local_reps = "$str_local_record/*[local-name()=\'key\' and \@id=\'label\' and (. = \'LOCAL\' or . = \'LZA_INTERN\' or . = \'LZA\')]"; my $str_repid_old = "/mets:mets/mets:amdSec[starts-with(\@ID, \'REP\') and $str_local_reps]/\@ID"; my $compiled_xpath_repid_old = XML::LibXML::XPathExpression->new($str_repid_old); - my $compiled_xpath_ifpurged_event = XML::LibXML::XPathExpression->new('/mets:mets/mets:amdSec[@ID="ie-amd"]/mets:digiprovMD[@ID="ie-amd-digiprov"]/mets:mdWrap/mets:xmlData/dnx:dnx/dnx:section[@id="event"]/dnx:record[dnx:key[@id="eventIdentifierValue"]/. = 272 or . = 274]/dnx:key[@id="eventDescription"]/text()'); +# my $compiled_xpath_ifpurged_event = XML::LibXML::XPathExpression->new('/mets:mets/mets:amdSec[@ID="ie-amd"]/mets:digiprovMD[@ID="ie-amd-digiprov"]/mets:mdWrap/mets:xmlData/dnx:dnx/dnx:section[@id="event"]/dnx:record[starts-with(dnx:key[@id="eventIdentifierValue"],"27")]/dnx:key[@id="eventDescription"]/text()'); + my $compiled_xpath_ifpurged_event = XML::LibXML::XPathExpression->new('/mets:mets/mets:amdSec[@ID="ie-amd"]/mets:digiprovMD[@ID="ie-amd-digiprov"]/mets:mdWrap/mets:xmlData/dnx:dnx/dnx:section[@id="event"]/dnx:record[dnx:key[@id="eventIdentifierValue"][. >= 272][. <= 274]]/dnx:key[@id="eventDescription"]/text()'); + my $compiled_xpath_purged_event_date = XML::LibXML::XPathExpression->new('/mets:mets/mets:amdSec[@ID="ie-amd"]/mets:digiprovMD[@ID="ie-amd-digiprov"]/mets:mdWrap/mets:xmlData/dnx:dnx/dnx:section[@id="event"]/dnx:record[dnx:key[@id="eventIdentifierValue"][. >= 272][. <= 274]]/dnx:key[@id="eventDateTime"]/text()'); + my $compiled_xpath_purged_event_authorized_by = XML::LibXML::XPathExpression->new('/mets:mets/mets:amdSec[@ID="ie-amd"]/mets:digiprovMD[@ID="ie-amd-digiprov"]/mets:mdWrap/mets:xmlData/dnx:dnx/dnx:section[@id="event"]/dnx:record[dnx:key[@id="eventIdentifierValue"][. >= 272][. <= 274]]/dnx:key[@id="linkingAgentIdentifierValue1"]/text()'); + my $compiled_xpath_purged_event_reason = XML::LibXML::XPathExpression->new('/mets:mets/mets:amdSec[@ID="ie-amd"]/mets:digiprovMD[@ID="ie-amd-digiprov"]/mets:mdWrap/mets:xmlData/dnx:dnx/dnx:section[@id="event"]/dnx:record[dnx:key[@id="eventIdentifierValue"][. >= 272][. <= 274]]/dnx:key[@id="eventOutcomeDetail1"]/text()'); ############################################################ sub get_title ($xp, $dmd){ # get title @@ -401,13 +420,44 @@ sub check_if_db_conform ($string, $filename) { return \@files; } - sub has_purged_entry($xp, $amd) { + sub get_purged_states($xp, $amd) { + # we need to earch for eventIdentifierValue 272 or 274. + # the eventDescription should be + # a) IE has been deleted + # b) IE has been purged + my @del_nodes = map {$_->nodeValue} $xp->findnodes($compiled_xpath_ifpurged_event, $amd); + my @states = map {$_ =~ s/^IE has been (deleted|purged)$/$1/; $_} @del_nodes; + return \@states; + } + + sub has_purged_entries($xp, $amd) { # we need to earch for eventIdentifierValue 272 or 274. # the eventDescription should be # a) IE has been deleted # b) IE has been purged - my @del_nodes = $xp->findvalue($compiled_xpath_ifpurged_event, $amd); - return (List::Util::any {$_ =~ m/^IE has been (deleted|purged)$/} @del_nodes); + my @del_nodes = $xp->findnodes($compiled_xpath_ifpurged_event, $amd); + return (scalar @del_nodes > 0); + } + + sub get_purged_dates($xp, $amd) { + my @date_nodes = $xp->findnodes($compiled_xpath_purged_event_date, $amd); + my @dates = map {$_->nodeValue} @date_nodes; + return \@dates; + } + + sub get_purged_reasons($xp, $amd) { + my @entries_str = map {$_->nodeValue} $xp->findnodes($compiled_xpath_purged_event_reason, $amd); + my @results; + foreach my $e ( @entries_str) { + my @entries = split(/;/, $e); + push @results, List::Util::first{ s/reason=//; } @entries + } + return \@results; + } + + sub get_purged_authorized_by($xp, $amd) { + my @auth = map {$_->nodeValue} $xp->findnodes($compiled_xpath_purged_event_authorized_by, $amd); + return \@auth; } sub parse_iexml($filename, $recovery_flag) { @@ -421,38 +471,43 @@ sub check_if_db_conform ($string, $filename) { my $dmdsec = $xp->findnodes($compiled_xpath_dmdSec)->[0]; my $amdsec = $xp->findnodes($compiled_xpath_amdSec)->[0]; my $filesec= $xp->findnodes($compiled_xpath_fileSec)->[0]; + my $ret; + $ret->{"filename"} = $filename; ############################################ # get title - my $title = get_title($xp, $dmdsec); + $ret->{"title"} = get_title($xp, $dmdsec); ############################################ # get dc-records - my $dcrecords_ref = get_dcrecords_ref($xp, $dmdsec); + $ret->{"dcrecords"} = get_dcrecords_ref($xp, $dmdsec); + ############################################ + my $is_deleted = has_purged_entries($xp, $amdsec); + if ($is_deleted) { + print "FOUND DELETED\n"; + my @purged_states = @{ get_purged_states($xp, $amdsec) }; + foreach my $idx (0 .. $#purged_states) { + $ret->{'purged'}->[$idx]->{state} = $purged_states[$idx]; + $ret->{'purged'}->[$idx]->{reason} = get_purged_reasons($xp, $amdsec)->[$idx]; + $ret->{'purged'}->[$idx]->{date} = get_purged_dates($xp, $amdsec)->[$idx]; + $ret->{'purged'}->[$idx]->{authorized_by} = get_purged_authorized_by($xp, $amdsec)->[$idx]; + } + } ############################################ # get right representation ID (has a dnx-section with <key id=label>LOCAL</key>) my $repid = get_repid($xp); if (!defined $repid) { say STDERR "No repid found in file $filename, is IE intentionally purged?"; - if (has_purged_entry($xp, $amdsec)) { + if ($is_deleted) { say STDERR " Yes, a corresponding purge event is found."; } else { say STDERR " No, a corresponding purge event was missed. This indicates an error in archive."; } - my $tmp; - $tmp->{"filename"} = $filename; - $tmp->{"purged"} = 1; - return $tmp; + return $ret; } + $ret->{"repid"} = $repid; ############################################ # get all files of LOCAL representation - my $files_ref = get_files_ref($xp, $filesec, $repid); - my $ret; - $ret->{"filename" } = $filename; - $ret->{"title"} = $title; - $ret->{"repid"} = $repid; - $ret->{"files"} = $files_ref; - $ret->{"dcrecords"} = $dcrecords_ref; + $ret->{"files"} = get_files_ref($xp, $filesec, $repid); return $ret; - } # returns count of subdirs of $dir