#!/usr/bin/perl -w
###############################################################################
# Author: Andreas Romeyke
# SLUB Dresden, Department Longterm Preservation
# copyright 2023/2024, licensed under terms of GNU General Public License 3.0,
# see file LICENSE.txt for details.
#
# scans a given repository and creates a SQLite database.
# This is part of the exit strategy; for details, see the asciidoc file
# exit_strategie.asciidoc (also contains ER-diagram for database)
#
# call with:
#
# perl ./exit_strategy.pl rosetta_exit_strategy/tmp.db /permanent/
#
###############################################################################
#
# WARNING:
#
# the following messages only occur if you had an unclean SIP ingest process;
# they mean that your IE-XML contains wrong/unused namespace declarations
#
# if some AIPs fail with messages like:
#
# '/permanent_storage/normal/2017/07/05/IE1043/V1-IE1043.xml:6:
# namespace error : xmlns:mods: 'http://www.loc.gov/mods/v3
# http://www.loc.gov/standards/mods/v3/mods-3-0.xsd' is not a valid URI
# s="http://www.loc.gov/mods/v3
# http://www.loc.gov/standards/mods/v3/mods-3-0.xsd"'
#
# then (and only then) try this:
#
# perl ./exit_strategy.pl --recover rosetta_exit_strategy/tmp.db /permanent/
#
#
# call help with:
# perl ./exit_strategy.pl --help
###############################################################################
use v5.36;
use utf8; # for debugging output
use constant DEBUG => 0; # no debug
use Carp;
use DBD::SQLite;
use DBI;
use File::Basename qw(basename dirname);
use File::Find;
use Getopt::Long;
use IO::Handle;
use List::Util qw(first zip);
use Path::Tiny;
use Pod::Usage;
use Time::Progress;
use XML::LibXML::XPathContext;
use XML::LibXML;
# Checks whether a string is a well-formed LZA identifier of the shape
#   SLUB:LZA:<segment>:<segment>:<segment>
# e.g. SLUB:LZA:Kitodo:testcases:fileoriginalpath_with_http
#
# Parameters:
#   $lza_id - the identifier string to check
#
# Returns the result of the pattern match: true if the complete string has
# the expected shape, false otherwise.
sub check_lzaid ($lza_id) {
    my $rx_up = qr{[A-Za-z0-9_-]+};    # archive name & internal workflow
    my $rx_lw = qr{[a-z0-9_-]+};       # external workflow & external id
    # Anchor with \A and \z instead of ^ and $: '$' also matches just before
    # a trailing newline, which would let "…id\n" pass as a valid identifier.
    return ($lza_id =~ m/\ASLUB:LZA:$rx_up:$rx_lw:$rx_lw\z/);
}
# --- console setup -----------------------------------------------------------
# Turn on autoflush for STDOUT.
STDOUT->autoflush(1);
# Ensure everything printed to STDOUT is encoded as UTF-8.
binmode STDOUT, ':encoding(UTF-8)';

# --- script-wide defaults ----------------------------------------------------
my $db_name     = 'exit_strategy';
my $schema_name = 'exit_strategy';
my $sourcetype  = 'hdd';    # default value