Select Git revision
exit_strategy.pl 22.06 KiB
#!/usr/bin/perl -w
###############################################################################
# Author: Andreas Romeyke
# SLUB Dresden, Department Longterm Preservation
#
# Scans a given repository and creates a SQLite database.
# This is part of the exit strategy; for details, see the asciidoc file
# exit_strategie.asciidoc (which also contains the ER diagram for the database).
#
# call with:
#
# perl ./exit_strategy.pl rosetta_exit_strategy/tmp.db /permanent/
#
###############################################################################
#
# WARNING:
#
# the following messages only occur if you had an unclean SIP ingest process;
# they mean that your IE-XML contains wrong/unused namespace declarations
#
# if some AIPs produce error messages like:
#
# '/permanent_storage/normal/2017/07/05/IE1043/V1-IE1043.xml:6:
# namespace error : xmlns:mods: 'http://www.loc.gov/mods/v3
# http://www.loc.gov/standards/mods/v3/mods-3-0.xsd' is not a valid URI
# s="http://www.loc.gov/mods/v3
# http://www.loc.gov/standards/mods/v3/mods-3-0.xsd"'
#
# then (and only then) try this:
#
# perl ./exit_strategy.pl --recover rosetta_exit_strategy/tmp.db /permanent/
#
#
# call help with:
# perl ./exit_strategy.pl --help
###############################################################################
use strict;
use warnings;
use feature qw( say signatures );
no warnings "experimental::signatures";
use utf8; # for debugging output
use constant DEBUG => 0; # no debug
use Carp;
use DBD::SQLite;
use DBI;
use File::Basename qw(basename);
use File::Find;
use Getopt::Long;
use IO::Handle;
use List::Util qw(any);
use Path::Tiny;
use Pod::Usage;
use Time::Progress;
use XML::LibXML::XPathContext;
use XML::LibXML;
# File-scoped defaults.
my $db_name     = 'exit_strategy';
my $schema_name = 'exit_strategy';
my $sourcetype  = 'hdd';    # default value

# Flush STDOUT after every write instead of letting output sit in a buffer.
STDOUT->autoflush(1);
# Make sure everything printed to STDOUT is encoded as UTF-8.
binmode STDOUT, ':encoding(UTF-8)';
###############################################################################
# write database creation
# write tables creation
# scan repository
# if IE.xml file found, read its metadata, create SQL add entry