Skip to content
Snippets Groups Projects
Select Git revision
  • 3f8f0f9b1f51bb141d489f69bdd39c40e93dec8f
  • master default protected
  • fix_autoclose_bug
  • v2.840
  • works-with-rosetta-7.3-or-higher
  • v2020.2
6 results

Makefile

Blame
  • exit_strategy.pl 33.66 KiB
    #!/usr/bin/perl -w
    ###############################################################################
    # Author: Andreas Romeyke
    # SLUB Dresden, Department Longterm Preservation
    # copyright 2023/2024, licensed under terms of GNU General Public License 3.0,
    # see file LICENSE.txt for details.
    #
    # scans a given repository and creates a SQLite database.
    # This is part of the exit-strategy; for details, see asciidoc file
    # exit_strategie.asciidoc (also contains ER-diagram for database)
    #
    # call with:
    #
    # perl ./exit_strategy.pl rosetta_exit_strategy/tmp.db /permanent/
    #
    ###############################################################################
    #
    # WARNING:
    #
    # the following messages only occur if you had an unclean SIP ingest process;
    # they mean that your IE-XML contains wrong/unused namespace declarations
    #
    # if some AIPs are wrong with messages like:
    #
    # '/permanent_storage/normal/2017/07/05/IE1043/V1-IE1043.xml:6: 
    #  namespace error : xmlns:mods: 'http://www.loc.gov/mods/v3
    #   http://www.loc.gov/standards/mods/v3/mods-3-0.xsd' is not a valid URI
    #  s="http://www.loc.gov/mods/v3
    #   http://www.loc.gov/standards/mods/v3/mods-3-0.xsd"'
    #
    # then (and only then) try this:
    #
    # perl ./exit_strategy.pl --recover rosetta_exit_strategy/tmp.db /permanent/
    #
    #
    # call help with:
    # perl ./exit_strategy.pl --help
    ###############################################################################
    
    use v5.36;
    use utf8; # for debugging output
    use constant DEBUG => 0; # no debug
    use Carp;
    use DBD::SQLite;
    use DBI;
    use File::Basename qw(basename dirname);
    use File::Find;
    use Getopt::Long;
    use IO::Handle;
    use List::Util qw(first);
    use Path::Tiny;
    use Pod::Usage;
    use Time::Progress;
    use XML::LibXML::XPathContext;
    use XML::LibXML;
    
    # Checks whether $lza_id is a well-formed LZA identifier of the shape
    #   SLUB:LZA:<field>:<field>:<field>
    # e.g. SLUB:LZA:Kitodo:testcases:fileoriginalpath_with_http
    # The third field accepts [A-Za-z0-9_-]+, the fourth and fifth fields
    # accept only [a-z0-9_-]+ (no upper case letters).
    #
    # Parameter: $lza_id - the string to validate
    # Returns:   the result of the pattern match (true if the whole string
    #            is a valid identifier, false otherwise)
    sub check_lzaid ($lza_id) {
      my $rx_up=qr{[A-Za-z0-9_-]+}; # archive name & internal workflow
      my $rx_lw=qr{[a-z0-9_-]+};    # external workflow & external id
      # \A and \z anchor the complete string; a plain '$' would also match
      # before a trailing newline and let "…id\n" pass as valid
      return ($lza_id =~ m/\ASLUB:LZA:$rx_up:$rx_lw:$rx_lw\z/);
    };
    
    # enable autoflush on STDOUT so output is not held back in the buffer
    STDOUT->autoflush(1);
    # guarantee that output will be UTF-8 encoded
    binmode(STDOUT, ":encoding(UTF-8)");
    # NOTE(review): presumably the database and schema names used by code
    # further below in this file - confirm against their usage
    my $db_name="exit_strategy";
    my $schema_name="exit_strategy";
    my $sourcetype="hdd"; #default value