Skip to content
Snippets Groups Projects
Select Git revision
  • 12d026cf994c1196b827d888f96be3b5b00f16b5
  • master default protected
  • v2022.1
3 results

exit_strategy.pl

Blame
  • user avatar
    Andreas Romeyke authored
    - use compiled xpaths
    - use precompiled xpathcontext
    - use nodeValue() instead triggering findvalue()
    - handle purged IEs
    - hande all variants of localreps (LOCAL and LZA and LZA_INTERN)
    12d026cf
    History
    exit_strategy.pl 19.82 KiB
    #!/usr/bin/perl -w
    ###############################################################################
    # Author: Andreas Romeyke
    # SLUB Dresden, Department Longterm Preservation
    #
    # Scans a given repository and generates an SQL script that creates a database.
    # This is part of the exit strategy; for details, see the asciidoc file
    # exit_strategie.asciidoc (which also contains the database's ER diagram).
    #
    # call with:
    #
    # perl ./exit_strategy.pl rosetta_exit_strategy/tmp.sql /permanent/
    #
    # tested with a PostgreSQL database
    #
    # then load the generated script with:
    #         psql -U romeyke -d exit_strategy \
    #              -f rosetta_exit_strategy/tmp.sql -L rosetta_exit.log
    #
    ###############################################################################
    #
    # WARNING:
    #
    # the following messages only occur if you had an unclean SIP ingest process;
    # they mean that your IE-XML contains wrong/unused namespace declarations
    #
    # if some AIPs fail with messages like:
    #
    # '/permanent_storage/normal/2017/07/05/IE1043/V1-IE1043.xml:6: 
    #  namespace error : xmlns:mods: 'http://www.loc.gov/mods/v3
    #   http://www.loc.gov/standards/mods/v3/mods-3-0.xsd' is not a valid URI
    #  s="http://www.loc.gov/mods/v3
    #   http://www.loc.gov/standards/mods/v3/mods-3-0.xsd"'
    #
    # then (and only then) try this:
    #
    # perl ./exit_strategy.pl --recover rosetta_exit_strategy/tmp.sql /permanent/
    #
    ###############################################################################
    
    
    use strict;
    use warnings;
    use feature "say";
    use Carp;
    use Path::Tiny;
    use File::Find;
    use File::Sort qw(sort_file);
    use XML::LibXML;
    use Time::Progress;
    use XML::LibXML::XPathContext;
    use Getopt::Long;
    use constant DEBUG => 0; # no debug
    
    use IO::Handle;
    # Flush STDOUT after every write instead of buffering it.
    STDOUT->autoflush(1);

    # Guarantee that everything printed to STDOUT is encoded as UTF-8.
    binmode STDOUT, ':encoding(UTF-8)';

    # Names of the target database and of the schema inside it.
    my $db_name     = 'exit_strategy';
    my $schema_name = 'exit_strategy';

    # Default source type.
    # NOTE(review): presumably overridable later (e.g. via options) -- confirm.
    my $sourcetype = 'hdd';
    
    ###############################################################################
    # write database creation
    # write tables creation
    # scan repository
    #   if IE.xml file found, read its metadata, create SQL add entry
    #   write SQL add entry
    ###############################################################################
    sub write_database_creation ($) {