From d50fddbf508c0ce983dcfdba20dfdb32a2761992 Mon Sep 17 00:00:00 2001
From: Andreas Romeyke <art1@andreas-romeyke.de>
Date: Sat, 23 Oct 2021 14:06:02 +0200
Subject: [PATCH] - refactoring, extracted package SLUB::LZA::SIPBuilderBagIt
 into lib/

---
 bin/slubsipbuilderbagit.pl      | 488 +-------------------------------
 lib/SLUB/LZA/SIPBuilderBagIt.pm | 484 +++++++++++++++++++++++++++++++
 2 files changed, 486 insertions(+), 486 deletions(-)
 create mode 100644 lib/SLUB/LZA/SIPBuilderBagIt.pm

diff --git a/bin/slubsipbuilderbagit.pl b/bin/slubsipbuilderbagit.pl
index 98cf51e..4be3aa1 100644
--- a/bin/slubsipbuilderbagit.pl
+++ b/bin/slubsipbuilderbagit.pl
@@ -44,491 +44,6 @@ use strict;
 use warnings;
 use utf8;
 
-package SLUB::LZA::SIPBuilderBagIt;
-    use utf8;
-    use feature 'say';
-    use Archive::BagIt;
-    use Carp;
-    use Encode;
-    use File::Copy;
-    use File::Copy::Recursive qw(dircopy dirmove);
-    use File::Basename;
-    use File::Find;
-    use List::Util qw(none);
-    use LWP::UserAgent;          # to get MARC data
-    use MARC::Record;
-    use Path::Tiny;
-    use Term::ANSIColor;         # colored print
-    use XML::LibXML;
-    use XML::XPath;
-
-    our $VERBOSE = 0;            # print output switch 0 => on, 1 and larger = off
-    our $with_debug = 0;         # output debug infos and files
-
-    # catalogue infos
-    my $swb_url        = 'https://sru.bsz-bw.de/swb';
-    my $searchkey_swb  = 'pica.swn';
-    my $searchkey_k10p = 'pica.ppn';
-    my $recordschema_swb  = 'marcxmlvbos';
-    my $recordschema_k10p = 'marcxmlk10os';
-
-    # MARC21 -> bag-info.txt mapping
-    # reference: https://www.loc.gov/marc/bibliographic/bd20x24x.html
-    my $marc21_to_baginfo_mappings = [
-        { tag => "245", code => "a", key => "Title" },
-        { tag => "245", code => "b", key => "Title" },
-        { tag => "245", code => "c", key => "Title" },
-        { tag => "246", code => "a", key => "Title" },
-        { tag => "246", code => "b", key => "Title" },
-    ];
-
-    sub add_metadata($$$$) {
-        if(ref($_[0]) ne 'ARRAY') { croak "array of hashes not defined for bag-info.txt!"; }
-
-        my $refAddBagInfo   = $_[0];    # ref to array of hashes
-        my $ppn             = $_[1];
-        my $noppn           = $_[2];
-        my $marc21          = $_[3];
-
-        # adding entries to bag-info key list
-        push @{$refAddBagInfo}, {'External-Identifier' => "PPN:$ppn"} unless (! $ppn);
-        push @{$refAddBagInfo}, {'External-Identifier' => "$noppn"} unless (! $noppn);
-        if ($marc21) {
-            foreach my $mapping (@{$marc21_to_baginfo_mappings}) {
-                my $text = get_marc21_text_node($marc21, $mapping);
-                push @{$refAddBagInfo}, {$mapping->{key} => $text} unless (! $text);
-            }
-        }
-        return 1;
-    }
-
-    sub get_marc21_text_node($$) {
-        if(! $_[0]) { croak "empty marc21!"; }
-        if(ref($_[1]) ne 'HASH') { croak "not a mapping hash!"; }
-
-        my $marc21  = $_[0];
-        my $mapping = $_[1];
-
-        # retrieve value based on mapping using a xpath
-        my $xpath = "//*[\@tag='" . $mapping->{tag} . "']/";
-        $xpath .= "*[local-name()='subfield' and \@code='" . $mapping->{code} . "']/" unless (! exists $mapping->{code});
-        $xpath .= "text()";
-        # HINT: xpath example //*[@tag='245']/*[local-name()='subfield' and @code='a']/text()
-        my $text = XML::XPath->new($marc21)->findnodes_as_string($xpath);
-
-        return $text;
-    }
-
-    sub get_marc21_from_catalogue($){
-        if(! $_[0]){ croak "ppn not defined!"; }
-
-        my $ppn = $_[0];
-        my $marc21;
-        my $use_fallback;
-
-        eval { # try SWB catalogue
-            $marc21 = SLUB::LZA::SIPBuilderBagIt::get_marc21_from($swb_url, $ppn, $searchkey_swb, $recordschema_swb);
-        };
-        if ($@) { # error
-            say "$@" unless (! $with_debug);
-            if ($@ =~ m#ERROR: Did not get any <recordData/> for PPN#) {
-                print_scalar_data("", "WARNING: '$ppn' not a SWB PPN, fallback to K10plus", "", "yellow");
-                $use_fallback = 1;
-            }
-            else {
-                die "$@"; # throw exception again
-            }
-        }
-
-        if ($use_fallback) {
-            eval { # try K10plus catalogue
-                $marc21 = SLUB::LZA::SIPBuilderBagIt::get_marc21_from($swb_url, $ppn, $searchkey_k10p, $recordschema_k10p);
-            };
-            if ($@) { # error
-                say "$@" unless (!$with_debug);
-                if ($@ =~ m#ERROR: Did not get any <recordData/> for PPN#) {
-                    print_scalar_data("", "ERROR: '$ppn' also not a K10plus PPN", "", "red");
-                    exit 1;
-                }
-                else {
-                    die "$@"; # throw exception again
-                }
-            }
-        }
-
-        print_scalar_data("", "INFO: Found metadata for PPN '$ppn'", "", "white");
-        return $marc21;
-    }
-
-    sub get_marc21_from($$$$){
-        # specification SRU/SRW BSZ: https://wiki.k10plus.de/pages/viewpage.action?pageId=132874251
-        if(! defined $_[0]) { croak "url not defined!"; }
-        if(! defined $_[1]) { croak "ppn not defined!"; }
-        if(! defined $_[2]) { croak "key not defined!"; }
-        if(! defined $_[3]) { croak "schema not defined!"; }
-        if($_[0] eq "" || $_[1] eq "" || $_[2] eq "" || $_[3] eq ""){
-            die "invalid parameters.";
-        }
-        my $url    = shift;
-        my $ppn    = shift; # example: "457035137"  for "Der Fichtelberg"
-        my $key    = shift;
-        my $schema = shift;
-
-        my $ua = LWP::UserAgent->new;
-        $ua->agent("MyApp/0.1 ");
-        $ua->timeout(3600); #1h
-
-        my $srubase        = $url; # host
-        my $srusearchkey   = $key; # SRU search key
-        my $sruvalue       = $ppn;
-        my $srumaxrecords  = 1;
-        my $srustartrecord = 1;
-        my $sruschema      = $schema;
-        my $sru            = "${srubase}?version=1.1&query=${srusearchkey}%3D${sruvalue}&operation=searchRetrieve&maximumRecords=${srumaxrecords}&startRecord=${srustartrecord}&recordSchema=${sruschema}";
-
-        if ($with_debug) {say "catalog-URL='$sru'";}
-        my $response = $ua->get($sru); # ask SWB for given PPN
-        if ($response->is_success) {
-            # parse ZiNG response, extract MARC-data
-            my $xp = XML::XPath->new($response->decoded_content);
-            my $parser = XML::LibXML->new();
-            if ($with_debug) {
-                say "write DEBUG_${ppn}_catalog_response.xml";
-                SLUB::LZA::SIPBuilderBagIt::write_file("DEBUG_${ppn}_catalog_response.xml", $response->decoded_content);
-            }
-            my $recordData = $xp->findnodes_as_string('/*[local-name()="searchRetrieveResponse"]/*[local-name()="records"]/*[local-name()="record"]/*[local-name()="recordData"]/*');
-            if (!$recordData) { croak("ERROR: Did not get any <recordData/> for PPN '$ppn' using '$sru'");}
-            my $marc_doc = $parser->parse_string($recordData);
-            add_marc21_schema_location($marc_doc);
-            if ($with_debug) {
-                say "write DEBUG_${ppn}_extracted_marc_record.xml";
-                SLUB::LZA::SIPBuilderBagIt::write_file("DEBUG_${ppn}_extracted_marc_record.xml", $marc_doc);
-            }
-            return $marc_doc->serialize();
-        }
-        else {
-            carp("Problem asking catalogue at $url using $ppn");
-        }
-
-        return;
-    }
-
-    sub add_marc21_schema_location($) {
-        if(ref($_[0]) ne 'XML::LibXML::Document') { croak "marc xml document required!"; }
-        my $marc_doc = $_[0];
-        my $root = $marc_doc->findnodes('/*[local-name()="record"]')->[0]; # get root element (XML::LibXML::Element)
-        $root->setAttribute('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance'); # add xsi namespace (simple)
-        $root->setAttribute('xsi:schemaLocation', 'http://www.loc.gov/MARC21/slim https://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd'); # add MARCXML schema location
-        # HINT: method sets or replaces the node's attribute
-        1;
-    }
-
-    sub write_file($$){
-        if(! defined $_[0]) { croak "filename not defined!"; }
-        if(! defined $_[1]) { croak "value not defined!"; }
-        if($_[0] eq "" || $_[1] eq ""){ die "invalid parameters."; }
-
-        my $filename = $_[0];
-        my $value    = $_[1];
-
-        # write data to file (UTF-8)
-        open(my $fh, '>:encoding(UTF-8)', $filename) || (croak "Can't open '$filename', $!");
-        print $fh $value;
-        close($fh) || (croak "could not close file '$filename', $!");
-        return 1;
-    }
-
-
-    sub generateBagName($$$){
-        my $file_date = $_[0];
-        my $ppn = $_[1];
-        my $noppn = $_[2];
-
-        $file_date =~ s/T/_/g; # replace 'T' with '_'
-        $file_date =~ s/:/-/g; # replace ':' with '-'
-        return (defined $ppn)? "PPN-${ppn}_${file_date}" : "ID-${noppn}_${file_date}";
-    }
-
-    sub createDir($){
-        my $path = $_[0];
-
-        if(! -d $path && ! -f $path){
-            eval { path($path)->mkpath; };
-            if ($@) { # error
-                print_scalar_data("", "ERROR: Failed to create directory '$path'.", "", "red");
-                die $@;
-            }
-        }
-        return 1;
-    }
-
-    sub copyFilesToMeta($$$$$){
-        my $ppn = $_[0];
-        my $metaPath = $_[1];
-        my $rightsFilePath = $_[2];
-        my $aRefAddMetaFile = $_[3];
-        my $marc21 = $_[4];
-
-        my @addMetaFile = @{ $aRefAddMetaFile };
-
-        if(@addMetaFile){
-            my $i = 1;
-            foreach my $file(@addMetaFile){
-                # check reserved file names
-                my $meta_file_name = basename($file);
-                if($meta_file_name eq "rights.xml" || $meta_file_name eq "marc21.xml"){
-                    foreach my $f(@addMetaFile){
-                        # check if numbered xml file exists
-                        my $f_name = basename($f);
-                        while($f_name eq "$i.xml"){
-                            $i++;
-                        }
-                    }
-                    print_scalar_data("", "WARNING: Renaming " . $meta_file_name . " to $i.xml, because meta filename <rights.xml> or <marc21.xml> is reserved.", "", "yellow");
-                    print_scalar_data("", "INFO: Read Docu for more information at https://slubarchiv.slub-dresden.de/technische-standards-fuer-die-ablieferung-von-digitalen-dokumenten/", "", "white");
-                    copy($file, $metaPath) or die "Copy failed: $!";
-                    rename("$metaPath/$meta_file_name", "$metaPath/$i.xml");
-                    $i++;
-                }else{
-                    copy($file, $metaPath) or die "Copy failed: $!";
-                }
-            }
-        }
-
-        # marc21.xml
-        if ($ppn && $marc21) {
-            write_file("$metaPath/marc21.xml", $marc21);
-        }
-
-        # rights.xml
-        if(basename($rightsFilePath) ne "rights.xml"){
-            my $rights_name = basename($rightsFilePath);
-            print_scalar_data("", "WARNING: Renaming " . $rights_name . " to rights.xml, because meta filename <rights.xml> is required.", "", "yellow");
-            print_scalar_data("", "INFO: Read Docu for more information at https://slubarchiv.slub-dresden.de/technische-standards-fuer-die-ablieferung-von-digitalen-dokumenten/", "", "white");
-            copy($rightsFilePath, $metaPath) or die "Copy failed: $!";
-            rename("$metaPath/$rights_name", "$metaPath/rights.xml");
-        }else{
-            copy($rightsFilePath, $metaPath) or die "Copy failed: $!";
-        }
-
-        return 1;
-    }
-
-    sub validateRightsXML($$){
-        if(! $_[0]) { croak "rightsFilePath not defined!"; }
-        if(! defined $_[1]) { croak "xsd name not defined!"; }
-
-        my $rightsFilePath = $_[0];
-        my $xsdName = $_[1];
-
-        my $rightsName = basename($rightsFilePath);
-        my $xsd_file = path(__FILE__)->parent->realpath->parent->child("xsd")->child($xsdName);    # absolute path
-
-        my $schema = XML::LibXML::Schema->new(location => $xsd_file);
-        my $parser = XML::LibXML->new;
-        my $doc    = $parser->parse_file($rightsFilePath);
-
-        eval {
-            $schema->validate($doc);
-        };
-        if ($@) { # error
-            die "File $rightsName failed validation: $@";
-        };
-        return 1;
-    }
-
-    sub print_scalar_data($$$$){
-        my $priority = 0;
-        my $header   = $_[0];
-        my $p        = $_[1];
-        my $arrRef   = $_[2];
-        my $pColor   = $_[3];
-
-        # header
-        if($header ne ""){
-            print "----------------------------------------------------------------------------------\n" if $priority >= $VERBOSE;
-            printf "%" . length($header) . "s\n", colored($header, 'bold green') if $priority >= $VERBOSE;
-            print "----------------------------------------------------------------------------------\n" if $priority >= $VERBOSE;
-        }
-        # paragraph
-        if(($p ne "") && ($arrRef eq "")){
-            printf "%" . length($p) . "s\n", colored($p, "bold $pColor") if $priority >= $VERBOSE;
-        }
-
-        # paragraph with converted array
-        if(($p ne "") && ($arrRef ne "")){
-            my $string = "";
-            for my $i (0 .. $#$arrRef){
-                if($i eq $#$arrRef){
-                    $string .= " $$arrRef[$i]";
-                } else {
-                    $string .= " $$arrRef[$i],";
-                }
-            }
-            my $body = $p . $string;
-            printf "%" . length($body) . "s\n", colored($body, "bold $pColor") if $priority >= $VERBOSE;
-        }
-
-        return 1;
-    }
-
-    sub create_slub_bagit($$){
-        if(! defined $_[0]) { croak "bagPath is not defined!"; }
-        if(! defined $_[1]) { croak "array of hashes not defined for bag-info.txt!"; }
-        if(! -d $_[0]) { croak "path is not directory!" }
-
-        my $bagPath       = $_[0];
-        my $refAddBagInfo = $_[1];    # ref to array of hashes
-
-        # construct bag
-        my $oArchiveBagIt = Archive::BagIt->make_bag($bagPath);
-        $oArchiveBagIt->bag_info($refAddBagInfo);
-        $oArchiveBagIt->store();
-        print_scalar_data("", "SUCCESS: SLUB SIP at $bagPath build successfully!", "", "green");
-
-        return 1;
-    }
-
-    sub buildBagWithCopyOption($$$$$$$$$){
-        my $ppn = $_[0];
-        my $noppn = $_[1];
-        my $ieDirectory = $_[2];
-        my $rightsFilePath = $_[3];
-        my $aRefAddMetaFile = $_[4];
-        my $refAddBagInfo = $_[5];    # ref to array of hashes
-        my $bagPath = $_[6];
-        my $dataPath = $_[7];
-        my $metaPath = $_[8];
-
-        my @addMetaFile = @{ $aRefAddMetaFile };
-        my @addBagInfo = @{ $refAddBagInfo };
-        my $marc21;
-
-        # get descriptive metadata from catalog
-        if ($ppn) {
-            $marc21 = get_marc21_from_catalogue($ppn);
-        }
-        # create bag dir
-        SLUB::LZA::SIPBuilderBagIt::createDir($bagPath);
-        # create data dir
-        SLUB::LZA::SIPBuilderBagIt::createDir($dataPath);
-        # copy payload files to data
-        print_scalar_data("", "INFO: copying original data...", "", "white");
-        dircopy($ieDirectory, $dataPath) || die ("Error coping $ieDirectory to $dataPath");
-        # create meta dir
-        SLUB::LZA::SIPBuilderBagIt::createDir($metaPath);
-        # copy not payload files to meta
-        SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $marc21);
-        # add metadata for bag-info.txt
-        add_metadata(\@addBagInfo, $ppn, $noppn, $marc21);
-        # metadata warnings
-        SLUB::LZA::SIPBuilderBagIt::checkForTitle(\@addBagInfo);
-        # create slub bagit
-        print_scalar_data("", "INFO: building SIP...", "", "white");
-        SLUB::LZA::SIPBuilderBagIt::create_slub_bagit($bagPath, \@addBagInfo);
-
-        return 1;
-    }
-
-    sub buildBagWithMoveOption($$$$$$$$$){
-        my $ppn = $_[0];
-        my $noppn = $_[1];
-        my $ieDirectory = $_[2];
-        my $rightsFilePath = $_[3];
-        my $aRefAddMetaFile = $_[4];
-        my $refAddBagInfo = $_[5];    # ref to array of hashes
-        my $bagPath = $_[6];
-        my $dataPath = $_[7];
-        my $metaPath = $_[8];
-
-        my @addMetaFile = @{ $aRefAddMetaFile };
-        my @addBagInfo = @{ $refAddBagInfo };
-        my $marc21;
-
-        # get descriptive metadata from catalog
-        if ($ppn) {
-            $marc21 = get_marc21_from_catalogue($ppn);
-        }
-        # create bag dir
-        SLUB::LZA::SIPBuilderBagIt::createDir($bagPath);
-        # move payloads to data dir (which is automatically renamed from IE Directory)
-        print_scalar_data("", "INFO: moving original data...", "", "white");
-        dirmove($ieDirectory, $dataPath) || die ("Error moving $ieDirectory to $dataPath");
-        # create meta dir
-        SLUB::LZA::SIPBuilderBagIt::createDir($metaPath);
-        # copy not payload files to meta
-        SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $marc21);
-        # add metadata for bag-info.txt
-        add_metadata(\@addBagInfo, $ppn, $noppn, $marc21);
-        # metadata warnings
-        SLUB::LZA::SIPBuilderBagIt::checkForTitle(\@addBagInfo);
-        # create slub bagit
-        print_scalar_data("", "INFO: building SIP...", "", "white");
-        SLUB::LZA::SIPBuilderBagIt::create_slub_bagit($bagPath, \@addBagInfo);
-
-        return 1;
-    }
-
-    sub buildBagWithReplaceOption($$$$$$$$$){
-        my $ppn = $_[0];
-        my $noppn = $_[1];
-        my $ieDirectory = $_[2];
-        my $rightsFilePath = $_[3];
-        my $aRefAddMetaFile = $_[4];
-        my $refAddBagInfo = $_[5];    # ref to array of hashes
-        my $bagPath = $_[6];
-        my $dataPath = $_[7];
-        my $metaPath = $_[8];
-
-        my @addMetaFile = @{ $aRefAddMetaFile };
-        my @addBagInfo = @{ $refAddBagInfo };
-        my $marc21;
-
-        # get descriptive metadata from catalog
-        if ($ppn) {
-            $marc21 = get_marc21_from_catalogue($ppn);
-        }
-        # create bag dir
-        SLUB::LZA::SIPBuilderBagIt::createDir($bagPath);
-        # move payloads to data dir (which is automatically renamed from IE Directory)
-        print_scalar_data("", "INFO: moving original data...", "", "white");
-        dirmove($ieDirectory, $dataPath) || die ("Error moving $ieDirectory to $dataPath");
-        # create meta dir
-        SLUB::LZA::SIPBuilderBagIt::createDir($metaPath);
-        # copy not payload files to meta
-        SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $marc21);
-        # add metadata for bag-info.txt
-        add_metadata(\@addBagInfo, $ppn, $noppn, $marc21);
-        # metadata warnings
-        SLUB::LZA::SIPBuilderBagIt::checkForTitle(\@addBagInfo);
-        # create slub bagit
-        print_scalar_data("", "INFO: building SIP...", "", "white");
-        SLUB::LZA::SIPBuilderBagIt::create_slub_bagit($bagPath, \@addBagInfo);
-
-        return 1;
-    }
-
-    sub checkForTitle($){
-        if(ref($_[0]) ne 'ARRAY') { croak "no array of hashes with bag-info.txt key values given!"; }
-
-        my $key_hash_list_aref = $_[0];
-
-        # collect all bag-info keys for easy check
-        my @keys;
-        for my $hash (@{$key_hash_list_aref}) {
-            push @keys, keys %{$hash};
-        }
-        # check for title
-        if (none { $_ =~ m/^[Tt][Ii][Tt][Ll][Ee]/ } @keys) {
-            print_scalar_data("", "WARNING: No Title given. Please consider adding a human-readable title with '--add_key_value=Title:YOUR-TITLE'", "", "yellow");
-            return 0;
-        }
-        return 1;
-    }
-
-# end package
-
-package main;
 #===============================================================================
 
 BEGIN{
@@ -794,9 +309,10 @@ sub main{
 #===============================================================================
 
 main();
-
+1;
 #===============================================================================
 
+
 __END__
 
 =pod
diff --git a/lib/SLUB/LZA/SIPBuilderBagIt.pm b/lib/SLUB/LZA/SIPBuilderBagIt.pm
new file mode 100644
index 0000000..6b390b7
--- /dev/null
+++ b/lib/SLUB/LZA/SIPBuilderBagIt.pm
@@ -0,0 +1,484 @@
+package SLUB::LZA::SIPBuilderBagIt;
+use strict;
+use warnings;
+use utf8;
+use feature 'say';
+use Archive::BagIt;
+use Carp;
+use Encode;
+use File::Copy;
+use File::Copy::Recursive qw(dircopy dirmove);
+use File::Basename;
+use File::Find;
+use List::Util qw(none);
+use LWP::UserAgent;          # to get MARC data
+use MARC::Record;
+use Path::Tiny;
+use Term::ANSIColor;         # colored print
+use XML::LibXML;
+use XML::XPath;
+
+our $VERBOSE = 0;            # print output switch 0 => on, 1 and larger = off
+our $with_debug = 0;         # output debug infos and files
+
+# catalogue infos
+my $swb_url        = 'https://sru.bsz-bw.de/swb';
+my $searchkey_swb  = 'pica.swn';
+my $searchkey_k10p = 'pica.ppn';
+my $recordschema_swb  = 'marcxmlvbos';
+my $recordschema_k10p = 'marcxmlk10os';
+
+# MARC21 -> bag-info.txt mapping
+# reference: https://www.loc.gov/marc/bibliographic/bd20x24x.html
+my $marc21_to_baginfo_mappings = [
+    { tag => "245", code => "a", key => "Title" },
+    { tag => "245", code => "b", key => "Title" },
+    { tag => "245", code => "c", key => "Title" },
+    { tag => "246", code => "a", key => "Title" },
+    { tag => "246", code => "b", key => "Title" },
+];
+
+sub add_metadata($$$$) {
+    if(ref($_[0]) ne 'ARRAY') { croak "array of hashes not defined for bag-info.txt!"; }
+
+    my $refAddBagInfo   = $_[0];    # ref to array of hashes
+    my $ppn             = $_[1];
+    my $noppn           = $_[2];
+    my $marc21          = $_[3];
+
+    # adding entries to bag-info key list
+    push @{$refAddBagInfo}, {'External-Identifier' => "PPN:$ppn"} unless (! $ppn);
+    push @{$refAddBagInfo}, {'External-Identifier' => "$noppn"} unless (! $noppn);
+    if ($marc21) {
+        foreach my $mapping (@{$marc21_to_baginfo_mappings}) {
+            my $text = get_marc21_text_node($marc21, $mapping);
+            push @{$refAddBagInfo}, {$mapping->{key} => $text} unless (! $text);
+        }
+    }
+    return 1;
+}
+
+sub get_marc21_text_node($$) {
+    if(! $_[0]) { croak "empty marc21!"; }
+    if(ref($_[1]) ne 'HASH') { croak "not a mapping hash!"; }
+
+    my $marc21  = $_[0];
+    my $mapping = $_[1];
+
+    # retrieve value based on mapping using a xpath
+    my $xpath = "//*[\@tag='" . $mapping->{tag} . "']/";
+    $xpath .= "*[local-name()='subfield' and \@code='" . $mapping->{code} . "']/" unless (! exists $mapping->{code});
+    $xpath .= "text()";
+    # HINT: xpath example //*[@tag='245']/*[local-name()='subfield' and @code='a']/text()
+    my $text = XML::XPath->new($marc21)->findnodes_as_string($xpath);
+
+    return $text;
+}
+
+sub get_marc21_from_catalogue($){
+    if(! $_[0]){ croak "ppn not defined!"; }
+
+    my $ppn = $_[0];
+    my $marc21;
+    my $use_fallback;
+
+    eval { # try SWB catalogue
+        $marc21 = SLUB::LZA::SIPBuilderBagIt::get_marc21_from($swb_url, $ppn, $searchkey_swb, $recordschema_swb);
+    };
+    if ($@) { # error
+        say "$@" unless (! $with_debug);
+        if ($@ =~ m#ERROR: Did not get any <recordData/> for PPN#) {
+            print_scalar_data("", "WARNING: '$ppn' not a SWB PPN, fallback to K10plus", "", "yellow");
+            $use_fallback = 1;
+        }
+        else {
+            die "$@"; # throw exception again
+        }
+    }
+
+    if ($use_fallback) {
+        eval { # try K10plus catalogue
+            $marc21 = SLUB::LZA::SIPBuilderBagIt::get_marc21_from($swb_url, $ppn, $searchkey_k10p, $recordschema_k10p);
+        };
+        if ($@) { # error
+            say "$@" unless (!$with_debug);
+            if ($@ =~ m#ERROR: Did not get any <recordData/> for PPN#) {
+                print_scalar_data("", "ERROR: '$ppn' also not a K10plus PPN", "", "red");
+                exit 1;
+            }
+            else {
+                die "$@"; # throw exception again
+            }
+        }
+    }
+
+    print_scalar_data("", "INFO: Found metadata for PPN '$ppn'", "", "white");
+    return $marc21;
+}
+
+sub get_marc21_from($$$$){
+    # specification SRU/SRW BSZ: https://wiki.k10plus.de/pages/viewpage.action?pageId=132874251
+    if(! defined $_[0]) { croak "url not defined!"; }
+    if(! defined $_[1]) { croak "ppn not defined!"; }
+    if(! defined $_[2]) { croak "key not defined!"; }
+    if(! defined $_[3]) { croak "schema not defined!"; }
+    if($_[0] eq "" || $_[1] eq "" || $_[2] eq "" || $_[3] eq ""){
+        die "invalid parameters.";
+    }
+    my $url    = shift;
+    my $ppn    = shift; # example: "457035137"  for "Der Fichtelberg"
+    my $key    = shift;
+    my $schema = shift;
+
+    my $ua = LWP::UserAgent->new;
+    $ua->agent("MyApp/0.1 ");
+    $ua->timeout(3600); #1h
+
+    my $srubase        = $url; # host
+    my $srusearchkey   = $key; # SRU search key
+    my $sruvalue       = $ppn;
+    my $srumaxrecords  = 1;
+    my $srustartrecord = 1;
+    my $sruschema      = $schema;
+    my $sru            = "${srubase}?version=1.1&query=${srusearchkey}%3D${sruvalue}&operation=searchRetrieve&maximumRecords=${srumaxrecords}&startRecord=${srustartrecord}&recordSchema=${sruschema}";
+
+    if ($with_debug) {say "catalog-URL='$sru'";}
+    my $response = $ua->get($sru); # ask SWB for given PPN
+    if ($response->is_success) {
+        # parse ZiNG response, extract MARC-data
+        my $xp = XML::XPath->new($response->decoded_content);
+        my $parser = XML::LibXML->new();
+        if ($with_debug) {
+            say "write DEBUG_${ppn}_catalog_response.xml";
+            SLUB::LZA::SIPBuilderBagIt::write_file("DEBUG_${ppn}_catalog_response.xml", $response->decoded_content);
+        }
+        my $recordData = $xp->findnodes_as_string('/*[local-name()="searchRetrieveResponse"]/*[local-name()="records"]/*[local-name()="record"]/*[local-name()="recordData"]/*');
+        if (!$recordData) { croak("ERROR: Did not get any <recordData/> for PPN '$ppn' using '$sru'");}
+        my $marc_doc = $parser->parse_string($recordData);
+        add_marc21_schema_location($marc_doc);
+        if ($with_debug) {
+            say "write DEBUG_${ppn}_extracted_marc_record.xml";
+            SLUB::LZA::SIPBuilderBagIt::write_file("DEBUG_${ppn}_extracted_marc_record.xml", $marc_doc);
+        }
+        return $marc_doc->serialize();
+    }
+    else {
+        carp("Problem asking catalogue at $url using $ppn");
+    }
+
+    return;
+}
+
+sub add_marc21_schema_location($) {
+    if(ref($_[0]) ne 'XML::LibXML::Document') { croak "marc xml document required!"; }
+    my $marc_doc = $_[0];
+    my $root = $marc_doc->findnodes('/*[local-name()="record"]')->[0]; # get root element (XML::LibXML::Element)
+    $root->setAttribute('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance'); # add xsi namespace (simple)
+    $root->setAttribute('xsi:schemaLocation', 'http://www.loc.gov/MARC21/slim https://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd'); # add MARCXML schema location
+    # HINT: method sets or replaces the node's attribute
+    1;
+}
+
+sub write_file($$){
+    if(! defined $_[0]) { croak "filename not defined!"; }
+    if(! defined $_[1]) { croak "value not defined!"; }
+    if($_[0] eq "" || $_[1] eq ""){ die "invalid parameters."; }
+
+    my $filename = $_[0];
+    my $value    = $_[1];
+
+    # write data to file (UTF-8)
+    open(my $fh, '>:encoding(UTF-8)', $filename) || (croak "Can't open '$filename', $!");
+    print $fh $value;
+    close($fh) || (croak "could not close file '$filename', $!");
+    return 1;
+}
+
+
+sub generateBagName($$$){
+    my $file_date = $_[0];
+    my $ppn = $_[1];
+    my $noppn = $_[2];
+
+    $file_date =~ s/T/_/g; # replace 'T' with '_'
+    $file_date =~ s/:/-/g; # replace ':' with '-'
+    return (defined $ppn)? "PPN-${ppn}_${file_date}" : "ID-${noppn}_${file_date}";
+}
+
+sub createDir($){
+    my $path = $_[0];
+
+    if(! -d $path && ! -f $path){
+        eval { path($path)->mkpath; };
+        if ($@) { # error
+            print_scalar_data("", "ERROR: Failed to create directory '$path'.", "", "red");
+            die $@;
+        }
+    }
+    return 1;
+}
+
+sub copyFilesToMeta($$$$$){
+    my $ppn = $_[0];
+    my $metaPath = $_[1];
+    my $rightsFilePath = $_[2];
+    my $aRefAddMetaFile = $_[3];
+    my $marc21 = $_[4];
+
+    my @addMetaFile = @{ $aRefAddMetaFile };
+
+    if(@addMetaFile){
+        my $i = 1;
+        foreach my $file(@addMetaFile){
+            # check reserved file names
+            my $meta_file_name = basename($file);
+            if($meta_file_name eq "rights.xml" || $meta_file_name eq "marc21.xml"){
+                foreach my $f(@addMetaFile){
+                    # check if numbered xml file exists
+                    my $f_name = basename($f);
+                    while($f_name eq "$i.xml"){
+                        $i++;
+                    }
+                }
+                print_scalar_data("", "WARNING: Renaming " . $meta_file_name . " to $i.xml, because meta filename <rights.xml> or <marc21.xml> is reserved.", "", "yellow");
+                print_scalar_data("", "INFO: Read Docu for more information at https://slubarchiv.slub-dresden.de/technische-standards-fuer-die-ablieferung-von-digitalen-dokumenten/", "", "white");
+                copy($file, $metaPath) or die "Copy failed: $!";
+                rename("$metaPath/$meta_file_name", "$metaPath/$i.xml");
+                $i++;
+            }else{
+                copy($file, $metaPath) or die "Copy failed: $!";
+            }
+        }
+    }
+
+    # marc21.xml
+    if ($ppn && $marc21) {
+        write_file("$metaPath/marc21.xml", $marc21);
+    }
+
+    # rights.xml
+    if(basename($rightsFilePath) ne "rights.xml"){
+        my $rights_name = basename($rightsFilePath);
+        print_scalar_data("", "WARNING: Renaming " . $rights_name . " to rights.xml, because meta filename <rights.xml> is required.", "", "yellow");
+        print_scalar_data("", "INFO: Read Docu for more information at https://slubarchiv.slub-dresden.de/technische-standards-fuer-die-ablieferung-von-digitalen-dokumenten/", "", "white");
+        copy($rightsFilePath, $metaPath) or die "Copy failed: $!";
+        rename("$metaPath/$rights_name", "$metaPath/rights.xml");
+    }else{
+        copy($rightsFilePath, $metaPath) or die "Copy failed: $!";
+    }
+
+    return 1;
+}
+
+sub validateRightsXML($$){
+    if(! $_[0]) { croak "rightsFilePath not defined!"; }
+    if(! defined $_[1]) { croak "xsd name not defined!"; }
+
+    my $rightsFilePath = $_[0];
+    my $xsdName = $_[1];
+
+    my $rightsName = basename($rightsFilePath);
+    my $xsd_file = path(__FILE__)->parent->realpath->parent->child("xsd")->child($xsdName);    # absolute path
+
+    my $schema = XML::LibXML::Schema->new(location => $xsd_file);
+    my $parser = XML::LibXML->new;
+    my $doc    = $parser->parse_file($rightsFilePath);
+
+    eval {
+        $schema->validate($doc);
+    };
+    if ($@) { # error
+        die "File $rightsName failed validation: $@";
+    };
+    return 1;
+}
+
+sub print_scalar_data($$$$){
+    my $priority = 0;
+    my $header   = $_[0];
+    my $p        = $_[1];
+    my $arrRef   = $_[2];
+    my $pColor   = $_[3];
+
+    # header
+    if($header ne ""){
+        print "----------------------------------------------------------------------------------\n" if $priority >= $VERBOSE;
+        printf "%" . length($header) . "s\n", colored($header, 'bold green') if $priority >= $VERBOSE;
+        print "----------------------------------------------------------------------------------\n" if $priority >= $VERBOSE;
+    }
+    # paragraph
+    if(($p ne "") && ($arrRef eq "")){
+        printf "%" . length($p) . "s\n", colored($p, "bold $pColor") if $priority >= $VERBOSE;
+    }
+
+    # paragraph with converted array
+    if(($p ne "") && ($arrRef ne "")){
+        my $string = "";
+        for my $i (0 .. $#$arrRef){
+            if($i eq $#$arrRef){
+                $string .= " $$arrRef[$i]";
+            } else {
+                $string .= " $$arrRef[$i],";
+            }
+        }
+        my $body = $p . $string;
+        printf "%" . length($body) . "s\n", colored($body, "bold $pColor") if $priority >= $VERBOSE;
+    }
+
+    return 1;
+}
+
+sub create_slub_bagit($$){
+    if(! defined $_[0]) { croak "bagPath is not defined!"; }
+    if(! defined $_[1]) { croak "array of hashes not defined for bag-info.txt!"; }
+    if(! -d $_[0]) { croak "path is not directory!" }
+
+    my $bagPath       = $_[0];
+    my $refAddBagInfo = $_[1];    # ref to array of hashes
+
+    # construct bag
+    my $oArchiveBagIt = Archive::BagIt->make_bag($bagPath);
+    $oArchiveBagIt->bag_info($refAddBagInfo);
+    $oArchiveBagIt->store();
+    print_scalar_data("", "SUCCESS: SLUB SIP at $bagPath build successfully!", "", "green");
+
+    return 1;
+}
+
+sub buildBagWithCopyOption($$$$$$$$$){
+    my $ppn = $_[0];
+    my $noppn = $_[1];
+    my $ieDirectory = $_[2];
+    my $rightsFilePath = $_[3];
+    my $aRefAddMetaFile = $_[4];
+    my $refAddBagInfo = $_[5];    # ref to array of hashes
+    my $bagPath = $_[6];
+    my $dataPath = $_[7];
+    my $metaPath = $_[8];
+
+    my @addMetaFile = @{ $aRefAddMetaFile };
+    my @addBagInfo = @{ $refAddBagInfo };
+    my $marc21;
+
+    # get descriptive metadata from catalog
+    if ($ppn) {
+        $marc21 = get_marc21_from_catalogue($ppn);
+    }
+    # create bag dir
+    SLUB::LZA::SIPBuilderBagIt::createDir($bagPath);
+    # create data dir
+    SLUB::LZA::SIPBuilderBagIt::createDir($dataPath);
+    # copy payload files to data
+    print_scalar_data("", "INFO: copying original data...", "", "white");
+    dircopy($ieDirectory, $dataPath) || die ("Error coping $ieDirectory to $dataPath");
+    # create meta dir
+    SLUB::LZA::SIPBuilderBagIt::createDir($metaPath);
+    # copy not payload files to meta
+    SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $marc21);
+    # add metadata for bag-info.txt
+    add_metadata(\@addBagInfo, $ppn, $noppn, $marc21);
+    # metadata warnings
+    SLUB::LZA::SIPBuilderBagIt::checkForTitle(\@addBagInfo);
+    # create slub bagit
+    print_scalar_data("", "INFO: building SIP...", "", "white");
+    SLUB::LZA::SIPBuilderBagIt::create_slub_bagit($bagPath, \@addBagInfo);
+
+    return 1;
+}
+
+sub buildBagWithMoveOption($$$$$$$$$){
+    my $ppn = $_[0];
+    my $noppn = $_[1];
+    my $ieDirectory = $_[2];
+    my $rightsFilePath = $_[3];
+    my $aRefAddMetaFile = $_[4];
+    my $refAddBagInfo = $_[5];    # ref to array of hashes
+    my $bagPath = $_[6];
+    my $dataPath = $_[7];
+    my $metaPath = $_[8];
+
+    my @addMetaFile = @{ $aRefAddMetaFile };
+    my @addBagInfo = @{ $refAddBagInfo };
+    my $marc21;
+
+    # get descriptive metadata from catalog
+    if ($ppn) {
+        $marc21 = get_marc21_from_catalogue($ppn);
+    }
+    # create bag dir
+    SLUB::LZA::SIPBuilderBagIt::createDir($bagPath);
+    # move payloads to data dir (which is automatically renamed from IE Directory)
+    print_scalar_data("", "INFO: moving original data...", "", "white");
+    dirmove($ieDirectory, $dataPath) || die ("Error moving $ieDirectory to $dataPath");
+    # create meta dir
+    SLUB::LZA::SIPBuilderBagIt::createDir($metaPath);
+    # copy not payload files to meta
+    SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $marc21);
+    # add metadata for bag-info.txt
+    add_metadata(\@addBagInfo, $ppn, $noppn, $marc21);
+    # metadata warnings
+    SLUB::LZA::SIPBuilderBagIt::checkForTitle(\@addBagInfo);
+    # create slub bagit
+    print_scalar_data("", "INFO: building SIP...", "", "white");
+    SLUB::LZA::SIPBuilderBagIt::create_slub_bagit($bagPath, \@addBagInfo);
+
+    return 1;
+}
+
+sub buildBagWithReplaceOption($$$$$$$$$){
+    my $ppn = $_[0];
+    my $noppn = $_[1];
+    my $ieDirectory = $_[2];
+    my $rightsFilePath = $_[3];
+    my $aRefAddMetaFile = $_[4];
+    my $refAddBagInfo = $_[5];    # ref to array of hashes
+    my $bagPath = $_[6];
+    my $dataPath = $_[7];
+    my $metaPath = $_[8];
+
+    my @addMetaFile = @{ $aRefAddMetaFile };
+    my @addBagInfo = @{ $refAddBagInfo };
+    my $marc21;
+
+    # get descriptive metadata from catalog
+    if ($ppn) {
+        $marc21 = get_marc21_from_catalogue($ppn);
+    }
+    # create bag dir
+    SLUB::LZA::SIPBuilderBagIt::createDir($bagPath);
+    # move payloads to data dir (which is automatically renamed from IE Directory)
+    print_scalar_data("", "INFO: moving original data...", "", "white");
+    dirmove($ieDirectory, $dataPath) || die ("Error moving $ieDirectory to $dataPath");
+    # create meta dir
+    SLUB::LZA::SIPBuilderBagIt::createDir($metaPath);
+    # copy not payload files to meta
+    SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $marc21);
+    # add metadata for bag-info.txt
+    add_metadata(\@addBagInfo, $ppn, $noppn, $marc21);
+    # metadata warnings
+    SLUB::LZA::SIPBuilderBagIt::checkForTitle(\@addBagInfo);
+    # create slub bagit
+    print_scalar_data("", "INFO: building SIP...", "", "white");
+    SLUB::LZA::SIPBuilderBagIt::create_slub_bagit($bagPath, \@addBagInfo);
+
+    return 1;
+}
+
+sub checkForTitle($){
+    if(ref($_[0]) ne 'ARRAY') { croak "no array of hashes with bag-info.txt key values given!"; }
+
+    my $key_hash_list_aref = $_[0];
+
+    # collect all bag-info keys for easy check
+    my @keys;
+    for my $hash (@{$key_hash_list_aref}) {
+        push @keys, keys %{$hash};
+    }
+    # check for title
+    if (none { $_ =~ m/^[Tt][Ii][Tt][Ll][Ee]/ } @keys) {
+        print_scalar_data("", "WARNING: No Title given. Please consider adding a human-readable title with '--add_key_value=Title:YOUR-TITLE'", "", "yellow");
+        return 0;
+    }
+    return 1;
+}
+1;
\ No newline at end of file
-- 
GitLab