#!/usr/bin/env perl
#===============================================================================
#
#         FILE: slubsipbuilder.pl
#
#        USAGE: ./slubsipbuilder.pl
#
#  DESCRIPTION: A CLI tool to create a valid SIP for SLUBArchiv
#
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: related to official document
#               "SIP Spezifikation (v1.5)"
#       AUTHOR: Andreas Romeyke (romeyke@slub-dresden.de)
# ORGANIZATION: SLUB
#      VERSION: 1.2
#      CREATED: 2019-07-23
#===============================================================================


use strict;
use warnings;
use Carp;
use 5.28.0;
package SLUB::LZA::SIPBuilder;
    use DateTime::Format::ISO8601;
    use File::Copy qw(cp);
    use File::Find;
    use Path::Tiny;
    use LWP::UserAgent; # to get MARC data
    use MARC::Record;
    use XML::LibXML;
    use XML::LibXSLT;
    use XML::XPath;
    use Carp;
	use Encode;
	
    # URL of the LOC stylesheet converting MARC21slim to MODS 3.6;
    # downloaded and cached by check_marc21_mods_xsl()
    my $marc_mods_url = 'http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3-6.xsl';
    # URL of the utility stylesheet referenced by the MARC21->MODS stylesheet;
    # downloaded and cached by check_marc21_utility()
    my $marc_utils_url = 'http://www.loc.gov/standards/marcxml/xslt/MARC21slimUtils.xsl';
    # SRU endpoint, search key and record schema handed to get_mods_from()
    # when a PPN is given
    my $swb_url = 'https://sru.bsz-bw.de/swb';
    my $searchkey = "pica.swn";
    my $recordschema = "marcxmlvbos";
    our $VERSION = '1.2';
    # set via the --debug command line option; when true, intermediate
    # results are written to DEBUG_* files in the current directory
    our $with_debug=0;


    # Write a value to a file, encoded as UTF-8.
    #
    # Parameters:
    #   1. filename - target path (string or an object stringifying to a path)
    #   2. value    - content to write
    # Returns 1 on success.
    # Croaks if an argument is undefined or if the file cannot be opened,
    # written or closed; dies with "invalid parameters." if an argument
    # stringifies to the empty string.
    sub write_file($$) {
        my ($filename, $value) = @_;
        if (!defined $filename) { croak "filename not defined!"; }
        if (!defined $value)    { croak "value not defined!"; }
        if ($filename eq "" || $value eq "") {
            die "invalid parameters.";
        }
        open(my $fh, '>:encoding(UTF-8)', $filename)
            or croak "Can't open '$filename', $!";
        # a failing print (e.g. full disk) must not be reported as success
        print {$fh} $value
            or croak "could not write to file '$filename', $!";
        close($fh) or croak "could not close file '$filename', $!";
        return 1;
    }

    # Post-process a MODS document to work around bugs in the LOC XSLT files.
    #
    # Parameter: the MODS document (expected as XML parser object).
    # Returns the result of applying the patch stylesheet to that document.
    # Croaks if the argument is undefined, dies if it stringifies to "".
    sub patch_mods($) {
        my ($mods_doc) = @_;
        croak "modsobject not defined!" unless defined $mods_doc;
        die "invalid parameters." if $mods_doc eq "";
        # TODO: Bugfix for /mets:mets/mets:dmdSec[1]/mets:mdWrap[1]/mets:xmlData[1]/mods:modsCollection[1]/mods:mods[1]/mods:relatedItem[2]/mods:internetMediaType[1]
        # The stylesheet copies everything unchanged, except that every
        # mods:relatedItem having a mods:internetMediaType child is replaced
        # by an XML comment.
        my $patch_xsl = <<'MODS_PATCH';
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xmlns:mods="http://www.loc.gov/mods/v3"
    xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd"
    exclude-result-prefixes="xs"
    version="1.0">
    <xsl:output encoding="UTF-8" indent="yes" method="xml"/>
    <xsl:strip-space elements="*"/>
    <xsl:template match="//mods:mods/mods:relatedItem[mods:internetMediaType]">
        <xsl:comment>patched wrong //mods:mods/mods:relatedItem[mods:internetMediaType]</xsl:comment>
    </xsl:template>
    <xsl:template match="@* | node()">
        <xsl:copy>
            <xsl:apply-templates select="@* | node()"/>
        </xsl:copy>
    </xsl:template>
</xsl:stylesheet>
MODS_PATCH
        my $processor  = XML::LibXSLT->new();
        my $patch_doc  = XML::LibXML->load_xml(string => $patch_xsl, no_cdata => 1);
        my $stylesheet = $processor->parse_stylesheet($patch_doc);
        my $patched    = $stylesheet->transform($mods_doc);
        return $patched;
    }

    # Normalize the MARC record extracted from the SRU response.
    #
    # Parameter: the MARC record document (expected as XML parser object).
    # Returns the result of applying the patch stylesheet to that document.
    # Croaks if the argument is undefined, dies if it stringifies to "".
    sub patch_marc_response($) {
        my ($marc_doc) = @_;
        croak "marcobject not defined!" unless defined $marc_doc;
        die "invalid parameters." if $marc_doc eq "";
        # The stylesheet wraps a root <record> into a <collection> and
        # re-creates every element in the MARC21 slim namespace; attributes
        # are copied by their local name.
        my $patch_xsl = <<'MARC_PATCH';
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://www.loc.gov/MARC21/slim" xmlns:srw="http://www.loc.gov/zing/srw/"
    exclude-result-prefixes="srw" version="1.0">
    <xsl:template match="/record">
        <xsl:element name="collection">
            <xsl:element name="record" namespace="http://www.loc.gov/MARC21/slim">
                <xsl:apply-templates select="@*"/>
                <xsl:apply-templates select="node()"/>
            </xsl:element>
        </xsl:element>
    </xsl:template>
    <xsl:template match="*">
        <xsl:element name="{local-name()}" namespace="http://www.loc.gov/MARC21/slim">
            <xsl:apply-templates select="node() | @*"/>
        </xsl:element>
    </xsl:template>
    <xsl:template match="@*">
        <xsl:attribute name="{local-name()}">
            <xsl:value-of select="."/>
        </xsl:attribute>
    </xsl:template>
</xsl:stylesheet>
MARC_PATCH
        my $processor  = XML::LibXSLT->new();
        my $patch_doc  = XML::LibXML->load_xml(string => $patch_xsl, no_cdata => 1);
        my $stylesheet = $processor->parse_stylesheet($patch_doc);
        my $patched    = $stylesheet->transform($marc_doc);
        return $patched;
    }

    # Make sure the MARC21 utility stylesheet exists in the xsl directory,
    # downloading it when it is missing.
    #
    # Parameters:
    #   1. xsl directory - where the stylesheet is cached
    #   2. user agent    - object used to download the stylesheet
    # Returns the path object of the local stylesheet copy.
    # Croaks on undefined arguments or a failed download, dies on empty
    # arguments.
    sub check_marc21_utility {
        my ($xsl_dir, $ua) = @_;
        croak "xsl directory not defined!" unless defined $xsl_dir;
        croak "user agent not defined!" unless defined $ua;
        die "invalid parameters." if $xsl_dir eq "" || $ua eq "";

        my $local_copy = path($xsl_dir)->child( path($marc_utils_url)->basename );
        # nothing to do when a cached copy is already present
        return $local_copy if $local_copy->is_file;

        say "Downloading MARC21 utility xsl '$marc_utils_url'";
        my $response = $ua->get($marc_utils_url);
        croak "Failed to download '$marc_utils_url', " . $response->error_as_HTML
            if $response->is_error;
        say "Saving MARC21 utility xsl to file '$local_copy'";
        my $stylesheet_text = $response->decoded_content;
        write_file($local_copy, $stylesheet_text);
        return $local_copy;
    }

    # Make sure a locally usable ("patched") copy of the MARC21->MODS
    # stylesheet exists in the xsl directory.
    #
    # When the patched copy is missing, the original stylesheet is downloaded
    # and stored, and a second copy is written in which every reference to
    # the remote utility stylesheet is replaced by the path of its local copy.
    #
    # Parameters:
    #   1. xsl directory - where the stylesheets are cached
    #   2. user agent    - object used to download the stylesheets
    # Returns the path object of the patched stylesheet.
    # Croaks on undefined arguments or a failed download, dies on empty
    # arguments.
    sub check_marc21_mods_xsl {
        if (!defined $_[0]) { croak "xsl directory not defined!"; }
        if (!defined $_[1]) { croak "user agent not defined!"; }
        if ($_[0] eq "" || $_[1] eq "") {
            die "invalid parameters.";
        }
        my ($xsl_dir, $ua) = @_;
        my $marc_mods_basename = path($marc_mods_url)->basename;
        my $marc_mods_path = path($xsl_dir)->child($marc_mods_basename)->stringify;
        my $marc_mods_patched_basename = path($marc_mods_url)->basename(".xsl") . ".patched.xsl";
        my $marc_mods_patched_path = path($xsl_dir)->child($marc_mods_patched_basename);
        if (! $marc_mods_patched_path->is_file) {
            say "Downloading MARC21->MODS xsl '$marc_mods_url'";
            my $result = $ua->get($marc_mods_url);
            if ($result->is_error) {
                croak "Failed to download '$marc_mods_url', " . $result->error_as_HTML;
            }
            say "Modifying MARC21->MODS xsl for offline use";
            my $xsl = $result->decoded_content;
            write_file($marc_mods_path, $xsl);
            my $xsl_modified = $xsl;
            my $marc_utils_path = check_marc21_utility($xsl_dir, $ua);
            # \Q...\E: the URL has to be matched literally, its dots must not
            # act as regex wildcards
            $xsl_modified =~ s#\Q$marc_utils_url\E#$marc_utils_path#g;
            # report the file that is actually written here (the patched copy)
            say "Saving MARC21->MODS xsl to file '$marc_mods_patched_path'";
            write_file($marc_mods_patched_path, $xsl_modified);
        }
        return $marc_mods_patched_path;
    }

    # Locate the directory holding the cached XSL files and create it when
    # it does not exist yet.
    #
    # The directory is named "xsl" and sits next to the directory that
    # contains this file. Returns its path object.
    sub check_xsl_directory {
        my $script_dir = path(__FILE__)->parent->realpath;
        my $xsl_dir    = $script_dir->parent->child("xsl");
        unless ($xsl_dir->is_dir) {
            say "Rebuilding XSL directory '$xsl_dir'";
            $xsl_dir->mkpath() || confess("could not mkdir '$xsl_dir', $!");
        }
        return $xsl_dir;
    }

    # Ask a catalogue via SRU for a record and convert it to MODS.
    # specification SRU/SRW BSZ: https://wiki.k10plus.de/pages/viewpage.action?pageId=132874251
    #
    # Parameters:
    #   1. url    - base URL of the SRU service
    #   2. ppn    - value to search for, example: "457035137" for "Der Fichtelberg"
    #   3. key    - SRU search key
    #   4. schema - SRU record schema
    # Returns the MODS document serialized as string. If the catalogue
    # request is not successful a warning is emitted and nothing is returned.
    # Croaks on undefined arguments or when the response contains no record
    # data, dies on empty arguments.
    # With $with_debug set, intermediate results are written to
    # DEBUG_<ppn>_*.xml files.
    sub get_mods_from($$$$) {
        if (!defined $_[0]) { croak "url not defined!"; }
        if (!defined $_[1]) { croak "ppn not defined!"; }
        if (!defined $_[2]) { croak "key not defined!"; }
        if (!defined $_[3]) { croak "schema not defined!"; }
        if ($_[0] eq "" || $_[1] eq "" || $_[2] eq "" || $_[3] eq "") {
            die "invalid parameters.";
        }
        # $mods = ($url, $ppn, $searchkey, $recordschema)
        my ($url, $ppn, $key, $schema) = @_;

        my $ua = LWP::UserAgent->new;
        $ua->agent("MyApp/0.1 ");
        $ua->timeout(3600); #1h

        #### where to find XSLT
        # both stylesheets are fetched on demand; the path of the patched
        # MARC21->MODS stylesheet is kept for the transformation below
        my $xsl_dir = check_xsl_directory();
        check_marc21_utility($xsl_dir, $ua);
        my $marc_mods_patched_path = check_marc21_mods_xsl($xsl_dir, $ua);

        my $srumaxrecords = 1;
        my $srustartrecord = 1;
        my $sru = "${url}?version=1.1&query=${key}%3D${ppn}&operation=searchRetrieve&maximumRecords=${srumaxrecords}&startRecord=${srustartrecord}&recordSchema=${schema}";
        if ($with_debug) {say "catalog-URL='$sru'";}
        my $response = $ua->get($sru); # ask SWB for given PPN
        if (!$response->is_success) {
            # include the HTTP status, otherwise the cause stays invisible
            carp("Problem asking catalogue at $url using $ppn: " . $response->status_line);
            return;
        }

        # parse ZiNG response, extract MARC-data
        my $xp = XML::XPath->new($response->decoded_content);
        my $parser = XML::LibXML->new();
        if ($with_debug) {
            say "write DEBUG_${ppn}_response.xml";
            write_file("DEBUG_${ppn}_response.xml", $response->decoded_content);
        }
        my $recordData = $xp->findnodes_as_string('/*[local-name()="searchRetrieveResponse"]/*[local-name()="records"]/*[local-name()="record"]/*[local-name()="recordData"]/*');
        if (!$recordData) { croak("ERROR: Did not get any <recordData/> for PPN '$ppn' using '$sru'");}
        my $marcblob = $parser->parse_string($recordData);

        my $marcblob_patched = patch_marc_response($marcblob);
        if ($with_debug) {
            say "write DEBUG_${ppn}_marc_unpatched.xml";
            write_file("DEBUG_${ppn}_marc_unpatched.xml", $marcblob);
            say "write DEBUG_${ppn}_marc.xml";
            write_file("DEBUG_${ppn}_marc.xml", $marcblob_patched);
        }

        # MARC21 -> MODS using the locally patched LOC stylesheet
        my $xslt = XML::LibXSLT->new();
        my $marcmods = XML::LibXML->load_xml(location => $marc_mods_patched_path, no_cdata => 1);
        my $stylesheet = $xslt->parse_stylesheet($marcmods);
        my $marc = $parser->parse_string($marcblob_patched);
        my $result = $stylesheet->transform($marc);
        if ($with_debug) {
            say "write DEBUG_${ppn}_unpatched_mods.xml";
            write_file("DEBUG_${ppn}_unpatched_mods.xml", $stylesheet->output_string($result));
        }
        $result = patch_mods($result);
        my $result_string = $stylesheet->output_string($result);
        return $result_string;
    }
	
	# Collect all files below a directory together with their copy targets
	# and MD5 checksums.
	#
	# Parameters:
	#   1. directory - directory to scan recursively
	#   2. content   - target directory the files will be copied to
	# Returns a hash reference keyed by the source file name as reported by
	# File::Find; each value is a hash with the keys
	#   source   - the source file name
	#   relative - "data/" followed by the path relative to the directory
	#   target   - content directory followed by that relative path
	#   md5sum   - MD5 checksum of the file content (hex)
	# Croaks on undefined arguments, dies on empty arguments, confesses on
	# file names with unexpected characters and on unreadable files.
	sub create_filecopyhash {
		if (!defined $_[0]) { croak "directory not defined!"; }
		if (!defined $_[1]) { croak "content not defined!"; }
		if ($_[0] eq "" || $_[1] eq "") {
			die "invalid parameters.";
		}
		my ($directory, $content) = @_;
		# this package does not import Digest::MD5 on its own, so do not
		# rely on some other package having loaded it
		require Digest::MD5;
		my %filecopyhash;
		my $wanted = sub {
			# directories carry no content of their own
			return if -d $_;
			my $file = $File::Find::name;
			# system independent paths
			if ($file !~ m#^[-A-Za-z0-9_\.:\\/]+$#) {
				confess("file '$file' does not match regex '^[-A-Za-z0-9_\.:\\/]+\$'");
			}
			my $source = $file;
			# \Q...\E: the directory is a literal prefix, not a pattern;
			# otherwise backslashes or dots in it change what is matched
			$file =~ s#^\Q$directory\E/?##;
			open(my $fh, "<", $source) or confess("Can't open '$source', $!");
			binmode($fh);
			my $md5 = Digest::MD5->new->addfile($fh)->hexdigest;
			close($fh);
			$filecopyhash{$source} = {
				'source'   => $source,
				'relative' => "data/$file",
				'target'   => "$content/$file",
				'md5sum'   => $md5,
			};
			return;
		};
		finddepth($wanted, $directory);
		return \%filecopyhash;
	}

    # Build the METS dmdSec containing the MODS record fetched from the
    # catalogue for a PPN.
    #
    # Parameter: the PPN to look up.
    # Returns the dmdSec as XML string.
    # Croaks if the PPN is undefined or if no MODS data could be retrieved.
    sub prepare_dmd_section_with_ppn($) {
        if (!defined $_[0]) { croak "ppn not defined!"; }
        my $ppn = shift;
        my $mods = SLUB::LZA::SIPBuilder::get_mods_from($swb_url, $ppn, $searchkey, $recordschema);
        # get_mods_from() only warns and returns nothing when the catalogue
        # request fails; stop here instead of embedding an empty MODS part
        if (!defined $mods) {
            croak "could not retrieve MODS metadata for PPN '$ppn' from '$swb_url'";
        }
        if ($with_debug) {
            SLUB::LZA::SIPBuilder::write_file("DEBUG_${ppn}_mods.xml", $mods);
        }
        # remove the <xml /> from beginning of the answer
        $mods =~ s#<\?xml version="1.0" encoding="UTF-8"\?>#<!-- removed xml header from mods part -->#;
        my $dmd =<<"DMD";
<mets:dmdSec ID="DMDLOG_0000">
  <!-- bibliographic metadata -->
  <mets:mdWrap MDTYPE="MODS">
    <mets:xmlData>
      $mods
    </mets:xmlData>
  </mets:mdWrap>
</mets:dmdSec>
DMD
        return $dmd;
    }

    # Build the METS dmdSec with a minimal MODS record that only carries an
    # identifier (used when no PPN is available).
    #
    # Parameter: the identifier, given as plain text.
    # Returns the dmdSec as XML string.
    # Croaks if the identifier is undefined.
    sub prepare_dmd_section_with_noppn($) {
        if (!defined $_[0]) { croak "noppn not defined!"; }
        my $noppn = shift;
        # the identifier is free text: escape XML markup characters so that
        # values containing '&', '<' or '>' do not break the document
        (my $identifier = $noppn) =~ s/&/&amp;/g;
        $identifier =~ s/</&lt;/g;
        $identifier =~ s/>/&gt;/g;
        my $mods =<<"MODS";
<mods version="3.6"
    xmlns="http://www.loc.gov/mods/v3"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd">
    <identifier>$identifier</identifier>
</mods>
MODS
        my $dmd =<<"DMD";
<mets:dmdSec ID="DMDLOG_0000">
  <!-- bibliographic metadata -->
  <mets:mdWrap MDTYPE="MODS">
    <mets:xmlData>
      $mods
    </mets:xmlData>
  </mets:mdWrap>
</mets:dmdSec>
DMD
        return $dmd;
    }

    # Build the METS amdSec carrying the archive record used by the
    # submission application.
    #
    # Parameters (all given as plain text):
    #   1. export to archive date
    #   2. external workflow
    #   3. external id
    #   4. external conservation flag
    #   5. external ISIL; may be empty, the externalIsilId element is then
    #      omitted
    #   6. external value description, as UTF-8 encoded bytes
    # Returns the amdSec as XML string.
    # Croaks on undefined arguments, dies if any argument except the ISIL
    # is empty.
    sub prepare_amd_section($$$$$$) {
        if (!defined $_[0]) { croak "export to archive date not defined!"; }
        if (!defined $_[1]) { croak "external workflow not defined!"; }
        if (!defined $_[2]) { croak "external id not defined!"; }
        if (!defined $_[3]) { croak "external conservation flag not defined!"; }
        if (!defined $_[4]) { croak "external isil not defined!"; }
        if (!defined $_[5]) { croak "external value description not defined!"; }
        if ($_[0] eq "" || $_[1] eq "" || $_[2] eq "" || $_[3] eq "" || $_[5] eq "") {
            die "invalid parameters.";
        }
        my ($date_raw, $workflow_raw, $id_raw, $flag_raw, $isil_raw, $descr_raw) = @_;

        # the values end up as XML text nodes, so markup characters have to
        # be escaped; values without '&', '<' or '>' pass through unchanged
        my $xml_escape = sub {
            my ($text) = @_;
            $text =~ s/&/&amp;/g;
            $text =~ s/</&lt;/g;
            $text =~ s/>/&gt;/g;
            return $text;
        };
        my $export_to_archive_date     = $xml_escape->($date_raw);
        my $external_workflow          = $xml_escape->($workflow_raw);
        my $external_id                = $xml_escape->($id_raw);
        my $external_conservation_flag = $xml_escape->($flag_raw);
        my $external_value_descr       = $xml_escape->(decode_utf8($descr_raw));

        # the ISIL element is optional; when present it occupies its own
        # line right before the archivalValueDescription element
        my $isil_element = '';
        if ($isil_raw ne '') {
            my $external_isil = $xml_escape->($isil_raw);
            $isil_element = "                        <archive:externalIsilId>$external_isil</archive:externalIsilId>\n";
        }

        my $amd =<<"AMD";
<mets:amdSec ID="AMD">
        <!-- SIP metadata for automated processing by submission application -->
        <mets:techMD ID="ARCHIVE">
            <mets:mdWrap MDTYPE="OTHER" MIMETYPE="text/xml" OTHERMDTYPE="ARCHIVE">
                <mets:xmlData>
                    <archive:record version="v2019.2" xmlns:archive="http://slub-dresden.de/slubarchiv">
                        <archive:exportToArchiveDate>$export_to_archive_date</archive:exportToArchiveDate>
                        <archive:externalId>$external_id</archive:externalId>
                        <archive:externalWorkflow>$external_workflow</archive:externalWorkflow>
                        <archive:hasConservationReason>$external_conservation_flag</archive:hasConservationReason>
${isil_element}                        <archive:archivalValueDescription>$external_value_descr</archive:archivalValueDescription>
                    </archive:record>
                </mets:xmlData>
            </mets:mdWrap>
        </mets:techMD>
    </mets:amdSec>
AMD
        return $amd;
    }

    # Build the METS fileSec listing every file with checksum and location.
    #
    # Parameter: hash reference as produced by create_filecopyhash(); the
    # keys "md5sum" and "relative" of every entry are used.
    # Returns the fileSec as XML string; files are numbered from 0 in the
    # sort order of the hash keys.
    # Croaks if the argument is undefined.
    sub prepare_files_sections($) {
        my ($filecopyhash) = @_;
        croak "filecopyhash not defined!" unless defined $filecopyhash;

        my @sources = sort keys %{$filecopyhash};
        my @fsec = map {
            my $entry = $filecopyhash->{ $sources[$_] };
            (
                sprintf('<mets:file ID="FILE_%015u_LZA" CHECKSUMTYPE="MD5" CHECKSUM="%s">', $_, $entry->{"md5sum"}),
                sprintf('<mets:FLocat xmlns:xlink="http://www.w3.org/1999/xlink" LOCTYPE="URL" xlink:href="file://%s"/>', $entry->{"relative"}),
                '</mets:file>',
            );
        } 0 .. $#sources;
        my $files = join("\n", @fsec);

        return <<"FILESEC";
<mets:fileSec>
  <mets:fileGrp USE="LZA">
  $files
  </mets:fileGrp>
</mets:fileSec>
FILESEC
    }

    # Build the physical METS structMap with one div per file.
    #
    # Parameter: hash reference as produced by create_filecopyhash(); only
    # the number of entries matters here.
    # Returns the structMap as XML string; divs are numbered from 0 and
    # point to the file IDs with the same number.
    # Croaks if the argument is undefined.
    sub prepare_struct_map($) {
        my ($filecopyhash) = @_;
        croak "filecopyhash not defined!" unless defined $filecopyhash;

        my $file_count = scalar keys %{$filecopyhash};
        my @ssec = map {
            (
                sprintf('<mets:div ID="PHYS_%015u_LZA" TYPE="fileorderSequence">', $_),
                sprintf('<mets:fptr FILEID="FILE_%015u_LZA" />', $_),
                '</mets:div>',
            );
        } 0 .. $file_count - 1;
        my $structs = join("\n", @ssec);

        return <<"STRUCTMAP";
<mets:structMap TYPE="PHYSICAL">
  <mets:div ID="PHYS_0000" TYPE="ieDir">
  $structs
  </mets:div>
</mets:structMap>
STRUCTMAP
    }

	# Check a single directory entry against the naming rules for files
	# that are to be archived.
	#
	# Parameters:
	#   1. fullname - path of the entry
	#   2. entry    - name of the entry itself
	# Returns an empty string if the entry is acceptable, otherwise the
	# message describing the first violated rule.
	# Croaks on undefined arguments, dies on empty arguments.
	sub check_directory($$) {
		my ($fullname, $entry) = @_;
		croak "fullname not defined!" unless defined $fullname;
		croak "entry not defined!" unless defined $entry;
		die "invalid parameters." if $fullname eq "" || $entry eq "";

		# The total length of a path must not exceed 255 characters.
		return "path to file: $fullname is too long, expected maximum 255 characters"
			if length($fullname) > 255;

		# File and directory names may only consist of the characters
		# A-Za-z0-9_.- ("/" is reserved as directory separator).
		return "you need to specify a valid file or directory (name: $entry) to (^[A-Za-z0-9_.-]+\$)"
			unless $entry =~ m#^[A-Za-z0-9_.-]+$#;

		# Paths containing components of the form "../" are not allowed.
		return "relativ path($fullname) in form '../' is not allowed"
			if $fullname =~ /\.\.\//;

		return '';
	}
	
	# Check every file and directory below a directory with
	# check_directory().
	#
	# Parameter: the directory to validate.
	# Returns an empty string if all entries are acceptable, otherwise the
	# message for the first offending entry (entries of a directory are
	# visited in sorted order, so the reported entry is reproducible).
	# Croaks if the argument is undefined, dies if it is not a directory or
	# if a directory cannot be opened.
	sub validate_directory($) {
		if (!defined $_[0]) { croak "directory not defined!"; }
		if (! -d $_[0]) {
			die "Could not find directory";
		}
		my $directory = shift;
		# directories still to be visited
		my @dirs = ($directory);
		while (@dirs) {
			my $thisdir = shift @dirs;
			opendir(my $dh, $thisdir) or die "Could not find $thisdir";
			# Read all names first. The former loop condition
			# "my $entry = readdir $dh && (...)" assigned the result of the
			# "&&" expression instead of the entry name, so no real entry
			# was ever checked.
			my @entries = sort grep { $_ ne '.' && $_ ne '..' } readdir $dh;
			closedir $dh;
			foreach my $entry (@entries) {
				# NOTE(review): the path handed over still starts with the
				# directory given by the caller - confirm whether the rules
				# are meant for the path relative to it
				my $fullname = "$thisdir/$entry";
				my $dir_err = check_directory($fullname, $entry);
				# stop at the first violation
				return $dir_err if $dir_err ne '';
				if (-d $fullname) {
					push @dirs, $fullname;
				}
			}
		}
		return '';
	}

    # end package

package main;
#===============================================================================

# The package SLUB::LZA::SIPBuilder is defined in this very file. Marking it
# as loaded at compile time keeps the "use SLUB::LZA::SIPBuilder" below from
# searching @INC for a file of that name.
BEGIN{
    $INC{'SLUB/LZA/SIPBuilder.pm'} = 1; # needed because inlined module
}
# When this file is loaded by other code (caller is set), stop here with a
# true value so that only the package is made available.
return 1 if caller; # avoids main code running if module stuff is needed
use SLUB::LZA::SIPBuilder;
use Getopt::Long;
use Path::Tiny;
use Digest::MD5;
use constant buffer => 100 * 1024 * 1024; # use 100MB as Buffer
use File::Find;
use File::Copy qw(cp);
use Pod::Usage;

# Targets of the command line options.
my $directory;
my $ppn;
my $noppn;
my $output;
my $external_id;
my $external_workflow;
my $external_isil="";
my $external_value_descr;
my $external_conservation_flag;

my $help;
my $man;

GetOptions(
    "IE_directory=s"                => \$directory,                         # required
    "ppn=s"                         => \$ppn,                               # semi-optional (choice 1 of 2)
    "noppn=s"                       => \$noppn,                             # semi-optional (choice 2 of 2)
    "SIP_output_path=s"             => \$output,                            # required
    "external_id=s"                 => \$external_id,                       # required
    "external_workflow=s"           => \$external_workflow,                 # required
    "external_ISIL=s"               => \$external_isil,                     # optional, default: no ISIL
    "external_value_descr=s"        => \$external_value_descr,              # required
    "external_conservation_flag"    => \$external_conservation_flag,        # optional, default: no special conservation
    "debug"                         => \$SLUB::LZA::SIPBuilder::with_debug, # optional
    "help|?"                        => \$help,                              # optional
    "man"                           => \$man,                               # optional
) or pod2usage(2);


if ($help)                                  { pod2usage(1); }
if ($man)                                   { pod2usage(-exitval => 0, -verbose => 2); }
# The existence checks have to come first: validate_directory() itself
# refuses an undefined or missing directory with a far less helpful message.
if (!defined $directory)                    { confess("you need to specify an IE directory, which needs to be archived"); }
if (! -d $directory)                        { confess("you need to specify an IE directory, which needs to be archived, $!"); }
my $dir_err = SLUB::LZA::SIPBuilder::validate_directory($directory);
if ($dir_err ne '')                         { confess($dir_err); }
if ((defined $ppn) && (defined $noppn))     { confess("you can only specify either -ppn or -noppn"); }
if ((!defined $ppn) && (!defined $noppn))   { confess("you need to specify a PPN with -ppn or use --noppn"); }
if (!defined $output)                       { confess("you need to specify an output path, where the SIP will be stored"); }
if (!defined $external_id)                  { confess("you need to specify external ID"); }
if ($external_id !~ m#^[a-z0-9_-]+$#)       { confess("you need to specify a valid external ID (^[a-z0-9_-]+\$)"); }
if (!defined $external_workflow)            { confess("you need to specify external workflow"); }
if ($external_workflow !~ m#^[a-z0-9_-]+$#) { confess("you need to specify a valid external workflow (^[a-z0-9_-]+\$)"); }
if (!$external_value_descr)                 { confess("you need to specify an external value description (reason for archiving)"); }
# the option is a plain switch; the archive record expects the words
# "true" or "false"
if (!defined $external_conservation_flag)   { $external_conservation_flag="false"; } else { $external_conservation_flag="true"; }
# from here on work with absolute, resolved paths
$directory = path($directory)->realpath->stringify;
path($output)->mkpath;
$output = path($output)->realpath->stringify;

#===============================================================================

# Build the SIP: create the SIP directory below the output path, write
# sip.xml (METS with dmdSec, amdSec, fileSec and structMap) and copy all
# files of the IE directory into the "data" subdirectory.
# Works on the option variables set above; returns nothing.
sub main {
    # the export date also provides the suffix of the SIP directory name
    my $export_to_archive_date = DateTime->now(time_zone=>'local')->iso8601();
    (my $file_date = $export_to_archive_date) =~ tr/T:/_-/; # 'T' becomes '_', ':' becomes '-'

    # prepare dirs
    my $sip_root_dir;
    if (defined $ppn) {
        $sip_root_dir = "PPN-${ppn}_${file_date}";
    }
    else {
        $sip_root_dir = "ID-${noppn}_${file_date}";
    }
    my $content = path($output)->child($sip_root_dir)->child("data")->stringify;
    path($content)->mkpath;
    my $filecopyhash = SLUB::LZA::SIPBuilder::create_filecopyhash($directory, $content);

    # prepare dmd-sec
    my $dmd;
    if (defined $ppn) {
        $dmd = SLUB::LZA::SIPBuilder::prepare_dmd_section_with_ppn( $ppn );
    }
    else {
        $dmd = SLUB::LZA::SIPBuilder::prepare_dmd_section_with_noppn( $noppn );
    }
    # prepare amd-sec
    my $amd = SLUB::LZA::SIPBuilder::prepare_amd_section(
        $export_to_archive_date,
        $external_workflow,
        $external_id,
        $external_conservation_flag,
        $external_isil,
        $external_value_descr
    );
    # create fileSec
    my $filesec = SLUB::LZA::SIPBuilder::prepare_files_sections($filecopyhash);
    # prepare structmap
    my $structmap = SLUB::LZA::SIPBuilder::prepare_struct_map($filecopyhash);
    # create sip.xml
    my $sip =<<"METS";
<?xml version="1.0" encoding="utf-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version111/mets.xsd">
    $dmd
    $amd
    $filesec
    $structmap
</mets:mets>
METS

    # write stuff out
    my $sip_xml = path($output)->child($sip_root_dir)->child("sip.xml")->stringify;
    SLUB::LZA::SIPBuilder::write_file( $sip_xml, $sip );
    # copy source to target
    for my $source (sort keys %{$filecopyhash}) {
        my $target = path($filecopyhash->{$source}->{"target"})->stringify; # CHECK ON WINDOWS
        my $target_dir = path($target)->parent->stringify;
        path($target_dir)->mkpath unless -d $target_dir;
        cp($source, $target, buffer) || confess ("could not copy from '$source' to '$target', $!");
    }
    say "SIP '$sip_root_dir' build successfully in '$output'";

    return;
}


#===============================================================================

main();

#===============================================================================


__END__

=pod

=head1 NAME

slubsipbuilder.pl - pre-ingest tool ("SIP builder") that creates SIPs for SLUBArchiv

=head1 SYNOPSIS

slubsipbuilder.pl  [options]

 Options:
        -help                           brief help message
        -man                            full documentation

        -IE_directory=<IE dir>          existing IE directory (absolute path!)
        -ppn=<ppn>|-noppn=<noppn>       SWB-PPN or any identifier (uses minimalistic MODS)
        -SIP_output_path=<target dir>   where to put the SIP dir (absolute path!)
        -external_id=<id>               mandatory, should be unique ID
        -external_workflow=<workflow>   mandatory, should be unique workflow name
        -external_ISIL=<isil>           optional, ISIL number of library
        -external_value_descr=<text>    mandatory, the reason why to archive
        -external_conservation_flag     optional, if set no other "original" still exists

slubsipbuilder.pl --IE_directory=/export_dir_kitodo/10008 --ppn=457035137 --SIP_output_path=/tmp/mysip --external_id=10008 --external_workflow=kitodo --external_ISIL=DE-14 --external_value_descr="Gesetzlicher Auftrag"

=head1 OPTIONS

=over 8

=item B<-help>

Prints a brief help message and exits.

=back

=head1 DESCRIPTION

B<This program> processes the given IE directory, adds bibliographic metadata from the catalogue for the given PICA number (PPN), checks the directory contents and creates a SIP directory ready for SLUBArchiv.

=cut
