Skip to content
Snippets Groups Projects

SLUB SIP Builder für BagIt 1.0

Merged Serhiy Bolkun requested to merge bagit into master
Files
56
+ 0
690
#!/usr/bin/env perl
#===============================================================================
#
# FILE: slubsipbuilder.pl
#
# USAGE: ./slubsipbuilder.pl
#
# DESCRIPTION: A CLI tool to create a valid SIP for SLUBArchiv
#
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: related to official document
# "SIP Spezifikation (v1.5)"
# AUTHOR: Andreas Romeyke (romeyke@slub-dresden.de)
# ORGANIZATION: SLUB
# VERSION: 1.1
# CREATED: 2019-07-23
#===============================================================================
use strict;
use warnings;
use Carp;
use 5.28.0;
package SLUB::LZA::SIPBuilder;
use DateTime::Format::ISO8601;
use File::Copy qw(cp);
use File::Find;
use Path::Tiny;
use LWP::UserAgent; # to get MARC data
use MARC::Record;
use XML::LibXML;
use XML::LibXSLT;
use XML::XPath;
use Carp;
use Encode;
# URL of the XSLT that converts MARC21 (slim) XML into MODS; downloaded and
# cached locally by check_marc21_mods_xsl()
my $marc_mods_url = 'http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3-6.xsl';
# URL of the utility XSLT; check_marc21_mods_xsl() replaces this URL inside the
# downloaded MARC21->MODS stylesheet with the path of the local copy
my $marc_utils_url = 'http://www.loc.gov/standards/marcxml/xslt/MARC21slimUtils.xsl';
# SRU base URL that is asked for the record of a given PPN
my $swb_url = 'https://sru.bsz-bw.de/swb';
# SRU search key used in the query (query=<searchkey>=<ppn>)
my $searchkey = "pica.swn";
# value of the SRU "recordSchema" parameter
my $recordschema = "marcxmlvbos";
our $VERSION = '1.2';
# set via the --debug command line switch; when true, intermediate results are
# written to DEBUG_* files and the catalogue URL is printed
our $with_debug=0;
# write_file($filename, $value)
#
# Writes $value to $filename using the UTF-8 encoding layer, replacing any
# existing content.
#   - croaks if one of the arguments is undefined or the file cannot be
#     opened or closed
#   - dies with "invalid parameters." if one of the arguments is an empty string
# Returns 1 on success.
sub write_file($$) {
    croak "filename not defined!" unless defined $_[0];
    croak "value not defined!"    unless defined $_[1];
    die "invalid parameters." if $_[0] eq "" or $_[1] eq "";

    my ($target, $payload) = @_;

    open(my $out, '>:encoding(UTF-8)', $target)
        or croak "Can't open '$target', $!";
    print {$out} $payload;
    # buffered write errors only show up when the handle is closed
    close($out)
        or croak "could not close file '$target', $!";

    return 1;
}
# patch_mods($modsobj)
#
# Workaround for bugs in the XSLT files provided by LOC: applies a small
# patch stylesheet to the given MODS document (expected as XML parser object).
# The patch replaces every mods:relatedItem that contains a
# mods:internetMediaType by an XML comment and copies everything else.
#   - croaks if the argument is undefined
#   - dies with "invalid parameters." if the argument is an empty string
# Returns the result of the transformation.
sub patch_mods($) {
    croak "modsobject not defined!" unless defined $_[0];
    die "invalid parameters." if $_[0] eq "";

    my ($mods_document) = @_;

    # TODO: Bugfix for /mets:mets/mets:dmdSec[1]/mets:mdWrap[1]/mets:xmlData[1]/mods:modsCollection[1]/mods:mods[1]/mods:relatedItem[2]/mods:internetMediaType[1]
    my $patch_xsl_source = <<'MODS_PATCH_XSL';
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:mods="http://www.loc.gov/mods/v3"
xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd"
exclude-result-prefixes="xs"
version="1.0">
<xsl:output encoding="UTF-8" indent="yes" method="xml"/>
<xsl:strip-space elements="*"/>
<xsl:template match="//mods:mods/mods:relatedItem[mods:internetMediaType]">
<xsl:comment>patched wrong //mods:mods/mods:relatedItem[mods:internetMediaType]</xsl:comment>
</xsl:template>
<xsl:template match="@* | node()">
<xsl:copy>
<xsl:apply-templates select="@* | node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
MODS_PATCH_XSL

    my $xslt_engine = XML::LibXSLT->new();
    my $patch_dom   = XML::LibXML->load_xml(string => $patch_xsl_source, no_cdata => 1);
    my $patched     = $xslt_engine->parse_stylesheet($patch_dom)->transform($mods_document);
    return $patched;
}
# patch_marc_response($marcobj)
#
# Applies a patch stylesheet to the MARC record taken from the catalogue
# response (expected as XML parser object): the root "record" element is
# wrapped into a "collection" element and all elements are recreated in the
# namespace http://www.loc.gov/MARC21/slim, attributes are copied by their
# local name.
#   - croaks if the argument is undefined
#   - dies with "invalid parameters." if the argument is an empty string
# Returns the result of the transformation.
sub patch_marc_response($) {
    croak "marcobject not defined!" unless defined $_[0];
    die "invalid parameters." if $_[0] eq "";

    my ($marc_document) = @_;

    my $patch_xsl_source = <<'MARC_PATCH_XSL';
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://www.loc.gov/MARC21/slim" xmlns:srw="http://www.loc.gov/zing/srw/"
exclude-result-prefixes="srw" version="1.0">
<xsl:template match="/record">
<xsl:element name="collection">
<xsl:element name="record" namespace="http://www.loc.gov/MARC21/slim">
<xsl:apply-templates select="@*"/>
<xsl:apply-templates select="node()"/>
</xsl:element>
</xsl:element>
</xsl:template>
<xsl:template match="*">
<xsl:element name="{local-name()}" namespace="http://www.loc.gov/MARC21/slim">
<xsl:apply-templates select="node() | @*"/>
</xsl:element>
</xsl:template>
<xsl:template match="@*">
<xsl:attribute name="{local-name()}">
<xsl:value-of select="."/>
</xsl:attribute>
</xsl:template>
</xsl:stylesheet>
MARC_PATCH_XSL

    my $xslt_engine = XML::LibXSLT->new();
    my $patch_dom   = XML::LibXML->load_xml(string => $patch_xsl_source, no_cdata => 1);
    my $patched     = $xslt_engine->parse_stylesheet($patch_dom)->transform($marc_document);
    return $patched;
}
# check_marc21_utility($xsl_dir, $ua)
#
# Makes sure a local copy of the MARC21 utility xsl exists in $xsl_dir. If the
# file is missing it is downloaded from $marc_utils_url with the given user
# agent and stored via write_file().
#   - croaks if an argument is undefined or the download fails
#   - dies with "invalid parameters." if an argument is an empty string
# Returns the path of the local copy.
sub check_marc21_utility {
    croak "xsl directory not defined!" unless defined $_[0];
    croak "user agent not defined!"    unless defined $_[1];
    die "invalid parameters." if $_[0] eq "" or $_[1] eq "";

    my ($xsl_dir, $ua) = @_;

    my $utils_basename = path($marc_utils_url)->basename;
    my $local_copy     = path($xsl_dir)->child($utils_basename);

    # nothing to do if the stylesheet has been cached already
    return $local_copy if $local_copy->is_file;

    say "Downloading MARC21 utility xsl '$marc_utils_url'";
    my $response = $ua->get($marc_utils_url);
    croak "Failed to download '$marc_utils_url', " . $response->error_as_HTML
        if $response->is_error;

    say "Saving MARC21 utility xsl to file '$local_copy'";
    my $stylesheet_text = $response->decoded_content;
    write_file($local_copy, $stylesheet_text);

    return $local_copy;
}
# check_marc21_mods_xsl($xsl_dir, $ua)
#
# Makes sure a patched local copy of the MARC21->MODS xsl exists in $xsl_dir.
# If the patched file is missing, the stylesheet is downloaded from
# $marc_mods_url, stored unmodified, and a second copy is stored in which
# every occurrence of $marc_utils_url is replaced by the path of the local
# MARC21 utility xsl (see check_marc21_utility()), so it can be used offline.
#   - croaks if an argument is undefined or the download fails
#   - dies with "invalid parameters." if an argument is an empty string
# Returns the path of the patched stylesheet.
sub check_marc21_mods_xsl {
    if (!defined $_[0]) { croak "xsl directory not defined!"; }
    if (!defined $_[1]) { croak "user agent not defined!"; }
    if ($_[0] eq "" || $_[1] eq "") {
        die "invalid parameters.";
    }
    my $xsl_dir = shift;
    my $ua      = shift;
    my $marc_mods_basename         = path($marc_mods_url)->basename;
    my $marc_mods_path             = path($xsl_dir)->child($marc_mods_basename)->stringify;
    my $marc_mods_patched_basename = path($marc_mods_url)->basename(".xsl") . ".patched.xsl";
    my $marc_mods_patched_path     = path($xsl_dir)->child($marc_mods_patched_basename);
    if (!$marc_mods_patched_path->is_file) {
        say "Downloading MARC21->MODS xsl '$marc_mods_url'";
        my $result = $ua->get($marc_mods_url);
        if ($result->is_error) {
            croak "Failed to download '$marc_mods_url', " . $result->error_as_HTML;
        }
        say "Modifying MARC21->MODS xsl for offline use";
        my $xsl = $result->decoded_content;
        write_file($marc_mods_path, $xsl);
        my $xsl_modified    = $xsl;
        my $marc_utils_path = check_marc21_utility($xsl_dir, $ua);
        # \Q...\E: the URL has to be matched literally, its dots must not act
        # as regex wildcards
        $xsl_modified =~ s#\Q$marc_utils_url\E#$marc_utils_path#g;
        # report the file that is really written here (the patched copy)
        say "Saving MARC21->MODS xsl to file '$marc_mods_patched_path'";
        write_file($marc_mods_patched_path, $xsl_modified);
    }
    return $marc_mods_patched_path;
}
# check_xsl_directory()
#
# Determines the directory "xsl" next to the parent directory of this file
# and creates it if it does not exist yet.
# Returns the path of the xsl directory.
sub check_xsl_directory {
    my $script_dir = path(__FILE__)->parent->realpath;
    my $xsl_dir    = $script_dir->parent->child("xsl");

    unless ($xsl_dir->is_dir) {
        say "Rebuilding XSL directory '$xsl_dir'";
        $xsl_dir->mkpath() or confess("could not mkdir '$xsl_dir', $!");
    }

    return $xsl_dir;
}
# specification SRU/SRW BSZ: https://wiki.k10plus.de/pages/viewpage.action?pageId=132874251
#
# get_mods_from($url, $ppn, $key, $schema)
#
# Asks the SRU interface at $url for the record whose search key $key equals
# $ppn (record schema $schema), extracts the content of <recordData/> from the
# response, patches it (patch_marc_response), transforms it with the cached
# MARC21->MODS stylesheet and patches the result again (patch_mods).
#   - croaks if one of the arguments is undefined or if the response contains
#     no <recordData/>
#   - dies with "invalid parameters." if one of the arguments is an empty string
# Returns the MODS document as string. If the HTTP request was not successful
# a warning is issued (carp) and nothing is returned.
# If $with_debug is set, intermediate results are written to DEBUG_<ppn>_*
# files in the current working directory.
sub get_mods_from($$$$) {
if(! defined $_[0]) { croak "url not defined!"; }
if(! defined $_[1]) { croak "ppn not defined!"; }
if(! defined $_[2]) { croak "key not defined!"; }
if(! defined $_[3]) { croak "schema not defined!"; }
if($_[0] eq "" || $_[1] eq "" || $_[2] eq "" || $_[3] eq ""){
die "invalid parameters.";
}
# $mods = ($url, $ppn, $searchkey, $recordschema)
my $url = shift;
my $ppn = shift; # example: "457035137" for "Der Fichtelberg"
my $key = shift;
my $schema = shift;
#### where to find XSLT
my $ua = LWP::UserAgent->new;
$ua->agent("MyApp/0.1 ");
$ua->timeout(3600); #1h
# make sure the xsl directory and local copies of both stylesheets exist
# before the catalogue is asked
my $xsl_dir = check_xsl_directory();
check_marc21_utility($xsl_dir, $ua);
check_marc21_mods_xsl($xsl_dir, $ua);
my $srubase = $url; # host
my $srusearchkey = $key; # SRU search key
my $sruvalue = $ppn;
my $srumaxrecords = 1;
my $srustartrecord = 1;
my $sruschema = $schema;
# NOTE(review): the values are put into the query string as they are (no URL
# escaping) - confirm that callers only pass URL-safe values
my $sru = "${srubase}?version=1.1&query=${srusearchkey}%3D${sruvalue}&operation=searchRetrieve&maximumRecords=${srumaxrecords}&startRecord=${srustartrecord}&recordSchema=${sruschema}";
if ($with_debug) {say "catalog-URL='$sru'";}
my $response = $ua->get($sru); # ask SWB for given PPN
if ($response->is_success) {
# parse ZiNG response, extract MARC-data
my $xp = XML::XPath->new($response->decoded_content);
my $parser = XML::LibXML->new();
if ($with_debug) {
say "write DEBUG_${ppn}_response.xml";
write_file("DEBUG_${ppn}_response.xml", $response->decoded_content);
}
# the XPath uses local-name() so that it matches regardless of the namespace
# prefixes used in the response
my $recordData = $xp->findnodes_as_string('/*[local-name()="searchRetrieveResponse"]/*[local-name()="records"]/*[local-name()="record"]/*[local-name()="recordData"]/*');
if (!$recordData) { croak("ERROR: Did not get any <recordData/> for PPN '$ppn' using '$sru'");}
my $marcblob = $parser->parse_string($recordData);
my $marcblob_patched = patch_marc_response($marcblob);
if ($with_debug) {
say "write DEBUG_${ppn}_marc_unpatched.xml";
write_file("DEBUG_${ppn}_marc_unpatched.xml", $marcblob);
say "write DEBUG_${ppn}_marc.xml";
write_file("DEBUG_${ppn}_marc.xml", $marcblob_patched);
}
# returns the path of the patched (offline usable) MARC21->MODS stylesheet
my $marc_mods_patched_path = check_marc21_mods_xsl($xsl_dir, $ua);
my $xslt = XML::LibXSLT->new();
my $marcmods = XML::LibXML->load_xml(location => $marc_mods_patched_path, no_cdata => 1);
my $stylesheet = $xslt->parse_stylesheet($marcmods);
# the patched MARC document is passed to parse_string() and parsed again
my $marc = $parser->parse_string($marcblob_patched);
my $result = $stylesheet->transform($marc);
if ($with_debug) {
say "write DEBUG_${ppn}_unpatched_mods.xml";
write_file("DEBUG_${ppn}_unpatched_mods.xml", $stylesheet->output_string($result));
}
$result = patch_mods($result);
my $result_string = $stylesheet->output_string($result);
return $result_string;
}
else {
carp("Problem asking catalogue at $url using $ppn");
}
return;
}
# create_filecopyhash($directory, $content)
#
# Walks through $directory and collects for every file found an entry in a
# hash which is keyed by the path of the source file. Each entry contains:
#   source   - path of the source file
#   relative - "data/" followed by the path relative to $directory
#   target   - "$content/" followed by the path relative to $directory
#   md5sum   - MD5 checksum (hex) of the file content
#   - croaks if an argument is undefined
#   - dies with "invalid parameters." if an argument is an empty string
#   - confesses if a file path contains characters outside of
#     [-A-Za-z0-9_.:\/] or if a file cannot be opened
# Returns a reference to the hash.
sub create_filecopyhash {
    if (!defined $_[0]) { croak "directory not defined!"; }
    if (!defined $_[1]) { croak "content not defined!"; }
    if ($_[0] eq "" || $_[1] eq "") {
        die "invalid parameters.";
    }
    my $directory = shift;
    my $content   = shift;
    my %filecopyhash;
    my $wanted = sub {
        # directories themselves are not recorded, only the files in them
        return if -d $_;
        my $file = $File::Find::name;
        # system independent paths
        if ($file !~ m#^[-A-Za-z0-9_\.:\\/]+$#) {
            confess("file '$file' does not match regex '^[-A-Za-z0-9_\.:\\/]+\$'");
        }
        my $source = $file;
        $filecopyhash{$source}->{'source'} = $file;
        # \Q...\E: the directory is a literal prefix, not a pattern; without
        # quoting a path like 'C:\data' or 'in\d' would not be stripped
        $file =~ s#^\Q$directory\E/?##;
        $filecopyhash{$source}{'relative'} = "data/$file";
        $filecopyhash{$source}{'target'}   = "$content/$file";
        open(my $fh, "<", $source) or confess("Can't open '$source', $!");
        binmode($fh);
        my $ctx = Digest::MD5->new;
        $ctx->addfile($fh);
        close($fh);
        $filecopyhash{$source}{'md5sum'} = $ctx->hexdigest;
        return;
    };
    finddepth($wanted, $directory);
    return \%filecopyhash;
}
# prepare_dmd_section_with_ppn($ppn)
#
# Fetches the MODS record for the given PPN from the catalogue (get_mods_from)
# and wraps it into a <mets:dmdSec/>. The XML declaration of the MODS answer
# is replaced by a comment, because it is not allowed inside of the METS.
#   - croaks if $ppn is undefined
#   - croaks if the catalogue did not deliver MODS data; otherwise a SIP with
#     an empty descriptive metadata section would be built silently
# Returns the dmdSec as string.
sub prepare_dmd_section_with_ppn($) {
    if (!defined $_[0]) { croak "ppn not defined!"; }
    my $ppn  = shift;
    my $mods = SLUB::LZA::SIPBuilder::get_mods_from($swb_url, $ppn, $searchkey, $recordschema);
    # get_mods_from() returns nothing if the catalogue request failed
    if (!defined $mods) {
        croak "could not get MODS for PPN '$ppn' from '$swb_url'";
    }
    if ($with_debug) {
        SLUB::LZA::SIPBuilder::write_file("DEBUG_${ppn}_mods.xml", $mods);
    }
    # remove the <xml /> from beginning of the answer
    $mods =~ s#<\?xml version="1.0" encoding="UTF-8"\?>#<!-- removed xml header from mods part -->#;
    my $dmd =<<"DMD";
<mets:dmdSec ID="DMDLOG_0000">
<!-- bibliographic metadata -->
<mets:mdWrap MDTYPE="MODS">
<mets:xmlData>
$mods
</mets:xmlData>
</mets:mdWrap>
</mets:dmdSec>
DMD
    return $dmd;
}
# prepare_dmd_section_with_noppn($noppn)
#
# Builds a <mets:dmdSec/> with a minimalistic MODS record which only contains
# the given identifier. The characters &, < and > of the identifier are
# replaced by their XML entities, so the result stays well-formed.
#   - croaks if $noppn is undefined
# Returns the dmdSec as string.
sub prepare_dmd_section_with_noppn($) {
    if (!defined $_[0]) { croak "noppn not defined!"; }
    my $noppn = shift;
    # the identifier is free text from the command line, escape XML markup
    my %entity_for = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');
    (my $identifier = $noppn) =~ s/([&<>])/$entity_for{$1}/g;
    my $mods =<<"MODS";
<mods version="3.6"
xmlns="http://www.loc.gov/mods/v3"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd">
<identifier>$identifier</identifier>
</mods>
MODS
    my $dmd =<<"DMD";
<mets:dmdSec ID="DMDLOG_0000">
<!-- bibliographic metadata -->
<mets:mdWrap MDTYPE="MODS">
<mets:xmlData>
$mods
</mets:xmlData>
</mets:mdWrap>
</mets:dmdSec>
DMD
    return $dmd;
}
# prepare_amd_section($export_to_archive_date, $external_workflow,
#                     $external_id, $external_conservation_flag,
#                     $external_isil, $external_value_descr)
#
# Builds the <mets:amdSec/> with the SIP metadata for the submission
# application. The element <archive:externalIsilId/> is only written if
# $external_isil is not an empty string. $external_value_descr is decoded
# from UTF-8. The characters &, < and > of all values are replaced by their
# XML entities, so free text like "A & B" does not break the sip.xml.
#   - croaks if one of the arguments is undefined
#   - dies with "invalid parameters." if one of the arguments except
#     $external_isil is an empty string
# Returns the amdSec as string.
sub prepare_amd_section($$$$$$) {
    if (!defined $_[0]) { croak "export to archive date not defined!"; }
    if (!defined $_[1]) { croak "external workflow not defined!"; }
    if (!defined $_[2]) { croak "external id not defined!"; }
    if (!defined $_[3]) { croak "external conservation flag not defined!"; }
    if (!defined $_[4]) { croak "external isil not defined!"; }
    if (!defined $_[5]) { croak "external value description not defined!"; }
    if ($_[0] eq "" || $_[1] eq "" || $_[2] eq "" || $_[3] eq "" || $_[5] eq "") {
        die "invalid parameters.";
    }
    my %entity_for = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');
    my $xml_escape = sub {
        my $text = shift;
        $text =~ s/([&<>])/$entity_for{$1}/g;
        return $text;
    };
    my $export_to_archive_date     = $xml_escape->(shift);
    my $external_workflow          = $xml_escape->(shift);
    my $external_id                = $xml_escape->(shift);
    my $external_conservation_flag = $xml_escape->(shift);
    my $external_isil              = shift;
    my $external_value_descr       = $xml_escape->(decode_utf8(shift));
    # the ISIL is optional: without it the whole element (including its line)
    # is left out
    my $isil_element = ($external_isil eq '')
        ? ''
        : "<archive:externalIsilId>" . $xml_escape->($external_isil) . "</archive:externalIsilId>\n";
    my $amd =<<"AMD";
<mets:amdSec ID="AMD">
<!-- SIP metadata for automated processing by submission application -->
<mets:techMD ID="ARCHIVE">
<mets:mdWrap MDTYPE="OTHER" MIMETYPE="text/xml" OTHERMDTYPE="ARCHIVE">
<mets:xmlData>
<archive:record version="v2019.2" xmlns:archive="http://slub-dresden.de/slubarchiv">
<archive:exportToArchiveDate>$export_to_archive_date</archive:exportToArchiveDate>
<archive:externalId>$external_id</archive:externalId>
<archive:externalWorkflow>$external_workflow</archive:externalWorkflow>
<archive:hasConservationReason>$external_conservation_flag</archive:hasConservationReason>
${isil_element}<archive:archivalValueDescription>$external_value_descr</archive:archivalValueDescription>
</archive:record>
</mets:xmlData>
</mets:mdWrap>
</mets:techMD>
</mets:amdSec>
AMD
    return $amd;
}
# prepare_files_sections($filecopyhash)
#
# Builds the <mets:fileSec/> for all entries of the given filecopyhash (see
# create_filecopyhash). The entries are processed sorted by their key and get
# a running number (starting with 0) as file ID; for each one the MD5
# checksum and the relative path are written.
#   - croaks if $filecopyhash is undefined
# Returns the fileSec as string.
sub prepare_files_sections($) {
    croak "filecopyhash not defined!" unless defined $_[0];

    my ($filecopyhash) = @_;
    my @sources = sort keys %{$filecopyhash};

    # three lines per file: opening tag, location, closing tag
    my @file_lines = map {
        my $source = $sources[$_];
        (
            sprintf(
                qq{<mets:file ID="FILE_%015u_LZA" CHECKSUMTYPE="MD5" CHECKSUM="%s">},
                $_, $filecopyhash->{$source}->{"md5sum"}
            ),
            sprintf(
                qq{<mets:FLocat xmlns:xlink="http://www.w3.org/1999/xlink" LOCTYPE="URL" xlink:href="file://%s"/>},
                $filecopyhash->{$source}->{"relative"}
            ),
            "</mets:file>",
        );
    } 0 .. $#sources;

    my $files = join("\n", @file_lines);
    return <<"FILESEC";
<mets:fileSec>
<mets:fileGrp USE="LZA">
$files
</mets:fileGrp>
</mets:fileSec>
FILESEC
}
# prepare_struct_map($filecopyhash)
#
# Builds the physical <mets:structMap/>: one <mets:div/> with a file pointer
# for each entry of the given filecopyhash, numbered from 0 in the same way
# as the file IDs of prepare_files_sections().
#   - croaks if $filecopyhash is undefined
# Returns the structMap as string.
sub prepare_struct_map($) {
    croak "filecopyhash not defined!" unless defined $_[0];

    my ($filecopyhash) = @_;
    my $file_count = keys %{$filecopyhash};

    # only the number of files matters here, not their names
    my @div_lines;
    for my $number (0 .. $file_count - 1) {
        push @div_lines,
            sprintf(qq{<mets:div ID="PHYS_%015u_LZA" TYPE="fileorderSequence">}, $number),
            sprintf(qq{<mets:fptr FILEID="FILE_%015u_LZA" />}, $number),
            "</mets:div>";
    }

    my $structs = join("\n", @div_lines);
    return <<"STRUCTMAP";
<mets:structMap TYPE="PHYSICAL">
<mets:div ID="PHYS_0000" TYPE="ieDir">
$structs
</mets:div>
</mets:structMap>
STRUCTMAP
}
# check_directory($fullname, $entry)
#
# Checks a single directory entry against the naming rules for files which
# should be archived. $fullname is the path of the entry, $entry its name.
#   - croaks if one of the arguments is undefined
#   - dies with "invalid parameters." if one of the arguments is an empty string
# Returns an empty string if the entry is valid, otherwise the description of
# the first violated rule.
sub check_directory($$) {
    croak "fullname not defined!" unless defined $_[0];
    croak "entry not defined!"    unless defined $_[1];
    die "invalid parameters." if $_[0] eq "" or $_[1] eq "";

    my ($fullname, $entry) = @_;

    # The total length of a path must not exceed 255 characters.
    return "path to file: $fullname is too long, expected maximum 255 characters"
        if length($fullname) > 255;

    # Only the characters A-Za-z0-9_.- are allowed in the names of files and
    # directories (and / as directory separator).
    return "you need to specify a valid file or directory (name: $entry) to (^[A-Za-z0-9_.-]+\$)"
        unless $entry =~ m#^[A-Za-z0-9_.-]+$#;

    # Relative paths with parts of the form "../" are not allowed.
    return "relativ path($fullname) in form '../' is not allowed"
        if $fullname =~ /\.\.\//;

    return '';
}
# validate_directory($directory)
#
# Checks all files and subdirectories below $directory (recursively) with
# check_directory(). The search stops at the first entry which violates a
# rule.
#   - croaks if $directory is undefined
#   - dies if $directory (or one of its subdirectories) cannot be read
# Returns an empty string if all entries are valid, otherwise the error
# description delivered by check_directory().
sub validate_directory($) {
    if (!defined $_[0]) { croak "directory not defined!"; }
    if (!-d $_[0]) {
        die "Could not find directory";
    }
    my $directory = shift;
    my @dirs      = ($directory);    # directories which still have to be visited
    my $dir_err   = '';
    while (@dirs && ($dir_err eq '')) {
        my $thisdir = shift @dirs;
        opendir(my $dh, $thisdir) or die "Could not find $thisdir";
        ENTRY:
        # readdir has to be assigned on its own: in the former
        # 'my $entry = readdir $dh && (...)' the result of '&&' (true/false)
        # was assigned instead of the name, so no entry was ever checked.
        # 'defined' is needed to not stop at an entry named "0".
        while (defined(my $entry = readdir $dh)) {
            next ENTRY if $entry eq '.' or $entry eq '..';
            my $fullname = "$thisdir/$entry";
            $dir_err = check_directory($fullname, $entry);
            last ENTRY if $dir_err ne '';
            if (-d $fullname) {
                push @dirs, $fullname;
            }
        }
        closedir $dh;
    }
    return $dir_err;
}
# end package
package main;
#===============================================================================
# The package SLUB::LZA::SIPBuilder is defined in this file and not in a file
# of its own. Marking it as already loaded in %INC lets the
# 'use SLUB::LZA::SIPBuilder;' below succeed without searching for a
# SLUB/LZA/SIPBuilder.pm file.
BEGIN{
$INC{'SLUB/LZA/SIPBuilder.pm'} = 1; # needed because inlined module
}
# if this file is loaded by other code, only the subs above are provided and
# the command line handling below is skipped
return 1 if caller; # avoids main code running if module stuff is needed
use SLUB::LZA::SIPBuilder;
use Getopt::Long;
use Path::Tiny;
use Digest::MD5;
use constant buffer => 100 * 1024 * 1024; # use 100MB as Buffer
use File::Find;
use File::Copy qw(cp);
use Pod::Usage;
# --- command line handling ----------------------------------------------------
# Values of the command line options, see the POD at the end of the file.
my $directory;
my $ppn;
my $noppn;
my $output;
my $external_id;
my $external_workflow;
my $external_isil="";
my $external_value_descr;
my $external_conservation_flag;
my $help;
my $man;
GetOptions(
    "IE_directory=s"             => \$directory,                              # required
    "ppn=s"                      => \$ppn,                                    # semi-optional (choice 1 of 2)
    "noppn=s"                    => \$noppn,                                  # semi-optional (choice 2 of 2)
    "SIP_output_path=s"          => \$output,                                 # required
    "external_id=s"              => \$external_id,                            # required
    "external_workflow=s"        => \$external_workflow,                      # required
    "external_ISIL=s"            => \$external_isil,                          # optional, default: no ISIL
    "external_value_descr=s"     => \$external_value_descr,                   # required
    "external_conservation_flag" => \$external_conservation_flag,             # optional, default: no special conservation
    "debug"                      => \$SLUB::LZA::SIPBuilder::with_debug,      # optional
    "help|?"                     => \$help,                                   # optional
    "man"                        => \$man,                                    # optional
) or pod2usage(2);
# help and man page have to be handled first, they must work without any
# other option
if ($help) { pod2usage(1); }
if ($man) { pod2usage(-exitval => 0, -verbose => 2); }
# the IE directory has to be given and has to exist before its content can be
# validated (validate_directory() croaks on an undefined directory)
if (!defined $directory) { confess("you need to specify an IE directory, which needs to be archived"); }
if (! -d $directory) { confess("you need to specify an IE directory, which needs to be archived, $!"); }
my $dir_err = SLUB::LZA::SIPBuilder::validate_directory($directory);
if ($dir_err ne '') { confess($dir_err); }
if ((defined $ppn) && (defined $noppn)) { confess("you can only specify either -ppn or -noppn"); }
if ((!defined $ppn) && (!defined $noppn)) { confess("you need to specify a PPN with -ppn or use --noppn"); }
if (!defined $output) { confess("you need to specify an output path, where the SIP will be stored"); }
if (!defined $external_id) { confess("you need to specify external ID"); }
if ((defined $external_id) && ($external_id !~ m#^[a-z0-9_-]+$#)) { confess("you need to specify a valid external ID (^[a-z0-9_-]+\$)"); }
if (!defined $external_workflow) { confess("you need to specify external workflow"); }
if ((defined $external_workflow) && ($external_workflow !~ m#^[a-z0-9_-]+$#)) { confess("you need to specify a valid external workflow (^[a-z0-9_-]+\$)"); }
if (!$external_value_descr) { confess("you need to specify an external value description (reason for archiving)"); }
# the flag is a switch without value, it is mapped to the strings "true"/"false"
if (!defined $external_conservation_flag) { $external_conservation_flag="false"; } else { $external_conservation_flag="true"; }
# work with absolute paths from here on, the output directory is created if needed
$directory = path($directory)->realpath->stringify;
path($output)->mkpath;
$output = path($output)->realpath->stringify;
#===============================================================================
# main()
#
# Builds the SIP from the values given on the command line:
#   1. creates the directory <output>/<SIP name>/data, the SIP name consists
#      of the PPN (or the given identifier) and the current local date
#   2. collects the files of the IE directory and their checksums
#   3. builds the sections of the sip.xml (dmdSec, amdSec, fileSec, structMap)
#      and writes the sip.xml
#   4. copies the files of the IE directory into the data directory
# Confesses if a file cannot be copied. Returns nothing.
sub main {
    # get date, for the directory name 'T' is replaced with '_' and ':' with '-'
    my $export_to_archive_date = DateTime->now(time_zone=>'local')->iso8601();
    (my $file_date = $export_to_archive_date) =~ tr/T:/_-/;

    # prepare dirs
    my $sip_root_dir;
    if (defined $ppn) {
        $sip_root_dir = "PPN-${ppn}_${file_date}";
    }
    else {
        $sip_root_dir = "ID-${noppn}_${file_date}";
    }
    my $sip_dir = path($output)->child($sip_root_dir);
    my $content = $sip_dir->child("data")->stringify;
    path($content)->mkpath;
    my $filecopyhash = SLUB::LZA::SIPBuilder::create_filecopyhash($directory, $content);

    # prepare dmd-sec, either with catalogue data or with the plain identifier
    my $dmd;
    if (defined $ppn) {
        $dmd = SLUB::LZA::SIPBuilder::prepare_dmd_section_with_ppn( $ppn );
    }
    else {
        $dmd = SLUB::LZA::SIPBuilder::prepare_dmd_section_with_noppn( $noppn );
    }

    # prepare amd-sec
    my $amd = SLUB::LZA::SIPBuilder::prepare_amd_section(
        $export_to_archive_date,
        $external_workflow,
        $external_id,
        $external_conservation_flag,
        $external_isil,
        $external_value_descr
    );

    # create fileSec and structmap
    my $filesec   = SLUB::LZA::SIPBuilder::prepare_files_sections($filecopyhash);
    my $structmap = SLUB::LZA::SIPBuilder::prepare_struct_map($filecopyhash);

    # create sip.xml
    my $sip =<<"METS";
<?xml version="1.0" encoding="utf-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version111/mets.xsd">
$dmd
$amd
$filesec
$structmap
</mets:mets>
METS

    # write stuff out
    SLUB::LZA::SIPBuilder::write_file( $sip_dir->child("sip.xml")->stringify, $sip );

    # copy source to target, missing target directories are created on the fly
    for my $source (sort keys %{$filecopyhash}) {
        my $target     = path($filecopyhash->{$source}->{"target"})->stringify; # CHECK ON WINDOWS
        my $target_dir = path($target)->parent->stringify;
        path($target_dir)->mkpath unless -d $target_dir;
        cp($source, $target, buffer)
            or confess ("could not copy from '$source' to '$target', $!");
    }

    say "SIP '$sip_root_dir' build successfully in '$output'";
    return;
}
#===============================================================================
main();
#===============================================================================
__END__
=pod
=head1 NAME
preingest tool "SIP builder" script to create SIPs for SLUBArchive
=head1 SYNOPSIS
slubsipbuilder.pl [options]
Options:
-help brief help message
-man full documentation
-IE_directory=<IE dir> existing IE directory (absolute path!)
-ppn=<ppn>|-noppn=<noppn> SWB-PPN or any identifier (uses minimalistic MODS)
-SIP_output_path=<target dir> where to put the SIP dir (absolute path!)
-external_id=<id> mandatory, should be unique ID
-external_workflow=<workflow> mandatory, should be unique workflow name
-external_ISIL=<isil> optional, ISIL number of library
-external_value_descr=<text> mandatory, the reason why to archive
-external_conservation_flag optional, if set no other "original" still exists
slubsipbuilder.pl --IE_directory=/export_dir_kitodo/10008 --ppn=457035137 --SIP_output_path=/tmp/mysip --external_id=10008 --external_workflow=kitodo --external_ISIL=DE-14 --external_value_descr="Gesetzlicher Auftrag"
=head1 OPTIONS
=over 8
=item B<-help>
Print a brief help message and exits.
=back
=head1 DESCRIPTION
B<This program> will process the given IE directory, add bibliographic metadata from catalogue with given PICA number and check and create a SIP directory ready for SLUBarchiv
=cut
Loading