Skip to content
Snippets Groups Projects
Commit 4248c6fe authored by Andreas Romeyke's avatar Andreas Romeyke
Browse files

Merge branch 'feature_offline_xslt'

parents cd916fba 44eca77d
No related branches found
No related tags found
No related merge requests found
...@@ -10,44 +10,54 @@ ...@@ -10,44 +10,54 @@
# REQUIREMENTS: --- # REQUIREMENTS: ---
# BUGS: --- # BUGS: ---
# NOTES: related to official document # NOTES: related to official document
# "SIP Spezifikation (v1.4.1)" # "SIP Spezifikation (v1.4.2)"
# AUTHOR: Andreas Romeyke (romeyke@slub-dresden.de) # AUTHOR: Andreas Romeyke (romeyke@slub-dresden.de)
# ORGANIZATION: SLUB # ORGANIZATION: SLUB
# VERSION: 1.1 # VERSION: 1.1
# CREATED: 10.05.2016 # CREATED: 2019-07-23
#=============================================================================== #===============================================================================
use strict; use strict;
use warnings; use warnings;
use Carp; use Carp;
use 5.20.0; use 5.28.0;
use strict; package SLUB::LZA::SIPBuilder;
use warnings;
use Archive::Zip::SimpleZip qw($SimpleZipError);
use Cwd;
use DateTime::Format::ISO8601; use DateTime::Format::ISO8601;
use Digest::MD5 qw(md5);
use File::Basename;
use File::Copy qw(cp); use File::Copy qw(cp);
use File::Find; use File::Find;
use File::Path; use Path::Tiny;
use File::Slurp;
use Getopt::Long;
use LWP::UserAgent; # to get MARC data use LWP::UserAgent; # to get MARC data
use MARC::Record; use MARC::Record;
use Pod::Usage;
use XML::LibXML; use XML::LibXML;
use XML::LibXSLT; use XML::LibXSLT;
use XML::XPath; use XML::XPath;
use constant buffer => 100*1024*1024; # use 100MB as Buffer use Carp;
my $marc_mods_url = 'http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3-6.xsl';
my $with_debug = 0; my $marc_utils_url = 'http://www.loc.gov/standards/marcxml/xslt/MARC21slimUtils.xsl';
my $swb_url = 'https://sru.bsz-bw.de/swb';
my $searchkey = "pica.swn";
my $recordschema = "marcxmlvbos";
our $VERSION = '1.2';
our $with_debug=0;
# Write a string to a file, encoding it as UTF-8.
#
# Parameters:
#   $filename - path of the file to create or truncate (anything that
#               stringifies to a path is accepted)
#   $value    - text to write
# Returns 1 on success; croaks if the file cannot be opened, written or closed.
sub write_file($$) {
    my ($filename, $value) = @_;
    open(my $fh, '>:encoding(UTF-8)', $filename) || (croak "Can't open '$filename', $!");
    # a failed print (e.g. disk full) must not be reported as success
    print {$fh} $value or croak "could not write to file '$filename', $!";
    close($fh) || (croak "could not close file '$filename', $!");
    return 1;
}
# this will patch the mods-xml as a workaround for bugs in LOCs xslt files # this will patch the mods-xml as a workaround for bugs in LOCs xslt files
sub patch_mods($) { sub patch_mods($) {
my $modsobj = shift; # mods expected as XML Parser object my $modsobj = shift; # mods expected as XML Parser object
# TODO: Bugfix for /mets:mets/mets:dmdSec[1]/mets:mdWrap[1]/mets:xmlData[1]/mods:modsCollection[1]/mods:mods[1]/mods:relatedItem[2]/mods:internetMediaType[1] # TODO: Bugfix for /mets:mets/mets:dmdSec[1]/mets:mdWrap[1]/mets:xmlData[1]/mods:modsCollection[1]/mods:mods[1]/mods:relatedItem[2]/mods:internetMediaType[1]
my $xslt_patch_string =<<PATCH; my $xslt_patch_string = <<'PATCH';
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xs="http://www.w3.org/2001/XMLSchema"
...@@ -77,7 +87,7 @@ PATCH ...@@ -77,7 +87,7 @@ PATCH
sub patch_marc_response($) { sub patch_marc_response($) {
my $marcobj = shift; # marcobj expected as XML Parser object my $marcobj = shift; # marcobj expected as XML Parser object
my $xslt_patch_string =<<PATCH2; my $xslt_patch_string = <<'PATCH2';
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://www.loc.gov/MARC21/slim" xmlns:srw="http://www.loc.gov/zing/srw/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://www.loc.gov/MARC21/slim" xmlns:srw="http://www.loc.gov/zing/srw/"
...@@ -109,148 +119,199 @@ PATCH2 ...@@ -109,148 +119,199 @@ PATCH2
return $result; return $result;
} }
# Ensure the MARC21 utility stylesheet is cached in the local XSL directory.
#
# Parameters:
#   $xsl_dir - directory holding the cached stylesheets
#   $ua      - user agent used for the download (called as $ua->get($url);
#              the caller in this file passes an LWP::UserAgent)
# Returns the path object of the cached stylesheet inside $xsl_dir.
# Croaks if the stylesheet must be downloaded and the download or the
# decoding of its content fails.
sub check_marc21_utility {
    my ($xsl_dir, $ua) = @_;
    my $marc_utils_basename = path($marc_utils_url)->basename;
    my $marc_utils_path     = path($xsl_dir)->child($marc_utils_basename);

    # already cached, nothing to download
    return $marc_utils_path if $marc_utils_path->is_file;

    say "Downloading MARC21 utility xsl '$marc_utils_url'";
    my $result = $ua->get($marc_utils_url);
    # is_error only covers 4xx/5xx responses; anything that is not a success
    # must not end up cached as a stylesheet
    if (!$result->is_success) {
        croak "Failed to download '$marc_utils_url', " . $result->error_as_HTML;
    }
    say "Saving MARC21 utility xsl to file '$marc_utils_path'";
    my $xsl = $result->decoded_content;
    # decoded_content yields undef when the body cannot be decoded
    if (!defined $xsl) {
        croak "Failed to decode content of '$marc_utils_url'";
    }
    write_file($marc_utils_path, $xsl);
    return $marc_utils_path;
}
# Ensure a patched copy of the MARC21->MODS stylesheet exists in the local
# XSL directory. The patched copy references the locally cached MARC21
# utility stylesheet instead of its remote URL, so it can be used offline.
#
# Parameters:
#   $xsl_dir - directory holding the cached stylesheets
#   $ua      - user agent used for the download (called as $ua->get($url);
#              the caller in this file passes an LWP::UserAgent)
# Returns the path object of the patched stylesheet inside $xsl_dir.
# Croaks if a required download or the decoding of its content fails.
sub check_marc21_mods_xsl {
    my ($xsl_dir, $ua) = @_;
    my $marc_mods_basename         = path($marc_mods_url)->basename;
    my $marc_mods_path             = path($xsl_dir)->child($marc_mods_basename)->stringify;
    my $marc_mods_patched_basename = path($marc_mods_url)->basename(".xsl") . ".patched.xsl";
    my $marc_mods_patched_path     = path($xsl_dir)->child($marc_mods_patched_basename);

    # patched stylesheet already cached, nothing to do
    return $marc_mods_patched_path if $marc_mods_patched_path->is_file;

    say "Downloading MARC21->MODS xsl '$marc_mods_url'";
    my $result = $ua->get($marc_mods_url);
    # is_error only covers 4xx/5xx responses; anything that is not a success
    # must not end up cached as a stylesheet
    if (!$result->is_success) {
        croak "Failed to download '$marc_mods_url', " . $result->error_as_HTML;
    }
    say "Modifying MARC21->MODS xsl for offline use";
    my $xsl = $result->decoded_content;
    # decoded_content yields undef when the body cannot be decoded
    if (!defined $xsl) {
        croak "Failed to decode content of '$marc_mods_url'";
    }
    # keep the unmodified download next to the patched version
    write_file($marc_mods_path, $xsl);
    my $xsl_modified    = $xsl;
    my $marc_utils_path = check_marc21_utility($xsl_dir, $ua);
    # \Q...\E: the URL is literal text, its dots must not act as regex wildcards
    $xsl_modified =~ s#\Q$marc_utils_url\E#$marc_utils_path#g;
    say "Saving MARC21->MODS xsl to file '$marc_mods_patched_path'";
    write_file($marc_mods_patched_path, $xsl_modified);
    return $marc_mods_patched_path;
}
# Locate the "xsl" directory that sits beside the directory containing this
# file and create it when it does not exist yet.
#
# Returns the path object of the directory.
# Confesses if the directory still does not exist after the creation attempt.
sub check_xsl_directory {
    my $xsl_dir = path(__FILE__)->parent->realpath->parent->child("xsl");
    if (! $xsl_dir->is_dir) {
        say "Rebuilding XSL directory '$xsl_dir'";
        # mkpath throws on real errors and returns the list of directories it
        # created, which is empty when the directory appeared in the meantime.
        # Judge success by the resulting state, not by that return value.
        $xsl_dir->mkpath();
        $xsl_dir->is_dir || confess("could not mkdir '$xsl_dir', $!");
    }
    return $xsl_dir;
}
# the "old" approach does not handle umlauts or UTF8-chars above ASCII table # specification SRU/SRW BSZ: https://wiki.k10plus.de/pages/viewpage.action?pageId=132874251
# old: http://swb2.bsz-bw.de/sru/DB=2.1/username=/password=/?query=pica.ppn+%3D+"494384174"&startRecord=1&maximumRecords=10&recordSchema=marcxml sub get_mods_from($$$$) {
# new: http://swb2.bsz-bw.de/sru/DB=2.1/username=/password=/?query=pica.ppn+%3D+"494384174"&startRecord=1&maximumRecords=10&recordSchema=marc21&recordPacking=xml&version=1.1 # $mods = ($url, $ppn, $searchkey, $recordschema)
# with stylesheet:
# http://swb2.bsz-bw.de/sru/DB=2.1/username=/password=/?query=pica.ppn+%3D+%22494384174%22&version=1.1&operation=searchRetrieve&stylesheet=http%3A%2F%2Fswb2.bsz-bw.de%2Fsru%2FDB%3D2.1%2F%3Fxsl%3DsearchRetrieveResponse&recordSchema=marc21&maximumRecords=10&startRecord=1&recordPacking=xml&sortKeys=none&x-info-5-mg-requestGroupings=none
sub get_mods_from ($$) { # $mods = ($url, $ppn)
my $url = shift; my $url = shift;
my $ppn = shift; # example: "457035137" for "Der Fichtelberg" my $ppn = shift; # example: "457035137" for "Der Fichtelberg"
my $key = shift;
my $schema = shift;
#### where to find XSLT #### where to find XSLT
# my $marc_dc_url = 'http://www.loc.gov/standards/marcxml/xslt/MARC21slim2RDFDC.xsl';
my $marc_mods_url = 'http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3-6.xsl';
my $ua = LWP::UserAgent->new; my $ua = LWP::UserAgent->new;
$ua->agent("MyApp/0.1 "); $ua->agent("MyApp/0.1 ");
$ua->timeout(3600); #1h $ua->timeout(3600); #1h
my $xsl_dir = check_xsl_directory();
check_marc21_utility($xsl_dir, $ua);
check_marc21_mods_xsl($xsl_dir, $ua);
my $srubase = $url; # host my $srubase = $url; # host
my $srusearchkey="pica.ppn"; my $srusearchkey = $key; # SRU search key
my $sruvalue = $ppn; my $sruvalue = $ppn;
my $srumaxrecords = 1; my $srumaxrecords = 1;
#my $sruschema="marcxml"; my $srustartrecord = 1;
my $sruschema="marc21"; my $sruschema = $schema;
#my $sru = "${srubase}?query=${srusearchkey}+%3D+%22${sruvalue}%22&startRecord=1&maximumRecords=${srumaxrecords}&recordSchema=${sruschema}"; my $sru = "${srubase}?version=1.1&query=${srusearchkey}%3D${sruvalue}&operation=searchRetrieve&maximumRecords=${srumaxrecords}&startRecord=${srustartrecord}&recordSchema=${sruschema}";
my $sru = "${srubase}?query=${srusearchkey}+%3D+%22${sruvalue}%22&startRecord=1&maximumRecords=${srumaxrecords}&recordSchema=${sruschema}&recordPacking=xml&version=1.1&stylesheet=http%3A%2F%2Fswb2.bsz-bw.de%2Fsru%2FDB%3D2.1%2F%3Fxsl%3DsearchRetrieveResponse"; if ($with_debug) {say "catalog-URL='$sru'";}
#p ($sru); # debug output my $response = $ua->get($sru); # ask SWB for given PPN
my $record = $ua->get($sru); # ask SWB for given PPN if ($response->is_success) {
if ($record->is_success) {
# parse ZiNG repsonse, extract MARC-data # parse ZiNG repsonse, extract MARC-data
my $xp = XML::XPath->new( $record->decoded_content ); my $xp = XML::XPath->new($response->decoded_content);
my $parser = XML::LibXML->new(); my $parser = XML::LibXML->new();
if ($with_debug) { if ($with_debug) {
say "write DEBUG_${ppn}_response.xml"; say "write DEBUG_${ppn}_response.xml";
write_file("DEBUG_${ppn}_response.xml", {binmode => ':utf8'}, $record->decoded_content); write_file("DEBUG_${ppn}_response.xml", $response->decoded_content);
} }
my $marcblob = $parser->parse_string( my $recordData = $xp->findnodes_as_string('/*[local-name()="searchRetrieveResponse"]/*[local-name()="records"]/*[local-name()="record"]/*[local-name()="recordData"]/*');
$xp->findnodes_as_string('/*[local-name()="searchRetrieveResponse"]/*[local-name()="records"]/*[local-name()="record"]/*[local-name()="recordData"]/*') if (!$recordData) { croak("ERROR: Did not get any <recordData/> for PPN '$ppn' using '$sru'");}
); my $marcblob = $parser->parse_string($recordData);
my $marcblob_patched = patch_marc_response($marcblob); my $marcblob_patched = patch_marc_response($marcblob);
if ($with_debug) { if ($with_debug) {
say "write DEBUG_${ppn}_marc_unpatched.xml"; say "write DEBUG_${ppn}_marc_unpatched.xml";
write_file("DEBUG_${ppn}_marc_unpatched.xml", {binmode => ':utf8'}, $marcblob); write_file("DEBUG_${ppn}_marc_unpatched.xml", $marcblob);
say "write DEBUG_${ppn}_marc.xml"; say "write DEBUG_${ppn}_marc.xml";
write_file("DEBUG_${ppn}_marc.xml", {binmode => ':utf8'}, $marcblob_patched); write_file("DEBUG_${ppn}_marc.xml", $marcblob_patched);
} }
my $marc_mods_patched_path = check_marc21_mods_xsl($xsl_dir, $ua);
my $xslt = XML::LibXSLT->new(); my $xslt = XML::LibXSLT->new();
my $marcmods = XML::LibXML->load_xml(location=>$marc_mods_url, no_cdata=>1); my $marcmods = XML::LibXML->load_xml(location => $marc_mods_patched_path, no_cdata => 1);
my $stylesheet = $xslt->parse_stylesheet($marcmods); my $stylesheet = $xslt->parse_stylesheet($marcmods);
my $marc = $parser->parse_string($marcblob_patched); my $marc = $parser->parse_string($marcblob_patched);
my $result = $stylesheet->transform($marc); my $result = $stylesheet->transform($marc);
if ($with_debug) { if ($with_debug) {
say "write DEBUG_${ppn}_unpatched_mods.xml"; say "write DEBUG_${ppn}_unpatched_mods.xml";
write_file("DEBUG_${ppn}_unpatched_mods.xml", {binmode => ':utf8'}, $stylesheet->output_string( $result )); write_file("DEBUG_${ppn}_unpatched_mods.xml", $stylesheet->output_string($result));
} }
$result = patch_mods($result); $result = patch_mods($result);
my $result_string = $stylesheet->output_string($result); my $result_string = $stylesheet->output_string($result);
return $result_string; return $result_string;
} else { }
else {
carp("Problem asking catalogue at $url using $ppn"); carp("Problem asking catalogue at $url using $ppn");
} }
return; return;
} }
#=============================================================================== sub create_filecopyhash {
my $directory = shift;
my $directory; my $content = shift;
my $ppn; my %filecopyhash;
my $noppn; my $wanted=sub {
my $output; if (-d $_) {
my $url; # dir, do nothing
my $as_zip; ();
my $external_id; } else {
my $external_workflow; my $file=$File::Find::name;
my $external_isil=""; if ($file !~ m#^[-A-Za-z0-9_\.:\\/]+$#) {
my $external_value_descr; confess("file '$file' does not match regex '^[-A-Za-z0-9_\.:\\/]+\$'");
my $external_conservation_flag;
our $VERSION = '1.0';
GetOptions(
"IE_directory=s" => \$directory,
"ppn=s" => \$ppn,
"noppn=s" => \$noppn,
"SIP_output_path=s" => \$output,
"as_zip" => \$as_zip,
"url=s" => \$url,
"external_id=s" => \$external_id,
"external_workflow=s" => \$external_workflow,
"external_ISIL=s" => \$external_isil,
"external_value_descr=s" => \$external_value_descr,
"external_conservation_flag" => \$external_conservation_flag,
"debug" => \$with_debug,
"help" => sub { pod2usage(1); exit(0); },
) or pod2usage(2);
if (!defined $directory) { confess ("you need to specify an IE directory, which needs to be archived"); }
if (!defined $ppn && !defined $noppn) { confess ("you need to specify a PPN, which exists in SWB catalogue"); }
if (defined $ppn && defined $noppn) {confess ("you could only use --ppn=foo or --noppn=bar"); }
if (!defined $output) { confess (" you need to specify an output path, where the SIP will be stored"); }
if ($output !~ m#^/#) { confess("you need to specify an output path using absoluet paths, $!"); }
if (!defined $url) { $url = "http://swb.bsz-bw.de/sru/DB=2.1/username=/password=/";}
if (!defined $external_conservation_flag) { $external_conservation_flag="false"; } else { $external_conservation_flag="true"; }
# additional checks
if (! -d $directory) { confess("you need to specify an IE directory, which needs to be archived, $!"); }
if ($directory !~ m#^/#) { confess("you need to specify an IE directory using absoluet paths, $!"); }
#if (! -d $output) { confess("you need to specify an output path, where the SIP will be stored, $!"); }
# get date
my $export_to_archive_date = DateTime->now->iso8601();#
my $file_date = $export_to_archive_date;
$file_date =~ s/T/_/g;
$file_date =~ s/:/-/g;
# create output dir
mkpath "$output" || confess("could not create SIP directory for '$output', $!");
my $sip_root_dir = "PPN-${ppn}_${file_date}";
my $content = "$output/$sip_root_dir/data";
if (!defined $as_zip) {
mkpath "$output/$sip_root_dir" || confess("could not create SIP directory for '$output/$sip_root_dir', $!");
mkpath "$content" || confess("could not create SIP subdirectory for '$content', $!");
} }
my $source = $file;
$filecopyhash{$source}->{'source'}=$file;
$file=~s#^$directory/?##;
$filecopyhash{$source}{'relative'}="data/$file";
$filecopyhash{$source}{'target'}="$content/$file";
my $fh;
open($fh, "<", $source) or confess ("Can't open '$source', $!");
binmode($fh);
my $ctx = Digest::MD5->new;
$ctx->addfile(*$fh);
close ($fh);
my $md5 = $ctx->hexdigest;
$filecopyhash{$source}{'md5sum'}=$md5;
}
};
finddepth($wanted, $directory);
return \%filecopyhash;
}
# prepare dmd-sec sub prepare_dmd_section_with_ppn ($) {
my $mods; my $ppn = shift;
if (defined $ppn) { my $mods = SLUB::LZA::SIPBuilder::get_mods_from($swb_url, $ppn, $searchkey, $recordschema);
$mods = get_mods_from($url, $ppn); if ($with_debug) {
if (1 == $with_debug) { SLUB::LZA::SIPBuilder::write_file("DEBUG_${ppn}_mods.xml", $mods);
write_file("DEBUG_${ppn}_mods.xml", {binmode => ':utf8'}, $mods);
} }
# remove the <xml /> from beginning of the answer # remove the <xml /> from beginning of the answer
$mods=~ s#<\?xml version="1.0" encoding="UTF-8"\?>#<!-- removed xml header from mods part -->#; $mods=~ s#<\?xml version="1.0" encoding="UTF-8"\?>#<!-- removed xml header from mods part -->#;
} elsif (defined $noppn) { my $dmd =<<"DMD";
$mods =<<MODS; <mets:dmdSec ID="DMDLOG_0000">
<mods version="3.5" <!-- bibliographic metadata -->
<mets:mdWrap MDTYPE="MODS">
<mets:xmlData>
$mods
</mets:xmlData>
</mets:mdWrap>
</mets:dmdSec>
DMD
return $dmd;
}
sub prepare_dmd_section_with_noppn ($) {
my $noppn = shift;
my $mods =<<"MODS";
<mods version="3.6"
xmlns="http://www.loc.gov/mods/v3" xmlns="http://www.loc.gov/mods/v3"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-5.xsd"> xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd">
<identifier>$noppn</identifier> <identifier>$noppn</identifier>
</mods> </mods>
MODS MODS
} my $dmd =<<"DMD";
my $dmd =<<DMD;
<mets:dmdSec ID="DMDLOG_0000"> <mets:dmdSec ID="DMDLOG_0000">
<!-- bibliographic metadata --> <!-- bibliographic metadata -->
<mets:mdWrap MDTYPE="MODS"> <mets:mdWrap MDTYPE="MODS">
...@@ -260,9 +321,17 @@ my $dmd =<<DMD; ...@@ -260,9 +321,17 @@ my $dmd =<<DMD;
</mets:mdWrap> </mets:mdWrap>
</mets:dmdSec> </mets:dmdSec>
DMD DMD
return $dmd;
}
# prepare amd-sec sub prepare_amd_section($$$$$$) {
my $amd =<<AMD; my $export_to_archive_date = shift;
my $external_workflow = shift;
my $external_id = shift;
my $external_conservation_flag = shift;
my $external_isil = shift;
my $external_value_descr = shift;
my $amd =<<"AMD";
<mets:amdSec ID="AMD"> <mets:amdSec ID="AMD">
<!-- SIP metadata for automated processing by submission application --> <!-- SIP metadata for automated processing by submission application -->
<mets:techMD ID="ARCHIVE"> <mets:techMD ID="ARCHIVE">
...@@ -281,80 +350,149 @@ my $amd =<<AMD; ...@@ -281,80 +350,149 @@ my $amd =<<AMD;
</mets:techMD> </mets:techMD>
</mets:amdSec> </mets:amdSec>
AMD AMD
return $amd;
# create filecopyhash
my %filecopyhash;
my $wanted=sub {
if (-d $_) {
# dir, do nothing
()
} else {
my $file=$File::Find::name;
if ($file !~ m#^[-A-Za-z0-9_\./]+$#) {
confess("file '$file' does not match regex '^[-A-Za-z0-9_\./]+\$'");
}
my $source = $file;
$filecopyhash{$source}->{'source'}=$file;
$file=~s#^$directory/?##;
$filecopyhash{$source}{'relative'}="data/$file";
$filecopyhash{$source}{'target'}="$content/$file";
my $fh;
open($fh, "<", $source) or confess ("Can't open '$source' (current='",getcwd,"', $!\n");
binmode($fh);
my $ctx = Digest::MD5->new;
$ctx->addfile(*$fh);
close ($fh);
my $md5 = $ctx->hexdigest;
$filecopyhash{$source}{'md5sum'}=$md5;
} }
};
finddepth($wanted, $directory); sub prepare_files_sections($) {
my $filecopyhash = shift;
# create fileSec
my $filesec=<<FILESEC1;
<mets:fileSec>
<mets:fileGrp USE="LZA">
FILESEC1
{
my @fsec; my @fsec;
my $i=0; my $i=0;
foreach my $fkey (sort keys (%filecopyhash)) { foreach my $fkey (sort keys (%{$filecopyhash})) {
push @fsec, sprintf("<mets:file ID=\"FILE_%015u_LZA\" CHECKSUMTYPE=\"MD5\" CHECKSUM=\"%s\">", $i, $filecopyhash{$fkey}->{"md5sum"}); push @fsec, sprintf("<mets:file ID=\"FILE_%015u_LZA\" CHECKSUMTYPE=\"MD5\" CHECKSUM=\"%s\">", $i, $filecopyhash->{$fkey}->{"md5sum"});
push @fsec, sprintf("<mets:FLocat xmlns:xlink=\"http://www.w3.org/1999/xlink\" LOCTYPE=\"URL\" xlink:href=\"file://%s\"/>", $filecopyhash{$fkey}->{"relative"}); push @fsec, sprintf("<mets:FLocat xmlns:xlink=\"http://www.w3.org/1999/xlink\" LOCTYPE=\"URL\" xlink:href=\"file://%s\"/>", $filecopyhash->{$fkey}->{"relative"});
push @fsec, "</mets:file>"; push @fsec, "</mets:file>";
$i++; $i++;
} }
$filesec = join("\n", $filesec, @fsec); my $files = join("\n", @fsec);
} my $filesec=<<"FILESEC";
$filesec = $filesec . <<FILESEC2; <mets:fileSec>
<mets:fileGrp USE="LZA">
$files
</mets:fileGrp> </mets:fileGrp>
</mets:fileSec> </mets:fileSec>
FILESEC2 FILESEC
return $filesec;
}
# prepare structmap sub prepare_struct_map($) {
my $structmap =<<STRUCTMAP1; my $filecopyhash = shift;
<mets:structMap TYPE="PHYSICAL">
<mets:div ID="PHYS_0000" TYPE="ieDir">
STRUCTMAP1
{
my @ssec; my @ssec;
my $i=0; my $i=0;
foreach my $fkey (sort keys (%filecopyhash)) { foreach my $fkey (sort keys (%{$filecopyhash})) {
push @ssec, sprintf("<mets:div ID=\"PHYS_%015u_LZA\" TYPE=\"fileorderSequence\">", $i); push @ssec, sprintf("<mets:div ID=\"PHYS_%015u_LZA\" TYPE=\"fileorderSequence\">", $i);
push @ssec, sprintf("<mets:fptr FILEID=\"FILE_%015u_LZA\" />", $i); push @ssec, sprintf("<mets:fptr FILEID=\"FILE_%015u_LZA\" />", $i);
push @ssec, "</mets:div>"; push @ssec, "</mets:div>";
$i++; $i++;
} }
$structmap = join("\n", $structmap, @ssec); my $structs = join("\n", @ssec);
} my $structmap =<<"STRUCTMAP";
$structmap = $structmap . <<STRUCTMAP2; <mets:structMap TYPE="PHYSICAL">
<mets:div ID="PHYS_0000" TYPE="ieDir">
$structs
</mets:div> </mets:div>
</mets:structMap> </mets:structMap>
STRUCTMAP2 STRUCTMAP
return $structmap;
}
# end package
package main;
#===============================================================================
BEGIN{
$INC{'SLUB/LZA/SIPBuilder.pm'} = 1; # needed because inlined module
}
return 1 if caller; # avoids main code running if module stuff is needed
use SLUB::LZA::SIPBuilder;
use Archive::Zip::SimpleZip qw($SimpleZipError);
use Getopt::Long;
use Path::Tiny;
use Digest::MD5;
use constant buffer => 100 * 1024 * 1024; # use 100MB as Buffer
use File::Find;
use File::Copy qw(cp);
use Pod::Usage;
# Command line state, filled by GetOptions below.
my $directory;
my $ppn;
my $noppn;
my $output;
my $as_zip;
my $external_id;
my $external_workflow;
my $external_isil="";
my $external_value_descr;
my $external_conservation_flag;
my $help;
my $man;
GetOptions(
    "IE_directory=s"             => \$directory,                            # required
    "ppn=s"                      => \$ppn,                                  # semi-optional (choice 1 of 2)
    "noppn=s"                    => \$noppn,                                # semi-optional (choice 2 of 2)
    "SIP_output_path=s"          => \$output,                               # required
    "as_zip"                     => \$as_zip,                               # optional, default: do not zip
    "external_id=s"              => \$external_id,                          # required
    "external_workflow=s"        => \$external_workflow,                    # required
    "external_ISIL=s"            => \$external_isil,                        # optional, default: no ISIL
    "external_value_descr=s"     => \$external_value_descr,                 # required
    "external_conservation_flag" => \$external_conservation_flag,           # optional, default: no special conservation
    "debug"                      => \$SLUB::LZA::SIPBuilder::with_debug,    # optional
    "help|?"                     => \$help,                                 # optional
    "man"                        => \$man,                                  # optional
) or pod2usage(2);
if ($help) { pod2usage(1); }
if ($man) { pod2usage(-exitval => 0, -verbose => 2); }

# Validate every argument before touching the file system, so that an invalid
# call does not leave a freshly created output directory behind.
if (!defined $directory) { confess("you need to specify an IE directory, which needs to be archived"); }
if ((defined $ppn) && (defined $noppn)) { confess("you can only specify either -ppn or -noppn"); }
if ((!defined $ppn) && (!defined $noppn)) { confess("you need to specify a PPN with -ppn or use --noppn"); }
if (!defined $output) { confess("you need to specify an output path, where the SIP will be stored"); }
# the flag is turned into the literal strings "true" / "false"
if (!defined $external_conservation_flag) { $external_conservation_flag="false"; } else { $external_conservation_flag="true"; }
if (! -d $directory) { confess("you need to specify an IE directory, which needs to be archived, $!"); }
# the defined checks avoid "uninitialized value" warnings when a mandatory option is missing
if ((!defined $external_id) || ($external_id !~ m#^[a-z0-9]+$#)) { confess("you need to specify a valid external ID (^[a-z0-9]+\$)"); }
if ((!defined $external_workflow) || ($external_workflow !~ m#^[a-z0-9]+$#)) { confess("you need to specify a valid external workflow (^[a-z0-9]+\$)"); }
if (!$external_value_descr) { confess("you need to specify an external value description (reason for archiving)"); }

# Normalise both directories to absolute, symlink-free paths; the output
# directory is created first because realpath needs an existing path.
$directory = path($directory)->realpath->stringify;
path($output)->mkpath;
$output = path($output)->realpath->stringify;
#===============================================================================
sub main {
# get date
my $export_to_archive_date = DateTime->now->iso8601();
my $file_date = $export_to_archive_date;
$file_date =~ s/T/_/g; # replace 'T' with '_'
$file_date =~ s/:/-/g; # replace ':' with '-'
# prepare dirs
my $sip_root_dir = (defined $ppn)? "PPN-${ppn}_${file_date}" : "ID-${noppn}_${file_date}";
my $content = path($output)->child($sip_root_dir)->child("data")->stringify;
if (!defined $as_zip) {
path($content)->mkpath;
}
my $filecopyhash = SLUB::LZA::SIPBuilder::create_filecopyhash($directory, $content);
# prepare dmd-sec
my $dmd = (defined $ppn)? SLUB::LZA::SIPBuilder::prepare_dmd_section_with_ppn( $ppn ) : SLUB::LZA::SIPBuilder::prepare_dmd_section_with_noppn( $noppn );
# prepare amd-sec
my $amd = SLUB::LZA::SIPBuilder::prepare_amd_section(
$export_to_archive_date,
$external_workflow,
$external_id,
$external_conservation_flag,
$external_isil,
$external_value_descr
);
# create fileSec
my $filesec = SLUB::LZA::SIPBuilder::prepare_files_sections($filecopyhash);
# prepare structmap
my $structmap = SLUB::LZA::SIPBuilder::prepare_struct_map($filecopyhash);
# create sip.xml # create sip.xml
my $sip =<<METS; my $sip =<<"METS";
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/" <mets:mets xmlns:mets="http://www.loc.gov/METS/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
...@@ -366,30 +504,28 @@ my $sip =<<METS; ...@@ -366,30 +504,28 @@ my $sip =<<METS;
</mets:mets> </mets:mets>
METS METS
# compress if needed # write stuff out
if (!defined $as_zip) { if (!defined $as_zip) {
write_file( "${output}/${sip_root_dir}/sip.xml",{binmode => ':utf8'}, $sip ); SLUB::LZA::SIPBuilder::write_file( path($output)->child($sip_root_dir)->child("sip.xml")->stringify, $sip );
# copy source to target # copy source to target
foreach my $source (sort keys (%filecopyhash)) { foreach my $source (sort keys (%{$filecopyhash})) {
my $target = $filecopyhash{$source}->{"target"}; my $target = path($filecopyhash->{$source}->{"target"})->stringify; # CHECK ON WINDOWS
my $basename = dirname($target); my $basename = path($target)->parent->stringify;
#say "cp $source, $target ($basename)";
if (! -d $basename) { if (! -d $basename) {
mkpath $basename || confess ("could not mkdir '$basename', $!"); path($basename)->mkpath;
} }
cp($source, $target, buffer) || confess ("could not copy from '$source' to '$target', $!"); cp($source, $target, buffer) || confess ("could not copy from '$source' to '$target', $!");
} }
say "SIP '$sip_root_dir' build successfully in '$output'"; say "SIP '$sip_root_dir' build successfully in '$output'";
} else { } else {
# compress it # compress it
my $zip_file_path = "$output/$sip_root_dir.zip"; my $zip_file_path = path($output)->child("$sip_root_dir.zip")->stringify;
my $zip = Archive::Zip::SimpleZip->new( $zip_file_path, Zip64=>1 ); my $zip = Archive::Zip::SimpleZip->new( $zip_file_path, Zip64=>1 );
$zip->addString($sip, Name=> "$sip_root_dir/sip.xml" ); $zip->addString($sip, Name=>path($sip_root_dir)->child("sip.xml")->stringify);
# copy source to target # copy source to target
foreach my $source (sort keys (%filecopyhash)) { foreach my $source (sort keys (%{$filecopyhash})) {
my $target = "$sip_root_dir/".$filecopyhash{$source}->{"relative"}; my $target = path($sip_root_dir)->child($filecopyhash->{$source}->{"relative"})->stringify; # CHECK ON WINDOWS
my $basename = dirname($target); my $basename = path($target)->parent->stringify;
#say "cp $source, $target ($basename)";
$zip->add( $source, Name=> $target) || confess ("could not zip copy from '$source' to '$target', $!"); $zip->add( $source, Name=> $target) || confess ("could not zip copy from '$source' to '$target', $!");
} }
unless ( $zip->close()) { unless ( $zip->close()) {
...@@ -397,6 +533,18 @@ if (!defined $as_zip) { ...@@ -397,6 +533,18 @@ if (!defined $as_zip) {
} }
say "SIP '$sip_root_dir' build successfully in '$zip_file_path'"; say "SIP '$sip_root_dir' build successfully in '$zip_file_path'";
} }
return;
}
#===============================================================================
main();
#===============================================================================
__END__
=pod =pod
...@@ -413,17 +561,16 @@ slubsipbuilder.pl [options] ...@@ -413,17 +561,16 @@ slubsipbuilder.pl [options]
-man full documentation -man full documentation
-IE_directory=<IE dir> existing IE directory (absolute path!) -IE_directory=<IE dir> existing IE directory (absolute path!)
-ppn=<ppn>|-noppn=<noppn> PPN (swb catalogue) or any identifier (uses minimalistic MODS) -ppn=<ppn>|-noppn=<noppn> SWB-PPN or any identifier (uses minimalistic MODS)
-SIP_output_path=<target dir> where to put the SIP dir (absolute path!) -SIP_output_path=<target dir> where to put the SIP dir (absolute path!)
-as_zip optional, if set a ZIP will be created -as_zip optional, if set a ZIP will be created
-url=<SRU url> optional, URL of the SRU for PICA catalogues
-external_id=<id> mandatory, should be a unique ID -external_id=<id> mandatory, should be a unique ID
-external_workflow=<workflow> mandatory, should be a unique workflow name -external_workflow=<workflow> mandatory, should be a unique workflow name
-external_ISIL=<isil> optional, ISIL number of library -external_ISIL=<isil> optional, ISIL number of library
-external_value_descr=<text> mandatory, the reason why to archive -external_value_descr=<text> mandatory, the reason why to archive
-external_conservation_flag optional, if set no other "original" still exists -external_conservation_flag optional, if set no other "original" still exists
slubsipbuilder.pl --IE_directory=/processdir_from_goobi/10008 --ppn=457035137 --SIP_output_path=/tmp/mysip --external_id=10008 --external_workflow=goobitest --external_ISIL=de-14 --external_value_descr="Gesetzlicher Auftrag" --as_zip slubsipbuilder.pl --IE_directory=/export_dir_kitodo/10008 --ppn=457035137 --SIP_output_path=/tmp/mysip --external_id=10008 --external_workflow=kitodo --external_ISIL=DE-14 --external_value_descr="Gesetzlicher Auftrag"
=head1 OPTIONS =head1 OPTIONS
...@@ -440,4 +587,3 @@ Print a brief help message and exits. ...@@ -440,4 +587,3 @@ Print a brief help message and exits.
B<This program> will process the given IE directory, add bibliographic metadata from catalogue with given PICA number and check and create a SIP directory ready for SLUBarchiv B<This program> will process the given IE directory, add bibliographic metadata from catalogue with given PICA number and check and create a SIP directory ready for SLUBarchiv
=cut =cut
# vim: set tabstop=4
#!/usr/bin/perl -w
use strict;
use warnings;
use diagnostics;
use Test::More tests => 8;
use Test::Exception;
use Test::File;
use Path::Tiny;
### prepare
BEGIN {
    use Path::Tiny;
    # The script under test is looked up in the "bin" directory two levels
    # above this test file; append that directory to the module search path.
    my $script_dir = Path::Tiny::path(__FILE__)->parent->parent->path("bin")->absolute;
    push @INC, $script_dir->stringify;
    require "slubsipbuilder.pl";
    # Mark the package as loaded by hand.
    $INC{'SLUB/LZA/SIPBuilder.pm'} = 1; # needed because inlined module
}
# Fixture: MODS 3.6 record (record identifier 457035137) that is parsed below
# and used as the input of the patch_mods() test.
my $unpatched_mods=<<'UNPATCHED_MODS';
<?xml version="1.0" encoding="UTF-8"?>
<mods xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.loc.gov/mods/v3" version="3.6" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd"><titleInfo><nonSort xml:space="preserve">Der </nonSort><title>Fichtelberg</title><subTitle>Berg der unbekannten Rekorrde</subTitle></titleInfo><name type="personal"><namePart>Schneider, Dirk</namePart><role><roleTerm type="text">FilmemacherIn</roleTerm></role><role><roleTerm authority="marcrelator" type="code">fmk</roleTerm></role><nameIdentifier>(DE-627)1235502279 (DE-576)165502274</nameIdentifier></name><typeOfResource>moving image</typeOfResource><genre authority="rdacontent">zweidimensionales bewegtes Bild</genre><genre authority="gnd-content">Film</genre><originInfo><place><placeTerm type="code" authority="marccountry">xx</placeTerm></place><dateIssued encoding="marc">2014</dateIssued><issuance>monographic</issuance></originInfo><originInfo eventType="publication"><place><placeTerm type="text">[Leipzig]</placeTerm></place><publisher>top ten tv</publisher><dateIssued>[2014]</dateIssued></originInfo><language><languageTerm authority="iso639-2b" type="code">ger</languageTerm></language><physicalDescription><form authority="marccategory">electronic resource</form><form authority="marcsmd">remote</form><extent>1 Online-Ressource (1 Videodatei, 29:49) farbig</extent><form type="media" authority="rdamedia">Computermedien</form><form type="carrier" authority="rdacarrier">Online-Ressource</form></physicalDescription><targetAudience authority="marctarget">juvenile</targetAudience><note type="statement of responsibility" altRepGroup="00">ein Film von Dirk Schneider</note><note>Dokumentarfilm. Deutschland. 
2014</note><relatedItem type="series"><titleInfo><title>MDR</title></titleInfo></relatedItem><relatedItem type="series"><titleInfo><title>Der Osten - entdecke wo du lebst</title></titleInfo></relatedItem><identifier type="oclc">946544758</identifier><recordInfo><descriptionStandard>rda</descriptionStandard><recordContentSource authority="marcorg">DE-576</recordContentSource><recordCreationDate encoding="marc">160304</recordCreationDate><recordChangeDate encoding="iso8601">20160510144338.0</recordChangeDate><recordIdentifier source="DE-576">457035137</recordIdentifier><recordOrigin>Converted from MARCXML to MODS version 3.6 using MARC21slim2MODS3-6.xsl
(Revision 1.119 2018/06/21)</recordOrigin><languageOfCataloging><languageTerm authority="iso639-2b" type="code">ger</languageTerm></languageOfCataloging></recordInfo></mods>
UNPATCHED_MODS
# Fixture: the serialized MODS document that the patch_mods() test expects
# as the result for the unpatched MODS fixture.
my $patched_mods=<<'PATCHED_MODS';
<?xml version="1.0" encoding="UTF-8"?>
<mods xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.loc.gov/mods/v3" version="3.6" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd"><titleInfo><nonSort xml:space="preserve">Der </nonSort><title>Fichtelberg</title><subTitle>Berg der unbekannten Rekorrde</subTitle></titleInfo><name type="personal"><namePart>Schneider, Dirk</namePart><role><roleTerm type="text">FilmemacherIn</roleTerm></role><role><roleTerm authority="marcrelator" type="code">fmk</roleTerm></role><nameIdentifier>(DE-627)1235502279 (DE-576)165502274</nameIdentifier></name><typeOfResource>moving image</typeOfResource><genre authority="rdacontent">zweidimensionales bewegtes Bild</genre><genre authority="gnd-content">Film</genre><originInfo><place><placeTerm type="code" authority="marccountry">xx</placeTerm></place><dateIssued encoding="marc">2014</dateIssued><issuance>monographic</issuance></originInfo><originInfo eventType="publication"><place><placeTerm type="text">[Leipzig]</placeTerm></place><publisher>top ten tv</publisher><dateIssued>[2014]</dateIssued></originInfo><language><languageTerm authority="iso639-2b" type="code">ger</languageTerm></language><physicalDescription><form authority="marccategory">electronic resource</form><form authority="marcsmd">remote</form><extent>1 Online-Ressource (1 Videodatei, 29:49) farbig</extent><form type="media" authority="rdamedia">Computermedien</form><form type="carrier" authority="rdacarrier">Online-Ressource</form></physicalDescription><targetAudience authority="marctarget">juvenile</targetAudience><note type="statement of responsibility" altRepGroup="00">ein Film von Dirk Schneider</note><note>Dokumentarfilm. Deutschland. 
2014</note><relatedItem type="series"><titleInfo><title>MDR</title></titleInfo></relatedItem><relatedItem type="series"><titleInfo><title>Der Osten - entdecke wo du lebst</title></titleInfo></relatedItem><identifier type="oclc">946544758</identifier><recordInfo><descriptionStandard>rda</descriptionStandard><recordContentSource authority="marcorg">DE-576</recordContentSource><recordCreationDate encoding="marc">160304</recordCreationDate><recordChangeDate encoding="iso8601">20160510144338.0</recordChangeDate><recordIdentifier source="DE-576">457035137</recordIdentifier><recordOrigin>Converted from MARCXML to MODS version 3.6 using MARC21slim2MODS3-6.xsl
(Revision 1.119 2018/06/21)</recordOrigin><languageOfCataloging><languageTerm authority="iso639-2b" type="code">ger</languageTerm></languageOfCataloging></recordInfo></mods>
PATCHED_MODS
# Parsed form of $unpatched_mods; this document object is what the
# patch_mods() test passes in.
my $unpatched_mods_obj = XML::LibXML->load_xml(string => $unpatched_mods);
# Fixture: MARCXML record (MARC21 slim namespace) that is parsed below and
# used as the input of the patch_marc_response() test.
my $unpatched_marcblob=<<'UNPATCHED_MARCBLOB';
<?xml version="1.0"?>
<record xmlns="http://www.loc.gov/MARC21/slim">
<leader> cgm a22 4500</leader>
<controlfield tag="001">457035137</controlfield>
<controlfield tag="003">DE-576</controlfield>
<controlfield tag="005">20160510144338.0</controlfield>
<controlfield tag="006">m o | | </controlfield>
<controlfield tag="007">cr uuu---uuuuu</controlfield>
<controlfield tag="007">vu uuuuuu</controlfield>
<controlfield tag="008">160304s2014 xx ger c</controlfield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(DE-627)1655506501</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(DE-576)457035137</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(DE-599)BSZ457035137</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(OCoLC)946544758</subfield>
</datafield>
<datafield tag="040" ind1=" " ind2=" ">
<subfield code="a">DE-576</subfield>
<subfield code="b">ger</subfield>
<subfield code="c">DE-576</subfield>
<subfield code="e">rda</subfield>
</datafield>
<datafield tag="041" ind1=" " ind2=" ">
<subfield code="a">ger</subfield>
</datafield>
<datafield tag="245" ind1="1" ind2="4">
<subfield code="a">Der Fichtelberg</subfield>
<subfield code="b">Berg der unbekannten Rekorrde</subfield>
<subfield code="c">ein Film von Dirk Schneider</subfield>
</datafield>
<datafield tag="264" ind1=" " ind2="1">
<subfield code="a">[Leipzig]</subfield>
<subfield code="b">top ten tv</subfield>
<subfield code="c">[2014]</subfield>
</datafield>
<datafield tag="264" ind1=" " ind2="4">
<subfield code="c">&#xA9; 2014</subfield>
</datafield>
<datafield tag="300" ind1=" " ind2=" ">
<subfield code="a">1 Online-Ressource (1 Videodatei, 29:49)</subfield>
<subfield code="b">farbig</subfield>
</datafield>
<datafield tag="336" ind1=" " ind2=" ">
<subfield code="a">zweidimensionales bewegtes Bild</subfield>
<subfield code="b">tdi</subfield>
<subfield code="2">rdacontent</subfield>
</datafield>
<datafield tag="337" ind1=" " ind2=" ">
<subfield code="a">Computermedien</subfield>
<subfield code="b">c</subfield>
<subfield code="2">rdamedia</subfield>
</datafield>
<datafield tag="338" ind1=" " ind2=" ">
<subfield code="a">Online-Ressource</subfield>
<subfield code="b">cr</subfield>
<subfield code="2">rdacarrier</subfield>
</datafield>
<datafield tag="490" ind1="0" ind2=" ">
<subfield code="a">MDR</subfield>
</datafield>
<datafield tag="490" ind1="0" ind2=" ">
<subfield code="a">Der Osten - entdecke wo du lebst</subfield>
</datafield>
<datafield tag="500" ind1=" " ind2=" ">
<subfield code="a">Dokumentarfilm. Deutschland. 2014</subfield>
</datafield>
<datafield tag="591" ind1=" " ind2=" ">
<subfield code="a">Fernsehmitschnitt (SWB)</subfield>
</datafield>
<datafield tag="655" ind1=" " ind2="7">
<subfield code="a">Film</subfield>
<subfield code="0">(DE-588)4017102-4</subfield>
<subfield code="0">(DE-627)104559683</subfield>
<subfield code="0">(DE-576)208918531</subfield>
<subfield code="2">gnd-content</subfield>
</datafield>
<datafield tag="700" ind1="1" ind2=" ">
<subfield code="a">Schneider, Dirk</subfield>
<subfield code="e">FilmemacherIn</subfield>
<subfield code="0">(DE-627)1235502279</subfield>
<subfield code="0">(DE-576)165502274</subfield>
<subfield code="4">fmk</subfield>
</datafield>
<datafield tag="935" ind1=" " ind2=" ">
<subfield code="c">vide</subfield>
</datafield>
<datafield tag="937" ind1=" " ind2=" ">
<subfield code="a">Dokumentarfilm</subfield>
<subfield code="b">Deutschland</subfield>
<subfield code="c">2014</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">000 xxxxxcx a22 zn 4500</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">001 901795887</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">003 DE-576</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">004 457035137</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">005 20160510125331</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">008 160304||||||||||||||||ger|||||||</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">040 </subfield>
<subfield code="a">DE-14</subfield>
<subfield code="c">DE-576</subfield>
<subfield code="d">DE-14</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">852 </subfield>
<subfield code="z">Fernsehmitschnitt: MDR, 04.02.2014. - Beilage</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">852 </subfield>
<subfield code="a">DE-14</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">852 1</subfield>
<subfield code="9">00</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">866 </subfield>
<subfield code="x">ddsu,pn</subfield>
</datafield>
</record>
UNPATCHED_MARCBLOB
# Fixture: the serialized MARCXML document that the patch_marc_response()
# test expects as the result for the unpatched MARC fixture.
my $patched_marcblob =<<'PATCHED_MARCBLOB';
<?xml version="1.0"?>
<record xmlns="http://www.loc.gov/MARC21/slim">
<leader> cgm a22 4500</leader>
<controlfield tag="001">457035137</controlfield>
<controlfield tag="003">DE-576</controlfield>
<controlfield tag="005">20160510144338.0</controlfield>
<controlfield tag="006">m o | | </controlfield>
<controlfield tag="007">cr uuu---uuuuu</controlfield>
<controlfield tag="007">vu uuuuuu</controlfield>
<controlfield tag="008">160304s2014 xx ger c</controlfield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(DE-627)1655506501</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(DE-576)457035137</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(DE-599)BSZ457035137</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(OCoLC)946544758</subfield>
</datafield>
<datafield tag="040" ind1=" " ind2=" ">
<subfield code="a">DE-576</subfield>
<subfield code="b">ger</subfield>
<subfield code="c">DE-576</subfield>
<subfield code="e">rda</subfield>
</datafield>
<datafield tag="041" ind1=" " ind2=" ">
<subfield code="a">ger</subfield>
</datafield>
<datafield tag="245" ind1="1" ind2="4">
<subfield code="a">Der Fichtelberg</subfield>
<subfield code="b">Berg der unbekannten Rekorrde</subfield>
<subfield code="c">ein Film von Dirk Schneider</subfield>
</datafield>
<datafield tag="264" ind1=" " ind2="1">
<subfield code="a">[Leipzig]</subfield>
<subfield code="b">top ten tv</subfield>
<subfield code="c">[2014]</subfield>
</datafield>
<datafield tag="264" ind1=" " ind2="4">
<subfield code="c">&#xA9; 2014</subfield>
</datafield>
<datafield tag="300" ind1=" " ind2=" ">
<subfield code="a">1 Online-Ressource (1 Videodatei, 29:49)</subfield>
<subfield code="b">farbig</subfield>
</datafield>
<datafield tag="336" ind1=" " ind2=" ">
<subfield code="a">zweidimensionales bewegtes Bild</subfield>
<subfield code="b">tdi</subfield>
<subfield code="2">rdacontent</subfield>
</datafield>
<datafield tag="337" ind1=" " ind2=" ">
<subfield code="a">Computermedien</subfield>
<subfield code="b">c</subfield>
<subfield code="2">rdamedia</subfield>
</datafield>
<datafield tag="338" ind1=" " ind2=" ">
<subfield code="a">Online-Ressource</subfield>
<subfield code="b">cr</subfield>
<subfield code="2">rdacarrier</subfield>
</datafield>
<datafield tag="490" ind1="0" ind2=" ">
<subfield code="a">MDR</subfield>
</datafield>
<datafield tag="490" ind1="0" ind2=" ">
<subfield code="a">Der Osten - entdecke wo du lebst</subfield>
</datafield>
<datafield tag="500" ind1=" " ind2=" ">
<subfield code="a">Dokumentarfilm. Deutschland. 2014</subfield>
</datafield>
<datafield tag="591" ind1=" " ind2=" ">
<subfield code="a">Fernsehmitschnitt (SWB)</subfield>
</datafield>
<datafield tag="655" ind1=" " ind2="7">
<subfield code="a">Film</subfield>
<subfield code="0">(DE-588)4017102-4</subfield>
<subfield code="0">(DE-627)104559683</subfield>
<subfield code="0">(DE-576)208918531</subfield>
<subfield code="2">gnd-content</subfield>
</datafield>
<datafield tag="700" ind1="1" ind2=" ">
<subfield code="a">Schneider, Dirk</subfield>
<subfield code="e">FilmemacherIn</subfield>
<subfield code="0">(DE-627)1235502279</subfield>
<subfield code="0">(DE-576)165502274</subfield>
<subfield code="4">fmk</subfield>
</datafield>
<datafield tag="935" ind1=" " ind2=" ">
<subfield code="c">vide</subfield>
</datafield>
<datafield tag="937" ind1=" " ind2=" ">
<subfield code="a">Dokumentarfilm</subfield>
<subfield code="b">Deutschland</subfield>
<subfield code="c">2014</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">000 xxxxxcx a22 zn 4500</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">001 901795887</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">003 DE-576</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">004 457035137</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">005 20160510125331</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">008 160304||||||||||||||||ger|||||||</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">040 </subfield>
<subfield code="a">DE-14</subfield>
<subfield code="c">DE-576</subfield>
<subfield code="d">DE-14</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">852 </subfield>
<subfield code="z">Fernsehmitschnitt: MDR, 04.02.2014. - Beilage</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">852 </subfield>
<subfield code="a">DE-14</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">852 1</subfield>
<subfield code="9">00</subfield>
</datafield>
<datafield tag="LOK" ind1=" " ind2=" ">
<subfield code="0">866 </subfield>
<subfield code="x">ddsu,pn</subfield>
</datafield>
</record>
PATCHED_MARCBLOB
# Parsed form of $unpatched_marcblob; this document object is what the
# patch_marc_response() test passes in.
my $unpatched_marcblob_obj = XML::LibXML->load_xml(string => $unpatched_marcblob);
### tests
# The test plan declared at the top of the file expects exactly 8 assertions.
BEGIN { use_ok("SLUB::LZA::SIPBuilder"); }
# Presumably performs a live request against the given SRU URL, so this
# assertion needs network access -- TODO confirm.
# The former pattern qr// accepted every result, so the check could never fail.
# NOTE(review): assumes the returned document contains a (possibly prefixed)
# <mods> root element, like the MODS fixtures in this file -- confirm.
like(SLUB::LZA::SIPBuilder::get_mods_from("https://sru.bsz-bw.de/swb", "457035137", "pica.swn", "marcxmlvbos"), qr{<(?:\w+:)?mods\b}, "get_mods_from()");
# Both patch functions take the parsed fixture and are compared against the
# expected serialized string.
is(SLUB::LZA::SIPBuilder::patch_mods($unpatched_mods_obj), $patched_mods, "patch_mods()" );
is(SLUB::LZA::SIPBuilder::patch_marc_response($unpatched_marcblob_obj), $patched_marcblob, "patch_marc_response()");
# ensure no dir exists, then run test
my $xsl_path = path(__FILE__)->parent->parent->child('xsl');
if ($xsl_path->is_dir) { $xsl_path->remove_tree; }
# First call: directory is missing, so it has to be created and its path returned.
is(SLUB::LZA::SIPBuilder::check_xsl_directory(), $xsl_path->absolute, "check_xsl_directory(), return value if not exist");
ok($xsl_path->is_dir, "check_xsl_directory(), created if not exist");
# Second call: directory now exists and must be left in place.
is(SLUB::LZA::SIPBuilder::check_xsl_directory(), $xsl_path->absolute, "check_xsl_directory(), return value if exist");
ok($xsl_path->is_dir, "check_xsl_directory(), untouched if exist");
1;
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" targetNamespace="http://slub-dresden.de" xmlns:archive="http://slub-dresden.de" xmlns:v3="http://www.loc.gov/mods/v3" xmlns:mets="http://www.loc.gov/METS/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink"> <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" attributeFormDefault="unqualified" targetNamespace="http://slub-dresden.de/slubarchiv" xmlns:archive="http://slub-dresden.de/slubarchiv">
<xs:import namespace="http://www.loc.gov/METS/" schemaLocation="sip.xsd"/> <xs:element name="record" type="archive:record"/>
<xs:import namespace="http://www.loc.gov/mods/v3" schemaLocation="v3.xsd"/> <xs:complexType name="record">
<xs:import namespace="http://www.w3.org/1999/xlink" schemaLocation="xlink.xsd"/> <xs:all>
<xs:import namespace="http://www.w3.org/2001/XMLSchema-instance" schemaLocation="xsi.xsd"/> <xs:element name="archivalValueDescription" type="archive:NonEmptyString" minOccurs="1" maxOccurs="1"/>
<xs:element name="record"> <xs:element name="exportToArchiveDate" type="xs:dateTime" minOccurs="1" maxOccurs="1"/>
<xs:complexType> <xs:element name="externalId" type="archive:LzaIdSubstring" minOccurs="1" maxOccurs="1"/>
<xs:sequence> <xs:element name="externalIsilId" type="archive:NonEmptyString" minOccurs="0" maxOccurs="1"/>
<xs:element ref="archive:exportToArchiveDate"/> <xs:element name="externalWorkflow" type="archive:LzaIdSubstring" minOccurs="1" maxOccurs="1"/>
<xs:element ref="archive:externalId"/> <xs:element name="hasConservationReason" type="xs:boolean" minOccurs="1" maxOccurs="1"/>
<xs:element ref="archive:externalWorkflow"/> </xs:all>
<xs:element ref="archive:hasConservationReason"/> <xs:attribute name="version" type="archive:SipVersionString" use="required"/>
<xs:element ref="archive:externalIsilId"/>
<xs:element ref="archive:archivalValueDescription"/>
</xs:sequence>
</xs:complexType> </xs:complexType>
</xs:element> <xs:simpleType name="NonEmptyString">
<xs:element name="exportToArchiveDate" type="xs:dateTime"/> <xs:restriction base="xs:string">
<xs:element name="externalId" type="xs:string"/> <xs:pattern value="[\s\S]*[^ ][\s\S]*"/>
<xs:element name="externalWorkflow" type="xs:string"/> </xs:restriction>
<xs:element name="hasConservationReason" type="xs:boolean"/> </xs:simpleType>
<xs:element name="externalIsilId" type="xs:string"/> <xs:simpleType name="LzaIdSubstring">
<xs:element name="archivalValueDescription" type="xs:string"/> <xs:restriction base="xs:string">
<xs:pattern value="[a-z0-9]+"/>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="SipVersionString">
<xs:restriction base="xs:string">
<xs:enumeration value="v2017.1"/>
</xs:restriction>
</xs:simpleType>
</xs:schema> </xs:schema>
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment