Skip to content
Snippets Groups Projects
Commit 546c488b authored by Jens Steidl's avatar Jens Steidl :baby_chick:
Browse files

- refactor: removed MODS transformation, use MARCXML for meta/marc21.xml directly

parent ccf1d55f
No related branches found
No related tags found
No related merge requests found
...@@ -77,8 +77,6 @@ package SLUB::LZA::SIPBuilderBagIt; ...@@ -77,8 +77,6 @@ package SLUB::LZA::SIPBuilderBagIt;
our $with_debug = 0; # output debug infos and files our $with_debug = 0; # output debug infos and files
# catalogue infos # catalogue infos
my $marc_mods_url = 'http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3-6.xsl';
my $marc_utils_url = 'http://www.loc.gov/standards/marcxml/xslt/MARC21slimUtils.xsl';
my $swb_url = 'https://sru.bsz-bw.de/swb'; my $swb_url = 'https://sru.bsz-bw.de/swb';
my $searchkey_swb = 'pica.swn'; my $searchkey_swb = 'pica.swn';
my $searchkey_k10p = 'pica.ppn'; my $searchkey_k10p = 'pica.ppn';
...@@ -193,9 +191,6 @@ package SLUB::LZA::SIPBuilderBagIt; ...@@ -193,9 +191,6 @@ package SLUB::LZA::SIPBuilderBagIt;
$ua->agent("MyApp/0.1 "); $ua->agent("MyApp/0.1 ");
$ua->timeout(3600); #1h $ua->timeout(3600); #1h
my $xsl_dir = SLUB::LZA::SIPBuilderBagIt::check_xsl_directory();
SLUB::LZA::SIPBuilderBagIt::check_marc21_utility($xsl_dir, $ua);
my $srubase = $url; # host my $srubase = $url; # host
my $srusearchkey = $key; # SRU search key my $srusearchkey = $key; # SRU search key
my $sruvalue = $ppn; my $sruvalue = $ppn;
...@@ -236,35 +231,6 @@ package SLUB::LZA::SIPBuilderBagIt; ...@@ -236,35 +231,6 @@ package SLUB::LZA::SIPBuilderBagIt;
return; return;
} }
# Convert a MARC21 XML string into a patched MODS XML string.
#
# The MARC21->MODS stylesheet path is obtained from check_marc21_mods_xsl(),
# the stylesheet is applied to the parsed MARC21 document and the transformation
# result is post-processed by patch_mods(). When $with_debug is true, both the
# unpatched and the patched serialisation are written to DEBUG_*.xml files.
#
# Param:   MARC21 record as XML string (croaks when the value is false)
# Returns: patched MODS document serialised as string
sub get_mods_from_marc21($) {
    my ($marc21) = @_;
    croak "marc21 empty!" unless $marc21;

    # user agent passed on to the stylesheet check
    my $agent = LWP::UserAgent->new;
    $agent->agent("MyApp/0.1 ");
    $agent->timeout(3600); #1h

    my $stylesheet_dir  = SLUB::LZA::SIPBuilderBagIt::check_xsl_directory();
    my $stylesheet_path = SLUB::LZA::SIPBuilderBagIt::check_marc21_mods_xsl($stylesheet_dir, $agent);

    # compile the MARC21->MODS stylesheet
    my $xslt_engine    = XML::LibXSLT->new();
    my $stylesheet_doc = XML::LibXML->load_xml(location => $stylesheet_path, no_cdata => 1);
    my $transformer    = $xslt_engine->parse_stylesheet($stylesheet_doc);

    # parse the catalogue record and run the transformation
    my $marc_parser = XML::LibXML->new();
    my $marc_doc    = $marc_parser->parse_string($marc21);
    my $mods_doc    = $transformer->transform($marc_doc);

    my $mods_raw     = $transformer->output_string($mods_doc);
    my $mods_patched = $transformer->output_string( patch_mods($mods_doc) );

    if ($with_debug) {
        my @dumps = (
            [ "DEBUG_mods_unpatched.xml", $mods_raw ],
            [ "DEBUG_mods.xml",           $mods_patched ],
        );
        for my $dump (@dumps) {
            my ($dump_name, $dump_content) = @{ $dump };
            say "write $dump_name";
            SLUB::LZA::SIPBuilderBagIt::write_file($dump_name, $dump_content);
        }
    }

    return $mods_patched;
}
sub write_file($$){ sub write_file($$){
if(! defined $_[0]) { croak "filename not defined!"; } if(! defined $_[0]) { croak "filename not defined!"; }
if(! defined $_[1]) { croak "value not defined!"; } if(! defined $_[1]) { croak "value not defined!"; }
...@@ -280,73 +246,6 @@ package SLUB::LZA::SIPBuilderBagIt; ...@@ -280,73 +246,6 @@ package SLUB::LZA::SIPBuilderBagIt;
return 1; return 1;
} }
# Locate the "xsl" directory next to the parent directory of this source file
# and create it when it does not exist yet.
#
# Returns: the absolute path of the xsl directory (Path::Tiny object)
# Dies:    via confess when the directory still does not exist after the
#          creation attempt
sub check_xsl_directory{
    my $xsl_dir = path(__FILE__)->parent->realpath->parent->child("xsl");
    if (! $xsl_dir->is_dir) {
        say "Rebuilding XSL directory '$xsl_dir'";
        # mkpath reports the directories it created, which is empty (false)
        # when the directory appeared in the meantime. Success is therefore
        # judged by the directory's existence, not by the return value.
        $xsl_dir->mkpath();
        $xsl_dir->is_dir or confess("could not mkdir '$xsl_dir', $!");
    }
    return $xsl_dir->absolute();
}
# Make sure the MARC21 utility stylesheet named by $marc_utils_url is available
# in the local xsl directory, downloading it when the file is missing.
#
# Params:  $_[0] xsl directory
#          $_[1] user agent object providing get()
# Returns: path of the local utility stylesheet (Path::Tiny object)
# Dies:    croaks on undefined parameters, dies on empty ones;
#          exits with status 1 when the download does not succeed
sub check_marc21_utility($$){
    # check MARC21 utility xsl
    if(! defined $_[0]) { croak "xsl directory not defined!"; }
    if(! defined $_[1]) { croak "user agent not defined!"; }
    if($_[0] eq "" || $_[1] eq ""){ die "invalid parameters."; }
    my $xsl_dir = shift;
    my $ua = shift;
    my $marc_utils_basename = path($marc_utils_url)->basename;
    my $marc_utils_path = path($xsl_dir)->child($marc_utils_basename);
    if (!$marc_utils_path->is_file) {
        # NOTE(review): the stylesheet is fetched from whatever $marc_utils_url
        # holds; if that is a plain http:// URL, consider switching to https.
        say "Downloading MARC21 utility xsl '$marc_utils_url'";
        my $result = $ua->get($marc_utils_url);
        # Only a 2xx response carries the stylesheet. is_error would let
        # unresolved redirects and other non-2xx answers through, and their
        # body would then be saved as if it were the xsl.
        if (! $result->is_success) {
            print_scalar_data("", "ERROR: Failed to download '$marc_utils_url' (required for metadata download by PPN), " . $result->error_as_HTML, "", "red");
            exit 1;
        }
        say "Saving MARC21 utility xsl to file '$marc_utils_path'";
        my $xsl = $result->decoded_content;
        SLUB::LZA::SIPBuilderBagIt::write_file($marc_utils_path, $xsl);
    }
    return $marc_utils_path;
}
# Make sure a patched copy of the MARC21->MODS stylesheet exists in the local
# xsl directory. When it is missing, the stylesheet is downloaded from
# $marc_mods_url, stored unmodified, and a second copy is stored in which every
# occurrence of $marc_utils_url is replaced by the local utility stylesheet
# path, so the transformation can run offline.
#
# Params:  $_[0] xsl directory
#          $_[1] user agent object providing get()
# Returns: path of the patched stylesheet (Path::Tiny object)
# Dies:    croaks on undefined parameters, dies on empty ones;
#          exits with status 1 when the download does not succeed
sub check_marc21_mods_xsl($$){
    # check MARC21->MODS xsl
    if(! defined $_[0]) { croak "xsl directory not defined!"; }
    if(! defined $_[1]) { croak "user agent not defined!"; }
    if($_[0] eq "" || $_[1] eq ""){ die "invalid parameters."; }
    my $xsl_dir = shift;
    my $ua = shift;
    my $marc_mods_basename = path($marc_mods_url)->basename;
    my $marc_mods_path = path($xsl_dir)->child($marc_mods_basename)->stringify;
    my $marc_mods_patched_basename = path($marc_mods_url)->basename(".xsl") . ".patched.xsl";
    my $marc_mods_patched_path = path($xsl_dir)->child($marc_mods_patched_basename);
    if (! $marc_mods_patched_path->is_file) {
        say "Downloading MARC21->MODS xsl '$marc_mods_url'";
        my $result = $ua->get($marc_mods_url);
        # Only a 2xx response carries the stylesheet; is_error would accept
        # unresolved redirects and other non-2xx answers as content.
        if (! $result->is_success) {
            print_scalar_data("", "Failed to download '$marc_mods_url' (required for metadata download by PPN), " . $result->error_as_HTML, "", "red");
            exit 1;
        }
        say "Modifying MARC21->MODS xsl for offline use";
        my $xsl = $result->decoded_content;
        # keep the unmodified download next to the patched copy
        write_file($marc_mods_path, $xsl);
        my $xsl_modified = $xsl;
        my $marc_utils_path = check_marc21_utility($xsl_dir, $ua);
        # \Q..\E: the URL is literal text, not a pattern. Unquoted, its dots
        # would match any character.
        $xsl_modified =~ s#\Q$marc_utils_url\E#$marc_utils_path#g;
        say "Saving MARC21->MODS xsl to file '$marc_mods_patched_path'";
        write_file($marc_mods_patched_path, $xsl_modified);
    }
    return $marc_mods_patched_path;
}
sub patch_marc_response($){ sub patch_marc_response($){
if(! defined $_[0]) { croak "marcobject not defined!"; } if(! defined $_[0]) { croak "marcobject not defined!"; }
if($_[0] eq ""){ die "invalid parameters."; } if($_[0] eq ""){ die "invalid parameters."; }
...@@ -385,42 +284,6 @@ PATCH2 ...@@ -385,42 +284,6 @@ PATCH2
return $result; return $result;
} }
# Apply a corrective XSLT to a MODS document as a workaround for bugs in the
# LOC stylesheets: every mods:relatedItem below mods:mods that contains a
# mods:internetMediaType is replaced by an XML comment, everything else is
# copied unchanged.
#
# Param:   MODS document as XML parser object (croaks when undefined,
#          dies when it compares equal to the empty string)
# Returns: result of the stylesheet transformation
sub patch_mods($){
    my ($modsobj) = @_; # mods expected as XML Parser object
    croak "modsobject not defined!" unless defined $modsobj;
    die "invalid parameters." if $modsobj eq "";

    # TODO: Bugfix for /mets:mets/mets:dmdSec[1]/mets:mdWrap[1]/mets:xmlData[1]/mods:modsCollection[1]/mods:mods[1]/mods:relatedItem[2]/mods:internetMediaType[1]
    my $patch_source = <<'PATCH';
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:mods="http://www.loc.gov/mods/v3"
xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd"
exclude-result-prefixes="xs"
version="1.0">
<xsl:output encoding="UTF-8" indent="yes" method="xml"/>
<xsl:strip-space elements="*"/>
<xsl:template match="//mods:mods/mods:relatedItem[mods:internetMediaType]">
<xsl:comment>patched wrong //mods:mods/mods:relatedItem[mods:internetMediaType]</xsl:comment>
</xsl:template>
<xsl:template match="@* | node()">
<xsl:copy>
<xsl:apply-templates select="@* | node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
PATCH

    # compile the patch stylesheet and run it over the given document
    my $xslt_engine = XML::LibXSLT->new();
    my $patch_doc   = XML::LibXML->load_xml(string => $patch_source, no_cdata => 1);
    my $patcher     = $xslt_engine->parse_stylesheet($patch_doc);

    return $patcher->transform($modsobj);
}
sub generateBagName($$$){ sub generateBagName($$$){
my $file_date = $_[0]; my $file_date = $_[0];
my $ppn = $_[1]; my $ppn = $_[1];
...@@ -449,7 +312,7 @@ PATCH ...@@ -449,7 +312,7 @@ PATCH
my $metaPath = $_[1]; my $metaPath = $_[1];
my $rightsFilePath = $_[2]; my $rightsFilePath = $_[2];
my $aRefAddMetaFile = $_[3]; my $aRefAddMetaFile = $_[3];
my $mods = $_[4]; my $marc21 = $_[4];
my @addMetaFile = @{ $aRefAddMetaFile }; my @addMetaFile = @{ $aRefAddMetaFile };
...@@ -458,7 +321,7 @@ PATCH ...@@ -458,7 +321,7 @@ PATCH
foreach my $file(@addMetaFile){ foreach my $file(@addMetaFile){
# check reserved file names # check reserved file names
my $meta_file_name = basename($file); my $meta_file_name = basename($file);
if($meta_file_name eq "rights.xml" || $meta_file_name eq "mods.xml"){ if($meta_file_name eq "rights.xml" || $meta_file_name eq "marc21.xml"){
foreach my $f(@addMetaFile){ foreach my $f(@addMetaFile){
# check if numbered xml file exists # check if numbered xml file exists
my $f_name = basename($f); my $f_name = basename($f);
...@@ -466,7 +329,7 @@ PATCH ...@@ -466,7 +329,7 @@ PATCH
$i++; $i++;
} }
} }
print_scalar_data("", "WARNING: Renaming " . $meta_file_name . " to $i.xml, because meta filename <rights.xml> or <mods.xml> is reserved.", "", "yellow"); print_scalar_data("", "WARNING: Renaming " . $meta_file_name . " to $i.xml, because meta filename <rights.xml> or <marc21.xml> is reserved.", "", "yellow");
print_scalar_data("", "INFO: Read Docu for more information at https://slubarchiv.slub-dresden.de/technische-standards-fuer-die-ablieferung-von-digitalen-dokumenten/", "", "white"); print_scalar_data("", "INFO: Read Docu for more information at https://slubarchiv.slub-dresden.de/technische-standards-fuer-die-ablieferung-von-digitalen-dokumenten/", "", "white");
copy($file, $metaPath) or die "Copy failed: $!"; copy($file, $metaPath) or die "Copy failed: $!";
rename("$metaPath/$meta_file_name", "$metaPath/$i.xml"); rename("$metaPath/$meta_file_name", "$metaPath/$i.xml");
...@@ -477,9 +340,9 @@ PATCH ...@@ -477,9 +340,9 @@ PATCH
} }
} }
# mods.xml # marc21.xml
if ($ppn && $mods) { if ($ppn && $marc21) {
write_file("$metaPath/mods.xml", $mods); write_file("$metaPath/marc21.xml", $marc21);
} }
# rights.xml # rights.xml
...@@ -583,12 +446,11 @@ PATCH ...@@ -583,12 +446,11 @@ PATCH
my @addMetaFile = @{ $aRefAddMetaFile }; my @addMetaFile = @{ $aRefAddMetaFile };
my @addBagInfo = @{ $refAddBagInfo }; my @addBagInfo = @{ $refAddBagInfo };
my ($mods, $marc21); my $marc21;
# get descriptive metadata from catalog # get descriptive metadata from catalog
if ($ppn) { if ($ppn) {
$marc21 = get_marc21_from_catalogue($ppn); $marc21 = get_marc21_from_catalogue($ppn);
$mods = get_mods_from_marc21($marc21);
} }
# create bag dir # create bag dir
SLUB::LZA::SIPBuilderBagIt::createDir($bagPath); SLUB::LZA::SIPBuilderBagIt::createDir($bagPath);
...@@ -600,7 +462,7 @@ PATCH ...@@ -600,7 +462,7 @@ PATCH
# create meta dir # create meta dir
SLUB::LZA::SIPBuilderBagIt::createDir($metaPath); SLUB::LZA::SIPBuilderBagIt::createDir($metaPath);
# copy not payload files to meta # copy not payload files to meta
SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $mods); SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $marc21);
# add metadata for bag-info.txt # add metadata for bag-info.txt
add_metadata(\@addBagInfo, $ppn, $noppn, $marc21); add_metadata(\@addBagInfo, $ppn, $noppn, $marc21);
# metadata warnings # metadata warnings
...@@ -625,12 +487,11 @@ PATCH ...@@ -625,12 +487,11 @@ PATCH
my @addMetaFile = @{ $aRefAddMetaFile }; my @addMetaFile = @{ $aRefAddMetaFile };
my @addBagInfo = @{ $refAddBagInfo }; my @addBagInfo = @{ $refAddBagInfo };
my ($mods, $marc21); my $marc21;
# get descriptive metadata from catalog # get descriptive metadata from catalog
if ($ppn) { if ($ppn) {
$marc21 = get_marc21_from_catalogue($ppn); $marc21 = get_marc21_from_catalogue($ppn);
$mods = get_mods_from_marc21($marc21);
} }
# create bag dir # create bag dir
SLUB::LZA::SIPBuilderBagIt::createDir($bagPath); SLUB::LZA::SIPBuilderBagIt::createDir($bagPath);
...@@ -640,7 +501,7 @@ PATCH ...@@ -640,7 +501,7 @@ PATCH
# create meta dir # create meta dir
SLUB::LZA::SIPBuilderBagIt::createDir($metaPath); SLUB::LZA::SIPBuilderBagIt::createDir($metaPath);
# copy not payload files to meta # copy not payload files to meta
SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $mods); SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $marc21);
# add metadata for bag-info.txt # add metadata for bag-info.txt
add_metadata(\@addBagInfo, $ppn, $noppn, $marc21); add_metadata(\@addBagInfo, $ppn, $noppn, $marc21);
# metadata warnings # metadata warnings
...@@ -665,12 +526,11 @@ PATCH ...@@ -665,12 +526,11 @@ PATCH
my @addMetaFile = @{ $aRefAddMetaFile }; my @addMetaFile = @{ $aRefAddMetaFile };
my @addBagInfo = @{ $refAddBagInfo }; my @addBagInfo = @{ $refAddBagInfo };
my ($mods, $marc21); my $marc21;
# get descriptive metadata from catalog # get descriptive metadata from catalog
if ($ppn) { if ($ppn) {
$marc21 = get_marc21_from_catalogue($ppn); $marc21 = get_marc21_from_catalogue($ppn);
$mods = get_mods_from_marc21($marc21);
} }
# create bag dir # create bag dir
SLUB::LZA::SIPBuilderBagIt::createDir($bagPath); SLUB::LZA::SIPBuilderBagIt::createDir($bagPath);
...@@ -680,7 +540,7 @@ PATCH ...@@ -680,7 +540,7 @@ PATCH
# create meta dir # create meta dir
SLUB::LZA::SIPBuilderBagIt::createDir($metaPath); SLUB::LZA::SIPBuilderBagIt::createDir($metaPath);
# copy not payload files to meta # copy not payload files to meta
SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $mods); SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $marc21);
# add metadata for bag-info.txt # add metadata for bag-info.txt
add_metadata(\@addBagInfo, $ppn, $noppn, $marc21); add_metadata(\@addBagInfo, $ppn, $noppn, $marc21);
# metadata warnings # metadata warnings
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment