diff --git a/bin/slubsipbuilderbagit.pl b/bin/slubsipbuilderbagit.pl index 68e2bf7fba2c65bdaec6d720746252bcc4000ae9..c466793a1eed660cf25b1fcc6acc6910d6b9a4fb 100644 --- a/bin/slubsipbuilderbagit.pl +++ b/bin/slubsipbuilderbagit.pl @@ -77,8 +77,6 @@ package SLUB::LZA::SIPBuilderBagIt; our $with_debug = 0; # output debug infos and files # catalogue infos - my $marc_mods_url = 'http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3-6.xsl'; - my $marc_utils_url = 'http://www.loc.gov/standards/marcxml/xslt/MARC21slimUtils.xsl'; my $swb_url = 'https://sru.bsz-bw.de/swb'; my $searchkey_swb = 'pica.swn'; my $searchkey_k10p = 'pica.ppn'; @@ -193,9 +191,6 @@ package SLUB::LZA::SIPBuilderBagIt; $ua->agent("MyApp/0.1 "); $ua->timeout(3600); #1h - my $xsl_dir = SLUB::LZA::SIPBuilderBagIt::check_xsl_directory(); - SLUB::LZA::SIPBuilderBagIt::check_marc21_utility($xsl_dir, $ua); - my $srubase = $url; # host my $srusearchkey = $key; # SRU search key my $sruvalue = $ppn; @@ -236,35 +231,6 @@ package SLUB::LZA::SIPBuilderBagIt; return; } - sub get_mods_from_marc21($) { - if(! $_[0]){ croak "marc21 empty!"; } - my $marc21 = shift; - - my $ua = LWP::UserAgent->new; - $ua->agent("MyApp/0.1 "); - $ua->timeout(3600); #1h - - my $xsl_dir = SLUB::LZA::SIPBuilderBagIt::check_xsl_directory(); - - my $marc_mods_patched_path = SLUB::LZA::SIPBuilderBagIt::check_marc21_mods_xsl($xsl_dir, $ua); - my $xslt = XML::LibXSLT->new(); - my $marcmods = XML::LibXML->load_xml(location => $marc_mods_patched_path, no_cdata => 1); - my $stylesheet = $xslt->parse_stylesheet($marcmods); - my $parser = XML::LibXML->new(); - my $marc = $parser->parse_string($marc21); - my $result = $stylesheet->transform($marc); - my $mods = $stylesheet->output_string($result); - my $mods_patched = $stylesheet->output_string( patch_mods($result) ); - if ($with_debug) { - say "write DEBUG_mods_unpatched.xml"; - SLUB::LZA::SIPBuilderBagIt::write_file("DEBUG_mods_unpatched.xml", $mods); - say "write DEBUG_mods.xml"; - SLUB::LZA::SIPBuilderBagIt::write_file("DEBUG_mods.xml", $mods_patched); - } - - return $mods_patched; - } - sub write_file($$){ if(! defined $_[0]) { croak "filename not defined!"; } if(! defined $_[1]) { croak "value not defined!"; } @@ -280,73 +246,6 @@ package SLUB::LZA::SIPBuilderBagIt; return 1; } - sub check_xsl_directory{ - my $xsl_dir = path(__FILE__)->parent->realpath->parent->child("xsl"); - if (! $xsl_dir->is_dir) { - say "Rebuilding XSL directory '$xsl_dir'"; - $xsl_dir->mkpath() || confess("could not mkdir '$xsl_dir', $!"); - } - return $xsl_dir->absolute(); - } - - sub check_marc21_utility($$){ - # check MARC21 utility xsl - if(! defined $_[0]) { croak "xsl directory not defined!"; } - if(! defined $_[1]) { croak "user agent not defined!"; } - if($_[0] eq "" || $_[1] eq ""){ die "invalid parameters."; } - - my $xsl_dir = shift; - my $ua = shift; - my $marc_utils_basename = path($marc_utils_url)->basename; - my $marc_utils_path = path($xsl_dir)->child($marc_utils_basename); - - if (!$marc_utils_path->is_file) { - say "Downloading MARC21 utility xsl '$marc_utils_url'"; - my $result = $ua->get($marc_utils_url); - if ($result->is_error) { - print_scalar_data("", "ERROR: Failed to download '$marc_utils_url' (required for metadata download by PPN), " . $result->error_as_HTML, "", "red"); - exit 1; - } - say "Saving MARC21 utility xsl to file '$marc_utils_path'"; - my $xsl = $result->decoded_content; - SLUB::LZA::SIPBuilderBagIt::write_file($marc_utils_path, $xsl); - } - return $marc_utils_path; - } - - sub check_marc21_mods_xsl($$){ - # check MARC21->MODS xsl - if(! defined $_[0]) { croak "xsl directory not defined!"; } - if(! defined $_[1]) { croak "user agent not defined!"; } - if($_[0] eq "" || $_[1] eq ""){ die "invalid parameters."; } - - my $xsl_dir = shift; - my $ua = shift; - my $marc_mods_basename = path($marc_mods_url)->basename; - my $marc_mods_path = path($xsl_dir)->child($marc_mods_basename)->stringify; - my $marc_mods_patched_basename = path($marc_mods_url)->basename(".xsl") . ".patched.xsl"; - my $marc_mods_patched_path = path($xsl_dir)->child($marc_mods_patched_basename); - - if (! $marc_mods_patched_path->is_file) { - my $xsl; - say "Downloading MARC21->MODS xsl '$marc_mods_url'"; - my $result = $ua->get($marc_mods_url); - if ($result->is_error) { - print_scalar_data("", "Failed to download '$marc_mods_url' (required for metadata download by PPN), " . $result->error_as_HTML, "", "red"); - exit 1; - } - say "Modifying MARC21->MODS xsl for offline use"; - $xsl = $result->decoded_content; - write_file($marc_mods_path, $xsl); - my $xsl_modified = $xsl; - my $marc_utils_path = check_marc21_utility($xsl_dir, $ua); - $xsl_modified =~ s#$marc_utils_url#$marc_utils_path#g; - say "Saving MARC21->MODS xsl to file '$marc_mods_path'"; - write_file($marc_mods_patched_path, $xsl_modified); - } - return $marc_mods_patched_path; - } - sub patch_marc_response($){ if(! defined $_[0]) { croak "marcobject not defined!"; } if($_[0] eq ""){ die "invalid parameters."; } @@ -385,42 +284,6 @@ PATCH2 return $result; } - sub patch_mods($){ - # this will patch the mods-xml as a workaround for bugs in LOCs xslt files - if(! defined $_[0]) { croak "modsobject not defined!"; } - if($_[0] eq ""){ die "invalid parameters."; } - - my $modsobj = shift; # mods expected as XML Parser object - # TODO: Bugfix for /mets:mets/mets:dmdSec[1]/mets:mdWrap[1]/mets:xmlData[1]/mods:modsCollection[1]/mods:mods[1]/mods:relatedItem[2]/mods:internetMediaType[1] - my $xslt_patch_string = <<'PATCH'; -<?xml version="1.0" encoding="UTF-8"?> -<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" - xmlns:xs="http://www.w3.org/2001/XMLSchema" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xmlns:mods="http://www.loc.gov/mods/v3" - xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd" - exclude-result-prefixes="xs" - version="1.0"> - <xsl:output encoding="UTF-8" indent="yes" method="xml"/> - <xsl:strip-space elements="*"/> - <xsl:template match="//mods:mods/mods:relatedItem[mods:internetMediaType]"> - <xsl:comment>patched wrong //mods:mods/mods:relatedItem[mods:internetMediaType]</xsl:comment> - </xsl:template> - <xsl:template match="@* | node()"> - <xsl:copy> - <xsl:apply-templates select="@* | node()"/> - </xsl:copy> - </xsl:template> -</xsl:stylesheet> -PATCH - my $xslt = XML::LibXSLT->new(); - my $xslt_patch = XML::LibXML->load_xml(string => $xslt_patch_string, no_cdata => 1); - my $stylesheet = $xslt->parse_stylesheet($xslt_patch); - my $result = $stylesheet->transform($modsobj); - - return $result; - } - sub generateBagName($$$){ my $file_date = $_[0]; my $ppn = $_[1]; @@ -449,7 +312,7 @@ PATCH my $metaPath = $_[1]; my $rightsFilePath = $_[2]; my $aRefAddMetaFile = $_[3]; - my $mods = $_[4]; + my $marc21 = $_[4]; my @addMetaFile = @{ $aRefAddMetaFile }; @@ -458,7 +321,7 @@ PATCH foreach my $file(@addMetaFile){ # check reserved file names my $meta_file_name = basename($file); - if($meta_file_name eq "rights.xml" || $meta_file_name eq "mods.xml"){ + if($meta_file_name eq "rights.xml" || $meta_file_name eq "marc21.xml"){ foreach my $f(@addMetaFile){ # check if numbered xml file exists my $f_name = basename($f); @@ -466,7 +329,7 @@ PATCH $i++; } } - print_scalar_data("", "WARNING: Renaming " . $meta_file_name . " to $i.xml, because meta filename <rights.xml> or <mods.xml> is reserved.", "", "yellow"); + print_scalar_data("", "WARNING: Renaming " . $meta_file_name . " to $i.xml, because meta filename <rights.xml> or <marc21.xml> is reserved.", "", "yellow"); print_scalar_data("", "INFO: Read Docu for more information at https://slubarchiv.slub-dresden.de/technische-standards-fuer-die-ablieferung-von-digitalen-dokumenten/", "", "white"); copy($file, $metaPath) or die "Copy failed: $!"; rename("$metaPath/$meta_file_name", "$metaPath/$i.xml"); @@ -477,9 +340,9 @@ PATCH } } - # mods.xml - if ($ppn && $mods) { - write_file("$metaPath/mods.xml", $mods); + # marc21.xml + if ($ppn && $marc21) { + write_file("$metaPath/marc21.xml", $marc21); } # rights.xml @@ -583,12 +446,11 @@ PATCH my @addMetaFile = @{ $aRefAddMetaFile }; my @addBagInfo = @{ $refAddBagInfo }; - my ($mods, $marc21); + my $marc21; # get descriptive metadata from catalog if ($ppn) { $marc21 = get_marc21_from_catalogue($ppn); - $mods = get_mods_from_marc21($marc21); } # create bag dir SLUB::LZA::SIPBuilderBagIt::createDir($bagPath); @@ -600,7 +462,7 @@ PATCH # create meta dir SLUB::LZA::SIPBuilderBagIt::createDir($metaPath); # copy not payload files to meta - SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $mods); + SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $marc21); # add metadata for bag-info.txt add_metadata(\@addBagInfo, $ppn, $noppn, $marc21); # metadata warnings @@ -625,12 +487,11 @@ PATCH my @addMetaFile = @{ $aRefAddMetaFile }; my @addBagInfo = @{ $refAddBagInfo }; - my ($mods, $marc21); + my $marc21; # get descriptive metadata from catalog if ($ppn) { $marc21 = get_marc21_from_catalogue($ppn); - $mods = get_mods_from_marc21($marc21); } # create bag dir SLUB::LZA::SIPBuilderBagIt::createDir($bagPath); @@ -640,7 +501,7 @@ PATCH # create meta dir SLUB::LZA::SIPBuilderBagIt::createDir($metaPath); # copy not payload files to meta - SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $mods); + SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $marc21); # add metadata for bag-info.txt add_metadata(\@addBagInfo, $ppn, $noppn, $marc21); # metadata warnings @@ -665,12 +526,11 @@ PATCH my @addMetaFile = @{ $aRefAddMetaFile }; my @addBagInfo = @{ $refAddBagInfo }; - my ($mods, $marc21); + my $marc21; # get descriptive metadata from catalog if ($ppn) { $marc21 = get_marc21_from_catalogue($ppn); - $mods = get_mods_from_marc21($marc21); } # create bag dir SLUB::LZA::SIPBuilderBagIt::createDir($bagPath); @@ -680,7 +540,7 @@ PATCH # create meta dir SLUB::LZA::SIPBuilderBagIt::createDir($metaPath); # copy not payload files to meta - SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $mods); + SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $marc21); # add metadata for bag-info.txt add_metadata(\@addBagInfo, $ppn, $noppn, $marc21); # metadata warnings