Select Git revision
slubsipbuilder.pl
slubsipbuilder.pl 21.79 KiB
#!/usr/bin/env perl
#===============================================================================
#
# FILE: slubsipbuilder.pl
#
# USAGE: ./slubsipbuilder.pl
#
# DESCRIPTION: A CLI tool to create a valid SIP for SLUBArchiv
#
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: related to official document
# "SIP Spezifikation (v1.4.2)"
# AUTHOR: Andreas Romeyke (romeyke@slub-dresden.de)
# ORGANIZATION: SLUB
# VERSION: 1.2
# CREATED: 2019-07-23
#===============================================================================
use strict;
use warnings;
use Carp;
use 5.28.0;
package SLUB::LZA::SIPBuilder;
use DateTime::Format::ISO8601;
use File::Copy qw(cp);
use File::Find;
use Path::Tiny;
use LWP::UserAgent; # to get MARC data
use MARC::Record;
use XML::LibXML;
use XML::LibXSLT;
use XML::XPath;
use Carp;
# URL of the Library of Congress XSLT stylesheet "MARC21slim2MODS3-6.xsl"
# (going by its file name: MARC21slim to MODS 3.6).
my $marc_mods_url = 'http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3-6.xsl';
# URL of the Library of Congress stylesheet "MARC21slimUtils.xsl".
# NOTE(review): presumably a helper needed by the stylesheet above -- confirm.
my $marc_utils_url = 'http://www.loc.gov/standards/marcxml/xslt/MARC21slimUtils.xsl';
# Base URL of the "swb" service on sru.bsz-bw.de.
# NOTE(review): host name suggests an SRU endpoint used to fetch MARC data -- confirm against usage.
my $swb_url = 'https://sru.bsz-bw.de/swb';
# NOTE(review): looks like the search index used for queries against $swb_url -- confirm.
my $searchkey = "pica.swn";
# NOTE(review): looks like the record schema requested from $swb_url -- confirm.
my $recordschema = "marcxmlvbos";
# Version of this package (package global).
our $VERSION = '1.2';
# Package-global debug switch; initialised to 0 (off).
our $with_debug=0;
# Write a string to a file, encoding it as UTF-8.
#
# Parameters:
#   $filename - path of the file to write; it is opened with '>', so an
#               existing file is truncated
#   $value    - text to write
# Returns: 1 on success.
# Croaks (via Carp) if the file cannot be opened, written to or closed.
#
# The ($$) prototype is retained on purpose so that existing call sites
# keep parsing exactly as before.
sub write_file($$) {
    my ($filename, $value) = @_;
    open(my $fh, '>:encoding(UTF-8)', $filename)
        or croak "Can't open '$filename', $!";
    # A failed write (e.g. disk full) must be reported, not silently ignored.
    print {$fh} $value
        or croak "could not write to file '$filename', $!";
    # Buffered write errors may only surface at close, so check it as well.
    close($fh)
        or croak "could not close file '$filename', $!";
    return 1;
}
# This patches the MODS XML as a workaround for bugs in the LOC's XSLT files.
sub patch_mods($) {
my $modsobj = shift; # mods expected as XML Parser object
# TODO: Bugfix for /mets:mets/mets:dmdSec[1]/mets:mdWrap[1]/mets:xmlData[1]/mods:modsCollection[1]/mods:mods[1]/mods:relatedItem[2]/mods:internetMediaType[1]
my $xslt_patch_string = <<'PATCH';
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:mods="http://www.loc.gov/mods/v3"
xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd"
exclude-result-prefixes="xs"
version="1.0">
<xsl:output encoding="UTF-8" indent="yes" method="xml"/>
<xsl:strip-space elements="*"/>