Skip to content
Snippets Groups Projects
Commit 5b710fe2 authored by Andreas Romeyke's avatar Andreas Romeyke
Browse files

- bugfix, handles utf8-encoded catalogue data correctly

- bugfix, filename check should allow path delimiter
parent fe68c3d1
No related branches found
No related tags found
No related merge requests found
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# "SIP Spezifikation (v1.4.1)" # "SIP Spezifikation (v1.4.1)"
# AUTHOR: Andreas Romeyke (romeyke@slub-dresden.de) # AUTHOR: Andreas Romeyke (romeyke@slub-dresden.de)
# ORGANIZATION: SLUB # ORGANIZATION: SLUB
# VERSION: 1.0 # VERSION: 1.1
# CREATED: 10.05.2016 # CREATED: 10.05.2016
#=============================================================================== #===============================================================================
...@@ -74,12 +74,45 @@ PATCH ...@@ -74,12 +74,45 @@ PATCH
return $result; return $result;
} }
# Rewrite a MARC record document into namespaced MARC21 "slim" form.
#
# The embedded XSLT does three things:
#   * a root element named "record" that is in no namespace is wrapped in a
#     new "collection" element,
#   * every element is re-created under its local name in the namespace
#     http://www.loc.gov/MARC21/slim,
#   * every attribute is copied onto the re-created element.
#
# Parameters:
#   $marcobj - parsed XML document, as accepted by XML::LibXSLT's transform()
# Returns:
#   the result of the transformation as returned by transform()
sub patch_marc_response($) {
    my $marcobj = shift; # marcobj expected as XML Parser object

    # Non-interpolating heredoc: the stylesheet is a literal, so Perl must
    # never try to expand '@' or '$' sequences inside it. The XML declaration
    # has to stay on the very first column of the string.
    my $xslt_patch_string = <<'PATCH2';
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://www.loc.gov/MARC21/slim"
    exclude-result-prefixes="" version="1.0">
    <xsl:template match="/record">
        <xsl:element name="collection">
            <xsl:element name="{local-name()}" namespace="http://www.loc.gov/MARC21/slim">
                <xsl:apply-templates select="node() | @*"/>
            </xsl:element>
        </xsl:element>
    </xsl:template>
    <xsl:template match="*">
        <xsl:element name="{local-name()}" namespace="http://www.loc.gov/MARC21/slim">
            <xsl:apply-templates select="node() | @*"/>
        </xsl:element>
    </xsl:template>
    <!-- Without this rule the built-in template would turn attributes
         (tag, ind1, ind2, code, ...) into plain text content. -->
    <xsl:template match="@*">
        <xsl:copy/>
    </xsl:template>
</xsl:stylesheet>
PATCH2

    my $xslt       = XML::LibXSLT->new();
    my $xslt_patch = XML::LibXML->load_xml(string => $xslt_patch_string, no_cdata => 1);
    my $stylesheet = $xslt->parse_stylesheet($xslt_patch);
    my $result     = $stylesheet->transform($marcobj);
    return $result;
}
# The "old" request does not handle umlauts or other UTF-8 characters outside the ASCII range correctly.
# old: http://swb2.bsz-bw.de/sru/DB=2.1/username=/password=/?query=pica.ppn+%3D+"494384174"&startRecord=1&maximumRecords=10&recordSchema=marcxml
# new: http://swb2.bsz-bw.de/sru/DB=2.1/username=/password=/?query=pica.ppn+%3D+"494384174"&startRecord=1&maximumRecords=10&recordSchema=marc21&recordPacking=xml&version=1.1
sub get_mods_from ($$) { # $mods = ($url, $ppn) sub get_mods_from ($$) { # $mods = ($url, $ppn)
my $url = shift; my $url = shift;
my $ppn = shift; # example: "457035137" for "Der Fichtelberg" my $ppn = shift; # example: "457035137" for "Der Fichtelberg"
#### where to find XSLT #### where to find XSLT
# my $marc_dc_url = 'http://www.loc.gov/standards/marcxml/xslt/MARC21slim2RDFDC.xsl'; # my $marc_dc_url = 'http://www.loc.gov/standards/marcxml/xslt/MARC21slim2RDFDC.xsl';
my $marc_mods_url = 'http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3-5.xsl'; my $marc_mods_url = 'http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3-6.xsl';
my $ua = LWP::UserAgent->new; my $ua = LWP::UserAgent->new;
$ua->agent("MyApp/0.1 "); $ua->agent("MyApp/0.1 ");
...@@ -88,23 +121,34 @@ sub get_mods_from ($$) { # $mods = ($url, $ppn) ...@@ -88,23 +121,34 @@ sub get_mods_from ($$) { # $mods = ($url, $ppn)
my $srusearchkey="pica.ppn"; my $srusearchkey="pica.ppn";
my $sruvalue=$ppn; my $sruvalue=$ppn;
my $srumaxrecords=1; my $srumaxrecords=1;
my $sruschema="marcxml"; #my $sruschema="marcxml";
my $sru = "${srubase}?query=${srusearchkey}+%3D+%22${sruvalue}%22&startRecord=1&maximumRecords=${srumaxrecords}&recordSchema=${sruschema}"; my $sruschema="marc21";
#my $sru = "${srubase}?query=${srusearchkey}+%3D+%22${sruvalue}%22&startRecord=1&maximumRecords=${srumaxrecords}&recordSchema=${sruschema}";
my $sru = "${srubase}?query=${srusearchkey}+%3D+%22${sruvalue}%22&startRecord=1&maximumRecords=${srumaxrecords}&recordSchema=${sruschema}&recordPacking=xml&version=1.1";
#p ($sru); # debug output #p ($sru); # debug output
my $record = $ua->get($sru); # ask SWB for given PPN my $record = $ua->get($sru); # ask SWB for given PPN
if ($record->is_success) { if ($record->is_success) {
# parse ZiNG response, extract MARC-data # parse ZiNG response, extract MARC-data
my $xp = XML::XPath->new( $record->decoded_content ); my $xp = XML::XPath->new( $record->decoded_content );
my $marcblob = $xp->findnodes_as_string('/ZiNG:searchRetrieveResponse/ZiNG:records/ZiNG:record/ZiNG:recordData/*');
my $parser = XML::LibXML->new(); my $parser = XML::LibXML->new();
if ($with_debug) {
write_file("DEBUG_${ppn}_response.xml", {binmode => ':utf8'}, $record->decoded_content);
}
my $marcblob = $parser->parse_string(
$xp->findnodes_as_string('/*[local-name()="searchRetrieveResponse"]/*[local-name()="records"]/*[local-name()="record"]/*[local-name()="recordData"]/*')
);
my $marcblob_patched = patch_marc_response( $marcblob );
if ($with_debug) {
write_file("DEBUG_${ppn}_marc_unpatched.xml", {binmode => ':utf8'}, $marcblob);
write_file("DEBUG_${ppn}_marc.xml", {binmode => ':utf8'}, $marcblob_patched);
}
my $xslt = XML::LibXSLT->new(); my $xslt = XML::LibXSLT->new();
my $marcmods = XML::LibXML->load_xml(location=>$marc_mods_url, no_cdata=>1); my $marcmods = XML::LibXML->load_xml(location=>$marc_mods_url, no_cdata=>1);
my $stylesheet = $xslt->parse_stylesheet ( $marcmods); my $stylesheet = $xslt->parse_stylesheet ( $marcmods);
my $marc = $parser->parse_string( $marcblob ); my $marc = $parser->parse_string( $marcblob_patched );
my $result = $stylesheet->transform( $marc); my $result = $stylesheet->transform( $marc);
if ($with_debug) { if ($with_debug) {
write_file("DEBUG_${ppn}_marc.xml", $marcblob); write_file("DEBUG_${ppn}_unpatched_mods.xml", {binmode => ':utf8'}, $stylesheet->output_string( $result ));
write_file("DEBUG_${ppn}_unpatched_mods.xml", $stylesheet->output_string( $result ));
} }
$result = patch_mods( $result); $result = patch_mods( $result);
my $result_string = $stylesheet->output_string( $result ); my $result_string = $stylesheet->output_string( $result );
...@@ -179,8 +223,8 @@ my $wanted=sub { ...@@ -179,8 +223,8 @@ my $wanted=sub {
() ()
} else { } else {
my $file=$File::Find::name; my $file=$File::Find::name;
if ($file !~ m#^[-A-Za-z0-9_\.]+$#) { if ($file !~ m#^[-A-Za-z0-9_\./]+$#) {
confess("file '$file' does not match regex '^[-A-Za-z0-9_\.]+\$'"); confess("file '$file' does not match regex '^[-A-Za-z0-9_\./]+\$'");
} }
my $source = $file; my $source = $file;
$filecopyhash{$source}->{'source'}=$file; $filecopyhash{$source}->{'source'}=$file;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment