Skip to content
Snippets Groups Projects
Commit f97b1ccf authored by Andreas Romeyke's avatar Andreas Romeyke
Browse files

- refactoring, extracting helper functions

parent 66a7aeda
No related branches found
No related tags found
No related merge requests found
......@@ -246,6 +246,155 @@ PATCH2
}
return;
}
sub create_filecopyhash {
my $directory = shift;
my $content = shift;
my %filecopyhash;
my $wanted=sub {
if (-d $_) {
# dir, do nothing
();
} else {
my $file=$File::Find::name;
if ($file !~ m#^[-A-Za-z0-9_\./]+$#) {
confess("file '$file' does not match regex '^[-A-Za-z0-9_\./]+\$'");
}
my $source = $file;
$filecopyhash{$source}->{'source'}=$file;
$file=~s#^$directory/?##;
$filecopyhash{$source}{'relative'}="data/$file";
$filecopyhash{$source}{'target'}="$content/$file";
my $fh;
open($fh, "<", $source) or confess ("Can't open '$source', $!");
binmode($fh);
my $ctx = Digest::MD5->new;
$ctx->addfile(*$fh);
close ($fh);
my $md5 = $ctx->hexdigest;
$filecopyhash{$source}{'md5sum'}=$md5;
}
};
finddepth($wanted, $directory);
return \%filecopyhash;
}
sub prepare_dmd_section_with_ppn ($) {
my $ppn = shift;
my $mods = SLUB::LZA::SIPBuilder::get_mods_from($swb_url, $ppn, $searchkey, $recordschema);
if ($with_debug) {
SLUB::LZA::SIPBuilder::write_file("DEBUG_${ppn}_mods.xml", $mods);
}
# remove the <xml /> from beginning of the answer
$mods=~ s#<\?xml version="1.0" encoding="UTF-8"\?>#<!-- removed xml header from mods part -->#;
my $dmd =<<"DMD";
<mets:dmdSec ID="DMDLOG_0000">
<!-- bibliographic metadata -->
<mets:mdWrap MDTYPE="MODS">
<mets:xmlData>
$mods
</mets:xmlData>
</mets:mdWrap>
</mets:dmdSec>
DMD
return $dmd;
}
sub prepare_dmd_section_with_noppn ($) {
my $noppn = shift;
my $mods =<<'MODS';
<mods version="3.6"
xmlns="http://www.loc.gov/mods/v3"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd">
<identifier>$noppn</identifier>
</mods>
MODS
my $dmd =<<"DMD";
<mets:dmdSec ID="DMDLOG_0000">
<!-- bibliographic metadata -->
<mets:mdWrap MDTYPE="MODS">
<mets:xmlData>
$mods
</mets:xmlData>
</mets:mdWrap>
</mets:dmdSec>
DMD
return $dmd;
}
sub prepare_amd_section($$$$$$) {
my $export_to_archive_date = shift;
my $external_workflow = shift;
my $external_id = shift;
my $external_conservation_flag = shift;
my $external_isil = shift;
my $external_value_descr = shift;
my $amd =<<"AMD";
<mets:amdSec ID="AMD">
<!-- SIP metadata for automated processing by submission application -->
<mets:techMD ID="ARCHIVE">
<mets:mdWrap MDTYPE="OTHER" MIMETYPE="text/xml" OTHERMDTYPE="ARCHIVE">
<mets:xmlData>
<archive:record version="v2017.1" xmlns:archive="http://slub-dresden.de/slubarchiv">
<archive:exportToArchiveDate>$export_to_archive_date</archive:exportToArchiveDate>
<archive:externalId>$external_id</archive:externalId>
<archive:externalWorkflow>$external_workflow</archive:externalWorkflow>
<archive:hasConservationReason>$external_conservation_flag</archive:hasConservationReason>
<archive:externalIsilId>$external_isil</archive:externalIsilId>
<archive:archivalValueDescription>$external_value_descr</archive:archivalValueDescription>
</archive:record>
</mets:xmlData>
</mets:mdWrap>
</mets:techMD>
</mets:amdSec>
AMD
return $amd;
}
sub prepare_files_sections($) {
my $filecopyhash = shift;
my @fsec;
my $i=0;
foreach my $fkey (sort keys (%{$filecopyhash})) {
push @fsec, sprintf("<mets:file ID=\"FILE_%015u_LZA\" CHECKSUMTYPE=\"MD5\" CHECKSUM=\"%s\">", $i, $filecopyhash->{$fkey}->{"md5sum"});
push @fsec, sprintf("<mets:FLocat xmlns:xlink=\"http://www.w3.org/1999/xlink\" LOCTYPE=\"URL\" xlink:href=\"file://%s\"/>", $filecopyhash->{$fkey}->{"relative"});
push @fsec, "</mets:file>";
$i++;
}
my $files = join("\n", @fsec);
my $filesec=<<"FILESEC";
<mets:fileSec>
<mets:fileGrp USE="LZA">
$files
</mets:fileGrp>
</mets:fileSec>
FILESEC
return $filesec;
}
sub prepare_struct_map($) {
my $filecopyhash = shift;
my @ssec;
my $i=0;
foreach my $fkey (sort keys (%{$filecopyhash})) {
push @ssec, sprintf("<mets:div ID=\"PHYS_%015u_LZA\" TYPE=\"fileorderSequence\">", $i);
push @ssec, sprintf("<mets:fptr FILEID=\"FILE_%015u_LZA\" />", $i);
push @ssec, "</mets:div>";
$i++;
}
my $structs = join("\n", @ssec);
my $structmap =<<"STRUCTMAP";
<mets:structMap TYPE="PHYSICAL">
<mets:div ID="PHYS_0000" TYPE="ieDir">
$structs
</mets:div>
</mets:structMap>
STRUCTMAP
return $structmap;
}
# end package
package main;
......@@ -311,11 +460,6 @@ if ($external_id !~ m#^[a-z0-9]+$#) { confess("you need to specify a val
if ($external_workflow !~ m#^[a-z0-9]+$#) { confess("you need to specify a valid external workflow (^[a-z0-9]+\$)"); }
if (!$external_value_descr) { confess("you need to specify an external value description (reason for archiving)"); }
#===============================================================================
#===============================================================================
sub main {
......@@ -324,142 +468,31 @@ sub main {
my $file_date = $export_to_archive_date;
$file_date =~ s/T/_/g; # replace 'T' with '_'
$file_date =~ s/:/-/g; # replace ':' with '-'
# create output dir
#~ if (! -d $output) {
#~ mkpath("$output") || confess("could not create SIP directory for '$output', $!");
#~ }
#~ $output = abs_path($output);
# prepare dirs
my $sip_root_dir = (defined $ppn)? "PPN-${ppn}_${file_date}" : "ID-${noppn}_${file_date}";
my $content = path($output)->child($sip_root_dir)->child("data")->stringify;
if (!defined $as_zip) {
path($content)->mkpath;
}
my $filecopyhash = SLUB::LZA::SIPBuilder::create_filecopyhash($directory, $content);
# prepare dmd-sec
my $mods;
if (defined $ppn) {
$mods = SLUB::LZA::SIPBuilder::get_mods_from($swb_url, $ppn, $searchkey, $recordschema);
if ($with_debug) {
SLUB::LZA::SIPBuilder::write_file("DEBUG_${ppn}_mods.xml", $mods);
}
# remove the <xml /> from beginning of the answer
$mods=~ s#<\?xml version="1.0" encoding="UTF-8"\?>#<!-- removed xml header from mods part -->#;
} elsif (defined $noppn) {
$mods =<<'MODS';
<mods version="3.6"
xmlns="http://www.loc.gov/mods/v3"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd">
<identifier>$noppn</identifier>
</mods>
MODS
}
my $dmd =<<'DMD';
<mets:dmdSec ID="DMDLOG_0000">
<!-- bibliographic metadata -->
<mets:mdWrap MDTYPE="MODS">
<mets:xmlData>
$mods
</mets:xmlData>
</mets:mdWrap>
</mets:dmdSec>
DMD
my $dmd = (defined $ppn)? SLUB::LZA::SIPBuilder::prepare_dmd_section_with_ppn( $ppn ) : SLUB::LZA::SIPBuilder::prepare_dmd_section_with_noppn( $noppn );
# prepare amd-sec
my $amd =<<'AMD';
<mets:amdSec ID="AMD">
<!-- SIP metadata for automated processing by submission application -->
<mets:techMD ID="ARCHIVE">
<mets:mdWrap MDTYPE="OTHER" MIMETYPE="text/xml" OTHERMDTYPE="ARCHIVE">
<mets:xmlData>
<archive:record version="v2017.1" xmlns:archive="http://slub-dresden.de/slubarchiv">
<archive:exportToArchiveDate>$export_to_archive_date</archive:exportToArchiveDate>
<archive:externalId>$external_id</archive:externalId>
<archive:externalWorkflow>$external_workflow</archive:externalWorkflow>
<archive:hasConservationReason>$external_conservation_flag</archive:hasConservationReason>
<archive:externalIsilId>$external_isil</archive:externalIsilId>
<archive:archivalValueDescription>$external_value_descr</archive:archivalValueDescription>
</archive:record>
</mets:xmlData>
</mets:mdWrap>
</mets:techMD>
</mets:amdSec>
AMD
# create filecopyhash
my %filecopyhash;
my $wanted=sub {
if (-d $_) {
# dir, do nothing
();
} else {
my $file=$File::Find::name;
if ($file !~ m#^[-A-Za-z0-9_\./]+$#) {
confess("file '$file' does not match regex '^[-A-Za-z0-9_\./]+\$'");
}
my $source = $file;
$filecopyhash{$source}->{'source'}=$file;
$file=~s#^$directory/?##;
$filecopyhash{$source}{'relative'}="data/$file";
$filecopyhash{$source}{'target'}="$content/$file";
my $fh;
open($fh, "<", $source) or confess ("Can't open '$source', $!");
binmode($fh);
my $ctx = Digest::MD5->new;
$ctx->addfile(*$fh);
close ($fh);
my $md5 = $ctx->hexdigest;
$filecopyhash{$source}{'md5sum'}=$md5;
}
};
finddepth($wanted, $directory);
my $amd = SLUB::LZA::SIPBuilder::prepare_amd_section(
$export_to_archive_date,
$external_workflow,
$external_id,
$external_conservation_flag,
$external_isil,
$external_value_descr
);
# create fileSec
my $filesec=<<'FILESEC1';
<mets:fileSec>
<mets:fileGrp USE="LZA">
FILESEC1
{
my @fsec;
my $i=0;
foreach my $fkey (sort keys (%filecopyhash)) {
push @fsec, sprintf("<mets:file ID=\"FILE_%015u_LZA\" CHECKSUMTYPE=\"MD5\" CHECKSUM=\"%s\">", $i, $filecopyhash{$fkey}->{"md5sum"});
push @fsec, sprintf("<mets:FLocat xmlns:xlink=\"http://www.w3.org/1999/xlink\" LOCTYPE=\"URL\" xlink:href=\"file://%s\"/>", $filecopyhash{$fkey}->{"relative"});
push @fsec, "</mets:file>";
$i++;
}
$filesec = join("\n", $filesec, @fsec);
}
$filesec = $filesec . <<'FILESEC2';
</mets:fileGrp>
</mets:fileSec>
FILESEC2
my $filesec = SLUB::LZA::SIPBuilder::prepare_files_sections($filecopyhash);
# prepare structmap
my $structmap =<<'STRUCTMAP1';
<mets:structMap TYPE="PHYSICAL">
<mets:div ID="PHYS_0000" TYPE="ieDir">
STRUCTMAP1
{
my @ssec;
my $i=0;
foreach my $fkey (sort keys (%filecopyhash)) {
push @ssec, sprintf("<mets:div ID=\"PHYS_%015u_LZA\" TYPE=\"fileorderSequence\">", $i);
push @ssec, sprintf("<mets:fptr FILEID=\"FILE_%015u_LZA\" />", $i);
push @ssec, "</mets:div>";
$i++;
}
$structmap = join("\n", $structmap, @ssec);
}
$structmap = $structmap . <<'STRUCTMAP2';
</mets:div>
</mets:structMap>
STRUCTMAP2
my $structmap = SLUB::LZA::SIPBuilder::prepare_struct_map($filecopyhash);
# create sip.xml
my $sip =<<'METS';
my $sip =<<"METS";
<?xml version="1.0" encoding="utf-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
......@@ -471,12 +504,12 @@ STRUCTMAP2
</mets:mets>
METS
# compress if needed
# write stuff out
if (!defined $as_zip) {
SLUB::LZA::SIPBuilder::write_file( path($output)->child($sip_root_dir)->child("sip.xml")->stringify, $sip );
# copy source to target
foreach my $source (sort keys (%filecopyhash)) {
my $target = path($filecopyhash{$source}->{"target"})->stringify; # CHECK ON WINDOWS
foreach my $source (sort keys (%{$filecopyhash})) {
my $target = path($filecopyhash->{$source}->{"target"})->stringify; # CHECK ON WINDOWS
my $basename = path($target)->parent->stringify;
if (! -d $basename) {
path($basename)->mkpath;
......@@ -490,8 +523,8 @@ METS
my $zip = Archive::Zip::SimpleZip->new( $zip_file_path, Zip64=>1 );
$zip->addString($sip, Name=>path($sip_root_dir)->child("sip.xml")->stringify);
# copy source to target
foreach my $source (sort keys (%filecopyhash)) {
my $target = path($sip_root_dir)->child($filecopyhash{$source}->{"relative"})->stringify; # CHECK ON WINDOWS
foreach my $source (sort keys (%{$filecopyhash})) {
my $target = path($sip_root_dir)->child($filecopyhash->{$source}->{"relative"})->stringify; # CHECK ON WINDOWS
my $basename = path($target)->parent->stringify;
$zip->add( $source, Name=> $target) || confess ("could not zip copy from '$source' to '$target', $!");
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment