From d639cc3a897a43efea0d0d6b3de1b7eb5780a60a Mon Sep 17 00:00:00 2001 From: Andreas Romeyke <art1@andreas-romeyke.de> Date: Sat, 23 Oct 2021 17:18:47 +0200 Subject: [PATCH] =?UTF-8?q?-=20dos2unix=20-=20uses=20cmpFilesByIgnoringCRL?= =?UTF-8?q?F=5FLF()=20instead=20of=20equalFilesByIgnoring=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- t/slubsipbuilderbagit.t | 797 +++++++++++++++++++--------------------- 1 file changed, 381 insertions(+), 416 deletions(-) diff --git a/t/slubsipbuilderbagit.t b/t/slubsipbuilderbagit.t index 758b063..269398b 100644 --- a/t/slubsipbuilderbagit.t +++ b/t/slubsipbuilderbagit.t @@ -1,416 +1,381 @@ -#!/usr/bin/perl -w -#=============================================================================== -# Copyright (C) 2020 -# AUTHORS: Serhiy Bolkun (Serhiy.Bolkun@slub-dresden.de) -# Andreas Romeyke (Andreas.Romeyke@slub-dresden.de) -# Jens Steidl (Jens.Steidl@slub-dresden.de) -# -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. 
If not, see <https://www.gnu.org/licenses/> -#=============================================================================== - -use strict; -use warnings; -use diagnostics; - -use Test::More tests => 116; -use Test::Output; -use Test::Exception; -use Test::File; -use Path::Tiny; -use File::Copy::Recursive qw(dircopy); -use File::Path qw(rmtree); -use LWP::UserAgent; -# *** Read before running tests *** -# Usage: perl -I lib/ t/slubsipbuilderbagit.t -# Note: While running tests of "SLUB_SIP_Builder", path "tmp/bagit" will be created. -# Please start test cases in the project directory to run all tests properly. -# Tested on Win10 and Debian 10 - -sub workaroundBagInfoTXT($$$){ - my $file_path = $_[0]; - my $key = $_[1]; - my $value = $_[2]; - - # read a data file into an array - open my $handle, '<', $file_path; - chomp(my @lines = <$handle>); - close $handle; - # grep and change certain array element - foreach my $zeile(@lines) { - my @keyvalue = split(/: /, $zeile, 2); # split on first : - if($keyvalue[0] eq $key){ - $zeile = "$key: $value"; - } - } - # save array back to file - open my $fh, '>', $file_path or die "Cannot open $file_path: $!"; - foreach (@lines){ - print $fh "$_\n"; # Print each entry in our array to the file - } - close $fh; -} - -sub workaroundTagmanifest($$$){ - my $file_path = $_[0]; - my $key = $_[1]; - my $value = $_[2]; - - # read a data file into an array - open my $handle, '<', $file_path; - chomp(my @lines = <$handle>); - close $handle; - # grep and change certain array element - foreach my $zeile(@lines) { - my @keyvalue = split(/ /, $zeile, 2); # split - if($keyvalue[1] eq $value){ - $zeile = "$key $value"; - } - } - # save array back to file - open my $fh, '>', $file_path or die "Cannot open $file_path: $!"; - foreach (@lines){ - print $fh "$_\n"; # Print each entry in our array to the file - } - close $fh; -} - -sub equalFilesByIgnoringCRLF_LF($$){ - my $file_path1 = $_[0]; - my $file_path2 = $_[1]; - - my $content1 = ""; - my 
$content2 = ""; - - binmode(STDOUT); - - open(my $fh1, '<', $file_path1) or die $!; - open(my $fh2, '<', $file_path2) or die $!; - - # changing the CRLF to LF - while (<$fh1>) { - s/\r?\n\z//; - # print "$_\n"; - $content1 .= "$_\n"; - } - - # changing the CRLF to LF - while (<$fh2>) { - s/\r?\n\z//; - # print "$_\n"; - $content2 .= "$_\n"; - } - - if($content1 ne $content2){ - return 0; - } else { - return 1; - } -} - -### prepare -BEGIN { - use Path::Tiny; - push @INC, Path::Tiny::path(__FILE__)->parent->parent->path("bin")->absolute->stringify; - #require "slubsipbuilderbagit.pl"; - #$INC{'SLUB/LZA/SIPBuilderBagIt.pm'} = 1; # needed because inlined module -} -my $exportToArchiveDate = "2020-08-05_10-26-11"; -my $ppn = "457035137"; -my $K10plus = "1655506501"; -my $noppn = ""; -my $marc21; -my $marc21_2; -my @addBagInfo = ( - {'SLUBArchiv-sipVersion' => 'v2020.1'}, - {'SLUBArchiv-externalWorkflow' => 'kitodo'}, - {'SLUBArchiv-externalId' => '10008'}, - {'SLUBArchiv-externalIsilId' => 'DE-14'}, - {'SLUBArchiv-exportToArchiveDate' => '2020-08-05T10:26:11'}, - {'SLUBArchiv-hasConservationReason' => 'false'}, - {'SLUBArchiv-archivalValueDescription' => 'Gesetzlicher Auftrag'}, - {'SLUBArchiv-rightsVersion' => '1.0'} -); -# input -my $input_path = path(__FILE__)->parent->parent->child('export_dir_kitodo')->child('bagit'); -# output -my $output_path = path(__FILE__)->parent->parent->child('tmp')->child('bagit'); -my $tmp_path = path(__FILE__)->parent->parent->child('tmp'); -# delete bagits from previous session -if(-e $tmp_path . '/bagit'){ - rmtree($tmp_path . '/bagit'); -} -if(! -d $output_path->parent) { mkdir $output_path->parent or die "Error creating directory: tmp"; } -if(! -d $output_path) { mkdir $output_path or die "Error creating directory: tmp/bagit"; } - -#~ structure of input output folders for tests -#~ . 
-#~ ├── export_dir_kitodo -#~ │ └─── bagit -#~ │ -#~ └── tmp -#~ └── bagit - -my $ua = LWP::UserAgent->new; -my $request = HTTP::Request->new('GET' => "https://sru.bsz-bw.de/swbf"); -my $response = $ua->request($request); -my $useragent_obj = LWP::UserAgent->new; -$useragent_obj->agent("MyApp/0.1 "); -$useragent_obj->timeout(3600); #1h - -### tests -BEGIN { use_ok("SLUB::LZA::SIPBuilderBagIt"); } -# -SKIP: { - skip "No response from server https://sru.bsz-bw.de/swb", 1 unless ! $response->is_error; - like(SLUB::LZA::SIPBuilderBagIt::get_marc21_from("https://sru.bsz-bw.de/swb", $ppn, "pica.swn", "marcxmlvbos"), qr//, "get_marc21_from()"); -}; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::get_marc21_from("", $ppn, "pica.swn", "marcxmlvbos"); } qr/invalid parameters/, "get_marc21_from(), invalid parameters"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::get_marc21_from("https://sru.bsz-bw.de/swb", "", "pica.swn", "marcxmlvbos"); } qr/invalid parameters/, "get_marc21_from(), invalid parameters"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::get_marc21_from("https://sru.bsz-bw.de/swb", $ppn, "", "marcxmlvbos"); } qr/invalid parameters/, "get_marc21_from(), invalid parameters"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::get_marc21_from("https://sru.bsz-bw.de/swb", $ppn, "pica.swn", ""); } qr/invalid parameters/, "get_marc21_from(), invalid parameters"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::get_marc21_from(undef, $ppn, "pica.swn", "marcxmlvbos"); } qr/url not defined/, "get_marc21_from(\$url, \$ppn, \$key, \$schema), url undef"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::get_marc21_from("https://sru.bsz-bw.de/swb", undef, "pica.swn", "marcxmlvbos"); } qr/ppn not defined/, "get_marc21_from(\$url, \$ppn, \$key, \$schema), ppn undef"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::get_marc21_from("https://sru.bsz-bw.de/swb", $ppn, undef, "marcxmlvbos"); } qr/key not defined/, "get_marc21_from(\$url, \$ppn, \$key, \$schema), key undef"; -throws_ok{ 
SLUB::LZA::SIPBuilderBagIt::get_marc21_from("https://sru.bsz-bw.de/swb", $ppn, "pica.swn", undef); } qr/schema not defined/, "get_marc21_from(\$url, \$ppn, \$key, \$schema), schema undef"; -# -ok( $marc21 = SLUB::LZA::SIPBuilderBagIt::get_marc21_from_catalogue($ppn), "get_marc21_from_catalogue(), valid"); -like( $marc21, qr/<datafield tag="/, "get_marc21_from_catalogue(), valid"); -ok( $marc21_2 = SLUB::LZA::SIPBuilderBagIt::get_marc21_from_catalogue($K10plus), "get_marc21_from_catalogue() K10plus, valid"); -like( $marc21_2, qr/<datafield tag="/, "get_marc21_from_catalogue() K10plus, valid"); -throws_ok{ SLUB::LZA::SIPBuilderBagIt::get_marc21_from_catalogue(""); } qr/ppn not defined/, "get_marc21_from_catalogue(), invalid"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::get_marc21_from_catalogue(undef); } qr/ppn not defined/, "get_marc21_from_catalogue(), invalid"; -# -is( SLUB::LZA::SIPBuilderBagIt::get_marc21_text_node($marc21, { tag => "245", code => "a", key => "Title" }), "Der Fichtelberg", "get_marc21_text_node(), valid"); -throws_ok{ SLUB::LZA::SIPBuilderBagIt::get_marc21_text_node("", { tag => "245", code => "a", key => "Title" }); } qr/empty marc21/, "get_marc21_text_node(), invalid"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::get_marc21_text_node(undef, { tag => "245", code => "a", key => "Title" }); } qr/empty marc21/, "get_marc21_text_node(), invalid"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::get_marc21_text_node($marc21, "" ); } qr/not a mapping hash/, "get_marc21_text_node(), invalid"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::get_marc21_text_node($marc21, undef); } qr/not a mapping hash/, "get_marc21_text_node(), invalid"; -# -my $marc_xml_unpatched = << "MARCXML"; -<?xml version="1.0"?> -<record xmlns="http://www.loc.gov/MARC21/slim"> -</record> -MARCXML -my $marc_doc = XML::LibXML->load_xml(string=>$marc_xml_unpatched); -ok( SLUB::LZA::SIPBuilderBagIt::add_marc21_schema_location($marc_doc), "add_marc21_schema_location(), valid"); -like( $marc_doc->serialize(), 
qr/xsi:schemaLocation/, "add_marc21_schema_location(), valid"); -throws_ok{ SLUB::LZA::SIPBuilderBagIt::add_marc21_schema_location(0); } qr/marc xml document required/, "add_marc21_schema_location(), invalid"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::add_marc21_schema_location(""); } qr/marc xml document required/, "add_marc21_schema_location(), invalid"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::add_marc21_schema_location(undef); } qr/marc xml document required/, "add_marc21_schema_location(), invalid"; -# -my @baginfo_got = @addBagInfo; -my @baginfo_expected = @addBagInfo; -push @baginfo_expected, ( - { "External-Identifier" => "PPN:457035137" }, - { "Title" => "Der Fichtelberg" }, - { "Title" => "Berg der unbekannten Rekorde" }, - { "Title" => "ein Film von Dirk Schneider" }, - { "Title" => "Der Osten - entdecke wo du lebst" } -); -ok( SLUB::LZA::SIPBuilderBagIt::add_metadata(\@baginfo_got, $ppn, $noppn, $marc21), "add_metadata(), valid"); -is_deeply( \@baginfo_got, \@baginfo_expected, "add_metadata(), valid"); -throws_ok{ SLUB::LZA::SIPBuilderBagIt::add_metadata("" , $ppn, $noppn, $marc21); } qr/not defined/, "add_metadata(), invalid"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::add_metadata(undef, $ppn, $noppn, $marc21); } qr/not defined/, "add_metadata(), invalid"; -# -is( SLUB::LZA::SIPBuilderBagIt::checkForTitle(\@baginfo_expected), 1, "checkForTitle(), valid"); -is( SLUB::LZA::SIPBuilderBagIt::checkForTitle(\@addBagInfo), 0, "checkForTitle(), valid"); # no 'Title' -throws_ok{ SLUB::LZA::SIPBuilderBagIt::checkForTitle(""); } qr/no array/, "checkForTitle(), invalid"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::checkForTitle(undef); } qr/no array/, "checkForTitle(), invalid"; -# -is(SLUB::LZA::SIPBuilderBagIt::generateBagName($exportToArchiveDate, $ppn, $noppn), "PPN-457035137_2020-08-05_10-26-11", "generateBagName()"); -# -my $output_dir = $output_path->child("dir"); -SLUB::LZA::SIPBuilderBagIt::createDir($output_dir); -is(-d $output_dir, 1, "createDir(\$path) path is 
directory"); -is(-e $output_dir, 1, "createDir(\$path) directory exists in path $output_dir"); -# -my $output_metaPath = $output_path->child("dir")->child("meta"); -my $output_marc21FilePath = $output_metaPath->child("marc21.xml"); # auto generated in copyFilesToMeta() -my $output_rightsFilePath = $output_metaPath->child("rights.xml"); -my $output_lidoFilePath = $output_metaPath->child("lido.xml"); -my $output_bubbleFilePath = $output_metaPath->child("bubble.xml"); -my $output_sameMarc21FilePath = $output_metaPath->child("1.xml"); -my $output_sameRightsFilePath = $output_metaPath->child("2.xml"); -my $input_metaPath = path(__FILE__)->parent->parent->child('export_dir_kitodo')->child('bagit')->child("meta"); -my $input_marc21FilePath = $input_metaPath->child("marc21.xml"); -my $input_rightsFilePath = $input_metaPath->child("rights.xml"); -my $input_lidoFilePath = $input_metaPath->child("lido.xml"); -my $input_bubbleFilePath = $input_metaPath->child("bubble.xml"); -my @input_addMetaFile = ($input_lidoFilePath, $input_bubbleFilePath, $input_marc21FilePath, $input_rightsFilePath); -SLUB::LZA::SIPBuilderBagIt::createDir($output_metaPath); -SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $output_metaPath, $input_rightsFilePath, \@input_addMetaFile, $marc21); -is(-f $output_marc21FilePath, 1, "copyFilesToMeta() file $output_marc21FilePath exists."); -is(-f $output_rightsFilePath, 1, "copyFilesToMeta() file $output_rightsFilePath exists."); -is(-f $output_lidoFilePath, 1, "copyFilesToMeta() file $output_lidoFilePath exists."); -is(-f $output_bubbleFilePath, 1, "copyFilesToMeta() file $output_bubbleFilePath exists."); -is(-f $output_sameMarc21FilePath, 1, "copyFilesToMeta() file $output_sameMarc21FilePath exists. Renamed self included marc21.xml"); -is(-f $output_sameRightsFilePath, 1, "copyFilesToMeta() file $output_sameRightsFilePath exists. 
Renamed self included rights.xml"); -is(equalFilesByIgnoringCRLF_LF($output_rightsFilePath, $input_rightsFilePath), 1, "copyFilesToMeta() file $input_rightsFilePath is identical to $output_rightsFilePath"); -is(equalFilesByIgnoringCRLF_LF($output_lidoFilePath, $input_lidoFilePath), 1, "copyFilesToMeta() file $input_lidoFilePath is identical to $output_lidoFilePath"); -is(equalFilesByIgnoringCRLF_LF($output_bubbleFilePath, $input_bubbleFilePath), 1, "copyFilesToMeta() file $input_bubbleFilePath is identical to $output_bubbleFilePath"); -is(equalFilesByIgnoringCRLF_LF($output_sameMarc21FilePath, $input_marc21FilePath), 1, "copyFilesToMeta() file $input_marc21FilePath is identical to $output_sameMarc21FilePath"); -is(equalFilesByIgnoringCRLF_LF($output_sameRightsFilePath, $input_rightsFilePath), 1, "copyFilesToMeta() file $input_rightsFilePath is identical to $output_sameRightsFilePath"); -# -my $xsdName = "rights1.xsd"; -my $input_rights = path(__FILE__)->parent->parent->child('export_dir_kitodo')->child('bagit')->child("rights"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-01.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-01.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-02.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-02.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-03.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-03.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-04.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-04.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-05.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-05.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . 
"/Fallbeispiel-06.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-06.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-07.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-07.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-08a-undef.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-08a-undef.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-09.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-09.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-10.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-10.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-11.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-11.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-12.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-12.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-13.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-13.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-14.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-14.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-15.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-15.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . 
"/Fallbeispiel-16.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-16.xml according to $xsdName"); -is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rightsFilePath, $xsdName), 1, "validateRightsXML() meta/rights.xml according to $xsdName"); -throws_ok{ SLUB::LZA::SIPBuilderBagIt::validateRightsXML(undef, $xsdName); } qr/rightsFilePath not defined/, "validateRightsXML(\$rightsFilePath, \$xsdName), metaPath undef"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rightsFilePath, undef); } qr/xsd name not defined/, "validateRightsXML(\$rightsFilePath, \$xsdName), xsdName undef"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_metaPath . "/marc21.xml", $xsdName); } qr/failed validation/, "validateRightsXML(\$rightsFilePath, \$xsdName), rights.xml is not valid according to $xsdName"; -# -my $system_prefix = 'lin_'; -if($^O eq 'MSWin32'){ $system_prefix = 'win_'; } -my $ie = 'test'; -my $bag = 'test_bag'; -my $valid_bag = $system_prefix . 'valid_test_bag'; -my @addMetaFile = (); -my $ie_input_path = $input_path . '/' . $ie; -my $bag_output_path = $output_path . '/' . $bag; -my $data_output_path = $bag_output_path . '/data'; -my $meta_output_path = $bag_output_path . '/meta'; -my $valid_bag_path = $input_path . '/' . 
$valid_bag; - -SLUB::LZA::SIPBuilderBagIt::createDir($bag_output_path); -SLUB::LZA::SIPBuilderBagIt::createDir($data_output_path); -File::Copy::Recursive::dircopy($ie_input_path, $data_output_path) or die "Could not perform dircopy() of $ie_input_path to autogenerated $data_output_path: $!"; -SLUB::LZA::SIPBuilderBagIt::createDir($meta_output_path); -SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $meta_output_path, $input_rightsFilePath, \@addMetaFile, $marc21); - -ok(SLUB::LZA::SIPBuilderBagIt::create_slub_bagit($bag_output_path, \@addBagInfo), "create_slub_bagit()"); -throws_ok{ SLUB::LZA::SIPBuilderBagIt::create_slub_bagit(undef, \@addBagInfo); } qr/bagPath is not defined/, "create_slub_bagit(\$bagPath, \$refAddBagInfo), bagPath undef"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::create_slub_bagit($bag_output_path, undef); } qr/array of hashes not defined for bag-info.txt/, "create_slub_bagit(\$bagPath, \$refAddBagInfo), refAddBagInfo undef"; -throws_ok{ SLUB::LZA::SIPBuilderBagIt::create_slub_bagit("", \@addBagInfo); } qr/path is not directory/, "create_slub_bagit(\$bagPath, \$refAddBagInfo), bagPath is not a directory"; - -# workaround bag-info.txt, tagmanifest-md5.txt, tagmanifest-sha512.txt -workaroundBagInfoTXT($bag_output_path . '/bag-info.txt', 'Bagging-Date', 'xxxx-xx-xx'); -workaroundTagmanifest($bag_output_path . '/tagmanifest-md5.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt'); -workaroundTagmanifest($bag_output_path . '/tagmanifest-sha512.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt'); - -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/data/00000001.tif', $valid_bag_path . '/data/00000001.tif'), 1, "create_slub_bagit() $bag: validate data/00000001.tif"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/meta/marc21.xml', $valid_bag_path . 
'/meta/marc21.xml'), 1, "create_slub_bagit() $bag: validate meta/marc21.xml"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/meta/rights.xml', $valid_bag_path . '/meta/rights.xml'), 1, "create_slub_bagit() $bag: validate meta/rights.xml"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/bag-info.txt', $valid_bag_path . '/bag-info.txt'), 1, "create_slub_bagit() $bag: validate bag-info.txt"); # workaround -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/bagit.txt', $valid_bag_path . '/bagit.txt'), 1, "create_slub_bagit() $bag: validate bagit.txt"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-md5.txt', $valid_bag_path . '/manifest-md5.txt'), 1, "create_slub_bagit() $bag: manifest-md5.txt"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-sha512.txt', $valid_bag_path . '/manifest-sha512.txt'), 1, "create_slub_bagit() $bag: manifest-sha512.txt"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-md5.txt', $valid_bag_path . '/tagmanifest-md5.txt'), 1, "create_slub_bagit() $bag: tagmanifest-md5.txt"); # workaround -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-sha512.txt', $valid_bag_path . '/tagmanifest-sha512.txt'), 1, "create_slub_bagit() $bag: tagmanifest-sha512.txt"); # workaround -# -$ie = 'test2'; -$bag = 'test2_bag'; -$valid_bag = $system_prefix .'valid_test2_bag'; -$ie_input_path = $input_path . '/' . $ie; -$bag_output_path = $output_path . '/' . $bag; -$data_output_path = $bag_output_path . '/data'; -$meta_output_path = $bag_output_path . '/meta'; -$valid_bag_path = $input_path . '/' . $valid_bag; - -ok(SLUB::LZA::SIPBuilderBagIt::buildBagWithCopyOption($ppn, $noppn, $ie_input_path, $input_rightsFilePath, \@addMetaFile, \@addBagInfo, $bag_output_path, $data_output_path, $meta_output_path), "buildBagWithCopyOption()"); - -# workaround bag-info.txt, tagmanifest-md5.txt, tagmanifest-sha512.txt -workaroundBagInfoTXT($bag_output_path . 
'/bag-info.txt', 'Bagging-Date', 'xxxx-xx-xx'); -workaroundTagmanifest($bag_output_path . '/tagmanifest-md5.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt'); -workaroundTagmanifest($bag_output_path . '/tagmanifest-sha512.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt'); - -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/data/scans_tif/00000001.tif', $valid_bag_path . '/data/scans_tif/00000001.tif'), 1, "create_slub_bagit() $bag: validate data/scans_tif/00000001.tif"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/data/scans_tif/00000002.tif', $valid_bag_path . '/data/scans_tif/00000002.tif'), 1, "create_slub_bagit() $bag: validate data/scans_tif/00000002.tif"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/data/test.tiff', $valid_bag_path . '/data/test.tiff'), 1, "create_slub_bagit() $bag: validate data/test.tiff"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/meta/marc21.xml', $valid_bag_path . '/meta/marc21.xml'), 1, "create_slub_bagit() $bag: validate meta/marc21.xml"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/meta/rights.xml', $valid_bag_path . '/meta/rights.xml'), 1, "create_slub_bagit() $bag: validate meta/rights.xml"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/bag-info.txt', $valid_bag_path . '/bag-info.txt'), 1, "create_slub_bagit() $bag: validate bag-info.txt"); # workaround -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/bagit.txt', $valid_bag_path . '/bagit.txt'), 1, "create_slub_bagit() $bag: validate bagit.txt"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-md5.txt', $valid_bag_path . '/manifest-md5.txt'), 1, "create_slub_bagit() $bag: manifest-md5.txt"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-sha512.txt', $valid_bag_path . 
'/manifest-sha512.txt'), 1, "create_slub_bagit() $bag: manifest-sha512.txt"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-md5.txt', $valid_bag_path . '/tagmanifest-md5.txt'), 1, "create_slub_bagit() $bag: tagmanifest-md5.txt"); # workaround -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-sha512.txt', $valid_bag_path . '/tagmanifest-sha512.txt'), 1, "create_slub_bagit() $bag: tagmanifest-sha512.txt"); # workaround -# -$ie = 'test3'; -$bag = 'test3_bag'; -$valid_bag = $system_prefix . 'valid_test3_bag'; -$ie_input_path = $input_path . '/' . $ie; -$bag_output_path = $output_path . '/' . $bag; -$data_output_path = $bag_output_path . '/data'; -$meta_output_path = $bag_output_path . '/meta'; -$valid_bag_path = $input_path . '/' . $valid_bag; - -my $new_ie_input_path = $tmp_path->child($ie); -File::Copy::Recursive::dircopy($ie_input_path, $new_ie_input_path) or die "Could not perform dircopy() of $ie_input_path to autogenerated $new_ie_input_path: $!"; - -ok(SLUB::LZA::SIPBuilderBagIt::buildBagWithMoveOption($ppn, $noppn, $new_ie_input_path, $input_rightsFilePath, \@addMetaFile, \@addBagInfo, $bag_output_path, $data_output_path, $meta_output_path), "buildBagWithMoveOption()"); - -# workaround bag-info.txt, tagmanifest-md5.txt, tagmanifest-sha512.txt -workaroundBagInfoTXT($bag_output_path . '/bag-info.txt', 'Bagging-Date', 'xxxx-xx-xx'); -workaroundTagmanifest($bag_output_path . '/tagmanifest-md5.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt'); -workaroundTagmanifest($bag_output_path . '/tagmanifest-sha512.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt'); - -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/data/img/scans_tif/00000001.tif', $valid_bag_path . '/data/img/scans_tif/00000001.tif'), 1, "create_slub_bagit() $bag: validate data/img/scans_tif/00000001.tif"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . 
'/data/img/scans_tif/00000002.tif', $valid_bag_path . '/data/img/scans_tif/00000002.tif'), 1, "create_slub_bagit() $bag: validate data/img/scans_tif/00000002.tif"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/data/test.tiff', $valid_bag_path . '/data/test.tiff'), 1, "create_slub_bagit() $bag: validate data/test.tiff"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/meta/marc21.xml', $valid_bag_path . '/meta/marc21.xml'), 1, "create_slub_bagit() $bag: validate meta/marc21.xml"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/meta/rights.xml', $valid_bag_path . '/meta/rights.xml'), 1, "create_slub_bagit() $bag: validate meta/rights.xml"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/bag-info.txt', $valid_bag_path . '/bag-info.txt'), 1, "create_slub_bagit() $bag: validate bag-info.txt"); # workaround -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/bagit.txt', $valid_bag_path . '/bagit.txt'), 1, "create_slub_bagit() $bag: validate bagit.txt"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-md5.txt', $valid_bag_path . '/manifest-md5.txt'), 1, "create_slub_bagit() $bag: manifest-md5.txt"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-sha512.txt', $valid_bag_path . '/manifest-sha512.txt'), 1, "create_slub_bagit() $bag: manifest-sha512.txt"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-md5.txt', $valid_bag_path . '/tagmanifest-md5.txt'), 1, "create_slub_bagit() $bag: tagmanifest-md5.txt"); # workaround -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-sha512.txt', $valid_bag_path . '/tagmanifest-sha512.txt'), 1, "create_slub_bagit() $bag: tagmanifest-sha512.txt"); # workaround -# -$ie = 'test4'; -$bag = 'test4_bag'; -$valid_bag = $system_prefix . 'valid_test4_bag'; -$ie_input_path = $input_path . '/' . $ie; -$bag_output_path = $output_path . '/' . $bag; -$data_output_path = $bag_output_path . '/data'; -$meta_output_path = $bag_output_path . 
'/meta'; -$valid_bag_path = $input_path . '/' . $valid_bag; - -$new_ie_input_path = $output_path . '/' . $ie ; -File::Copy::Recursive::dircopy($ie_input_path, $new_ie_input_path) or die "Could not perform dircopy() of $ie_input_path to autogenerated $new_ie_input_path: $!"; - -ok(SLUB::LZA::SIPBuilderBagIt::buildBagWithReplaceOption($ppn, $noppn, $new_ie_input_path, $input_rightsFilePath, \@addMetaFile, \@addBagInfo, $bag_output_path, $data_output_path, $meta_output_path), "buildBagWithReplaceOption()"); - -# workaround bag-info.txt, tagmanifest-md5.txt, tagmanifest-sha512.txt -workaroundBagInfoTXT($bag_output_path . '/bag-info.txt', 'Bagging-Date', 'xxxx-xx-xx'); -workaroundTagmanifest($bag_output_path . '/tagmanifest-md5.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt'); -workaroundTagmanifest($bag_output_path . '/tagmanifest-sha512.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt'); - -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/data/dir_test_äöüÄÖÜß/00000001.tif', $valid_bag_path . '/data/dir_test_äöüÄÖÜß/00000001.tif'), 1, "create_slub_bagit() $bag: validate data/dir_test_äöüÄÖÜß/00000001.tif"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/data/test_äöüÄÖÜß.tiff', $valid_bag_path . '/data/test_äöüÄÖÜß.tiff'), 1, "create_slub_bagit() $bag: validate data/test_äöüÄÖÜß.tiff"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/meta/marc21.xml', $valid_bag_path . '/meta/marc21.xml'), 1, "create_slub_bagit() $bag: validate meta/marc21.xml"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/meta/rights.xml', $valid_bag_path . '/meta/rights.xml'), 1, "create_slub_bagit() $bag: validate meta/rights.xml"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/bag-info.txt', $valid_bag_path . '/bag-info.txt'), 1, "create_slub_bagit() $bag: validate bag-info.txt"); # workaround -is(equalFilesByIgnoringCRLF_LF($bag_output_path . 
'/bagit.txt', $valid_bag_path . '/bagit.txt'), 1, "create_slub_bagit() $bag: validate bagit.txt"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-md5.txt', $valid_bag_path . '/manifest-md5.txt'), 1, "create_slub_bagit() $bag: manifest-md5.txt"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-sha512.txt', $valid_bag_path . '/manifest-sha512.txt'), 1, "create_slub_bagit() $bag: manifest-sha512.txt"); -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-md5.txt', $valid_bag_path . '/tagmanifest-md5.txt'), 1, "create_slub_bagit() $bag: tagmanifest-md5.txt"); # workaround -is(equalFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-sha512.txt', $valid_bag_path . '/tagmanifest-sha512.txt'), 1, "create_slub_bagit() $bag: tagmanifest-sha512.txt"); # workaround - -1; +#!/usr/bin/perl -w +#=============================================================================== +# Copyright (C) 2020 +# AUTHORS: Serhiy Bolkun (Serhiy.Bolkun@slub-dresden.de) +# Andreas Romeyke (Andreas.Romeyke@slub-dresden.de) +# Jens Steidl (Jens.Steidl@slub-dresden.de) +# +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see <https://www.gnu.org/licenses/>
+#===============================================================================
+
+use strict;
+use warnings;
+use diagnostics;
+
+# NOTE(review): the plan count is unchanged although this change drops several
+# throws_ok checks further down - confirm that 116 still matches.
+use Test::More tests => 116;
+use Test::Output;
+use Test::Exception;
+use Test::Differences;
+use Test::File;
+use Path::Tiny;
+use File::Copy::Recursive qw(dircopy);
+use File::Path qw(rmtree);
+# NOTE(review): LWP::UserAgent and HTTP::Request are still used below but are no
+# longer loaded here; presumably SLUB::LZA::SIPBuilderBagIt pulls them in - confirm.
+# *** Read before running tests ***
+# Usage: perl -I lib/ t/slubsipbuilderbagit.t
+# Note: While running tests of "SLUB_SIP_Builder", path "tmp/bagit" will be created.
+# Please start test cases in the project directory to run all tests properly.
+# Tested on Win10 and Debian 10
+
+# Rewrites a "key: value" text file in place: every line whose key (the text
+# before the first ": ") equals $key gets its value replaced by $value.
+#   $file_path - file to rewrite
+#   $key       - key to look for
+#   $value     - replacement value
+# Dies if the file cannot be opened for reading or writing, or cannot be closed
+# after writing. Every line is written back terminated by "\n".
+sub workaroundBagInfoTXT($$$){
+    my ($file_path, $key, $value) = @_;
+
+    # read the data file into an array
+    open my $handle, '<', $file_path or die "Cannot open $file_path: $!";
+    chomp(my @lines = <$handle>);
+    close $handle;
+    # change the matching array elements (aliased, so @lines is modified)
+    foreach my $zeile (@lines) {
+        my @keyvalue = split(/: /, $zeile, 2); # split on first ": "
+        # an empty line yields no fields at all, so guard against undef
+        if (defined $keyvalue[0] && $keyvalue[0] eq $key) {
+            $zeile = "$key: $value";
+        }
+    }
+    # save array back to file
+    open my $fh, '>', $file_path or die "Cannot open $file_path: $!";
+    foreach (@lines){
+        print $fh "$_\n"; # Print each entry in our array to the file
+    }
+    # buffered write errors only surface at close
+    close $fh or die "Cannot close $file_path: $!";
+}
+
+# Rewrites a "checksum filename" manifest in place: every line whose second
+# field (the text after the first space) equals $value gets its first field
+# replaced by $key.
+#   $file_path - manifest file to rewrite
+#   $key       - replacement checksum
+#   $value     - file name to look for
+# Dies if the file cannot be opened for reading or writing, or cannot be closed
+# after writing. Every line is written back terminated by "\n".
+sub workaroundTagmanifest($$$){
+    my ($file_path, $key, $value) = @_;
+
+    # read the data file into an array
+    open my $handle, '<', $file_path or die "Cannot open $file_path: $!";
+    chomp(my @lines = <$handle>);
+    close $handle;
+    # change the matching array elements (aliased, so @lines is modified)
+    foreach my $zeile (@lines) {
+        my @keyvalue = split(/ /, $zeile, 2); # split on first space
+        # lines without a space have no second field, so guard against undef
+        if (defined $keyvalue[1] && $keyvalue[1] eq $value) {
+            $zeile = "$key $value";
+        }
+    }
+    # save array back to file
+    open my $fh, '>', $file_path or die "Cannot open $file_path: $!";
+    foreach (@lines){
+        print $fh "$_\n"; # Print each entry in our array to the file
+    }
+    # buffered write errors only surface at close
+    close $fh or die "Cannot close $file_path: $!";
+}
+
+# Emits exactly one test result: passes if both files have the same content
+# once CRLF line endings are normalised to LF, otherwise fails with a unified
+# diff and a diag line naming the calling file/line and both paths.
+#   $file_path1 - first file to compare
+#   $file_path2 - second file to compare
+#   $testname   - description used for the test result
+# Returns nothing.
+sub cmpFilesByIgnoringCRLF_LF($$$){
+    my ($file_path1, $file_path2, $testname) = @_;
+    my @context = caller;
+    my $content1 = path($file_path1)->slurp;
+    my $content2 = path($file_path2)->slurp;
+    # drop every CR that is directly followed by LF, so that CRLF and LF
+    # line endings compare equal (needs /g to cover all lines of the file)
+    $content1 =~ s/\r(?=\n)//g;
+    $content2 =~ s/\r(?=\n)//g;
+    unified_diff;
+    eq_or_diff_data( $content1, $content2, $testname, { context=> 3, }) || diag "testcase at $context[1] line $context[2], with file1=$file_path1 file2=$file_path2\n\n";
+    return;
+}
+
+### prepare
+BEGIN {
+    use Path::Tiny;
+    push @INC, Path::Tiny::path(__FILE__)->parent->parent->path("bin")->absolute->stringify;
+    #require "slubsipbuilderbagit.pl";
+    #$INC{'SLUB/LZA/SIPBuilderBagIt.pm'} = 1; # needed because inlined module
+}
+my $exportToArchiveDate = "2020-08-05_10-26-11";
+my $ppn = "457035137";
+my $K10plus = "1655506501";
+my $noppn = "";
+my $marc21;
+my $marc21_2;
+my @addBagInfo = (
+    {'SLUBArchiv-sipVersion' => 'v2020.1'},
+    {'SLUBArchiv-externalWorkflow' => 'kitodo'},
+    {'SLUBArchiv-externalId' => '10008'},
+    {'SLUBArchiv-externalIsilId' => 'DE-14'},
+    {'SLUBArchiv-exportToArchiveDate' => '2020-08-05T10:26:11'},
+    {'SLUBArchiv-hasConservationReason' => 'false'},
+    {'SLUBArchiv-archivalValueDescription' => 'Gesetzlicher Auftrag'},
+    {'SLUBArchiv-rightsVersion' => '1.0'}
+);
+# input
+my $input_path = path(__FILE__)->parent->parent->child('export_dir_kitodo')->child('bagit');
+# output
+my $output_path = path(__FILE__)->parent->parent->child('tmp')->child('bagit');
+my $tmp_path = path(__FILE__)->parent->parent->child('tmp');
+# delete bagits from previous session
+if(-e $tmp_path . '/bagit'){
+    rmtree($tmp_path . '/bagit');
+}
+if(! -d $output_path->parent) { mkdir $output_path->parent or die "Error creating directory: tmp"; }
+if(! -d $output_path) { mkdir $output_path or die "Error creating directory: tmp/bagit"; }
+
+#~ structure of input output folders for tests
+#~ .
+#~ ├── export_dir_kitodo
+#~ │   └─── bagit
+#~ │
+#~ └── tmp
+#~     └── bagit
+
+# Probe the SRU server once up front; the SKIP block below uses $response to
+# decide whether the online get_marc21_from() test is run.
+# NOTE(review): the probe requests ".../swbf" while the skipped test and its
+# message use ".../swb" - confirm which URL is intended.
+my $ua = LWP::UserAgent->new;
+my $request = HTTP::Request->new('GET' => "https://sru.bsz-bw.de/swbf");
+my $response = $ua->request($request);
+# NOTE(review): $useragent_obj is configured here but not referenced again in this file.
+my $useragent_obj = LWP::UserAgent->new;
+$useragent_obj->agent("MyApp/0.1 ");
+$useragent_obj->timeout(3600); #1h
+
+### tests
+BEGIN { use_ok("SLUB::LZA::SIPBuilderBagIt"); }
+#
+SKIP: {
+    # skipped when the probe above returned an error response
+    skip "No response from server https://sru.bsz-bw.de/swb", 1 unless ! $response->is_error;
+    # NOTE(review): qr// is an empty pattern and matches any string, so this
+    # assertion does not check the returned content - confirm the intended pattern.
+    like(SLUB::LZA::SIPBuilderBagIt::get_marc21_from("https://sru.bsz-bw.de/swb", $ppn, "pica.swn", "marcxmlvbos"), qr//, "get_marc21_from()");
+};
+#
+# get_marc21_from_catalogue(): both identifiers must yield MARC datafields; empty/undef must die
+ok( $marc21 = SLUB::LZA::SIPBuilderBagIt::get_marc21_from_catalogue($ppn), "get_marc21_from_catalogue(), valid");
+like( $marc21, qr/<datafield tag="/, "get_marc21_from_catalogue(), valid");
+ok( $marc21_2 = SLUB::LZA::SIPBuilderBagIt::get_marc21_from_catalogue($K10plus), "get_marc21_from_catalogue() K10plus, valid");
+like( $marc21_2, qr/<datafield tag="/, "get_marc21_from_catalogue() K10plus, valid");
+throws_ok{ SLUB::LZA::SIPBuilderBagIt::get_marc21_from_catalogue(""); } qr/ppn not defined/, "get_marc21_from_catalogue(), invalid";
+throws_ok{ SLUB::LZA::SIPBuilderBagIt::get_marc21_from_catalogue(undef); } qr/ppn not defined/, "get_marc21_from_catalogue(), invalid";
+#
+is( SLUB::LZA::SIPBuilderBagIt::get_marc21_text_node($marc21, { tag => "245", code => "a", key => "Title" }), "Der Fichtelberg", "get_marc21_text_node(), valid");
+#
+# add_marc21_schema_location(): a minimal MARC record must gain an xsi:schemaLocation attribute
+my $marc_xml_unpatched = << "MARCXML";
+<?xml version="1.0"?>
+<record xmlns="http://www.loc.gov/MARC21/slim">
+</record>
+MARCXML
+my $marc_doc = XML::LibXML->load_xml(string=>$marc_xml_unpatched);
+ok( SLUB::LZA::SIPBuilderBagIt::add_marc21_schema_location($marc_doc), "add_marc21_schema_location(), valid");
+like( $marc_doc->serialize(), qr/xsi:schemaLocation/, "add_marc21_schema_location(), valid");
+throws_ok{ SLUB::LZA::SIPBuilderBagIt::add_marc21_schema_location(0); } qr/marc xml document required/, "add_marc21_schema_location(), invalid";
+throws_ok{ SLUB::LZA::SIPBuilderBagIt::add_marc21_schema_location(""); } qr/marc xml document required/, "add_marc21_schema_location(), invalid";
+throws_ok{ SLUB::LZA::SIPBuilderBagIt::add_marc21_schema_location(undef); } qr/marc xml document required/, "add_marc21_schema_location(), invalid";
+#
+# add_metadata(): expected result is @addBagInfo followed by the identifier and title entries
+my @baginfo_got = @addBagInfo;
+my @baginfo_expected = @addBagInfo;
+push @baginfo_expected, (
+    { "External-Identifier" => "PPN:457035137" },
+    { "Title" => "Der Fichtelberg" },
+    { "Title" => "Berg der unbekannten Rekorde" },
+    { "Title" => "ein Film von Dirk Schneider" },
+    { "Title" => "Der Osten - entdecke wo du lebst" }
+);
+ok( SLUB::LZA::SIPBuilderBagIt::add_metadata(\@baginfo_got, $ppn, $noppn, $marc21), "add_metadata(), valid");
+is_deeply( \@baginfo_got, \@baginfo_expected, "add_metadata(), valid");
+#
+is( SLUB::LZA::SIPBuilderBagIt::checkForTitle(\@baginfo_expected), 1, "checkForTitle(), valid");
+is( SLUB::LZA::SIPBuilderBagIt::checkForTitle(\@addBagInfo), 0, "checkForTitle(), valid"); # no 'Title'
+#
+is(SLUB::LZA::SIPBuilderBagIt::generateBagName($exportToArchiveDate, $ppn, $noppn), "PPN-457035137_2020-08-05_10-26-11", "generateBagName()");
+#
+my $output_dir = $output_path->child("dir");
+SLUB::LZA::SIPBuilderBagIt::createDir($output_dir);
+is(-d $output_dir, 1, "createDir(\$path) path is directory");
+is(-e $output_dir, 1, "createDir(\$path) directory exists in path $output_dir");
+#
+# copyFilesToMeta(): the additional-file list deliberately contains marc21.xml and
+# rights.xml again; the tests below expect those copies under the names 1.xml and 2.xml
+my $output_metaPath = $output_path->child("dir")->child("meta");
+my $output_marc21FilePath = $output_metaPath->child("marc21.xml"); # auto generated in copyFilesToMeta()
+my $output_rightsFilePath = $output_metaPath->child("rights.xml");
+my $output_lidoFilePath = $output_metaPath->child("lido.xml");
+my $output_bubbleFilePath = $output_metaPath->child("bubble.xml");
+my $output_sameMarc21FilePath = $output_metaPath->child("1.xml");
+my $output_sameRightsFilePath = $output_metaPath->child("2.xml");
+my $input_metaPath = path(__FILE__)->parent->parent->child('export_dir_kitodo')->child('bagit')->child("meta");
+my $input_marc21FilePath = $input_metaPath->child("marc21.xml");
+my $input_rightsFilePath = $input_metaPath->child("rights.xml");
+my $input_lidoFilePath = $input_metaPath->child("lido.xml");
+my $input_bubbleFilePath = $input_metaPath->child("bubble.xml");
+my @input_addMetaFile = ($input_lidoFilePath, $input_bubbleFilePath, $input_marc21FilePath, $input_rightsFilePath);
+SLUB::LZA::SIPBuilderBagIt::createDir($output_metaPath);
+SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $output_metaPath, $input_rightsFilePath, \@input_addMetaFile, $marc21);
+is(-f $output_marc21FilePath, 1, "copyFilesToMeta() file $output_marc21FilePath exists.");
+is(-f $output_rightsFilePath, 1, "copyFilesToMeta() file $output_rightsFilePath exists.");
+is(-f $output_lidoFilePath, 1, "copyFilesToMeta() file $output_lidoFilePath exists.");
+is(-f $output_bubbleFilePath, 1, "copyFilesToMeta() file $output_bubbleFilePath exists.");
+is(-f $output_sameMarc21FilePath, 1, "copyFilesToMeta() file $output_sameMarc21FilePath exists. Renamed self included marc21.xml");
+is(-f $output_sameRightsFilePath, 1, "copyFilesToMeta() file $output_sameRightsFilePath exists. Renamed self included rights.xml");
+cmpFilesByIgnoringCRLF_LF($output_rightsFilePath, $input_rightsFilePath, "copyFilesToMeta() file $input_rightsFilePath is identical to $output_rightsFilePath");
+cmpFilesByIgnoringCRLF_LF($output_lidoFilePath, $input_lidoFilePath, "copyFilesToMeta() file $input_lidoFilePath is identical to $output_lidoFilePath");
+cmpFilesByIgnoringCRLF_LF($output_bubbleFilePath, $input_bubbleFilePath, "copyFilesToMeta() file $input_bubbleFilePath is identical to $output_bubbleFilePath");
+cmpFilesByIgnoringCRLF_LF($output_sameMarc21FilePath, $input_marc21FilePath, "copyFilesToMeta() file $input_marc21FilePath is identical to $output_sameMarc21FilePath");
+cmpFilesByIgnoringCRLF_LF($output_sameRightsFilePath, $input_rightsFilePath, "copyFilesToMeta() file $input_rightsFilePath is identical to $output_sameRightsFilePath");
+#
+# validateRightsXML(): every sample rights file must validate against $xsdName; marc21.xml must not
+my $xsdName = "rights1.xsd";
+my $input_rights = path(__FILE__)->parent->parent->child('export_dir_kitodo')->child('bagit')->child("rights");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-01.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-01.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-02.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-02.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-03.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-03.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-04.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-04.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-05.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-05.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-06.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-06.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-07.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-07.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-08a-undef.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-08a-undef.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-09.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-09.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-10.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-10.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-11.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-11.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-12.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-12.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-13.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-13.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-14.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-14.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-15.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-15.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rights . "/Fallbeispiel-16.xml", $xsdName), 1, "validateRightsXML() Fallbeispiel-16.xml according to $xsdName");
+is(SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_rightsFilePath, $xsdName), 1, "validateRightsXML() meta/rights.xml according to $xsdName");
+throws_ok{ SLUB::LZA::SIPBuilderBagIt::validateRightsXML($input_metaPath . "/marc21.xml", $xsdName); } qr/failed validation/, "validateRightsXML(\$rightsFilePath, \$xsdName), rights.xml is not valid according to $xsdName";
+#
+# Bag 1 ('test'): data/ and meta/ are assembled by hand, then create_slub_bagit() is called.
+# Reference bags are selected by platform prefix: 'win_' on MSWin32, 'lin_' otherwise.
+my $system_prefix = 'lin_';
+if($^O eq 'MSWin32'){ $system_prefix = 'win_'; }
+my $ie = 'test';
+my $bag = 'test_bag';
+my $valid_bag = $system_prefix . 'valid_test_bag';
+my @addMetaFile = ();
+my $ie_input_path = $input_path . '/' . $ie;
+my $bag_output_path = $output_path . '/' . $bag;
+my $data_output_path = $bag_output_path . '/data';
+my $meta_output_path = $bag_output_path . '/meta';
+my $valid_bag_path = $input_path . '/' . $valid_bag;
+
+SLUB::LZA::SIPBuilderBagIt::createDir($bag_output_path);
+SLUB::LZA::SIPBuilderBagIt::createDir($data_output_path);
+File::Copy::Recursive::dircopy($ie_input_path, $data_output_path) or die "Could not perform dircopy() of $ie_input_path to autogenerated $data_output_path: $!";
+SLUB::LZA::SIPBuilderBagIt::createDir($meta_output_path);
+SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $meta_output_path, $input_rightsFilePath, \@addMetaFile, $marc21);
+
+ok(SLUB::LZA::SIPBuilderBagIt::create_slub_bagit($bag_output_path, \@addBagInfo), "create_slub_bagit()");
+throws_ok{ SLUB::LZA::SIPBuilderBagIt::create_slub_bagit(undef, \@addBagInfo); } qr/bagPath is not defined/, "create_slub_bagit(\$bagPath, \$refAddBagInfo), bagPath undef";
+throws_ok{ SLUB::LZA::SIPBuilderBagIt::create_slub_bagit($bag_output_path, undef); } qr/array of hashes not defined for bag-info.txt/, "create_slub_bagit(\$bagPath, \$refAddBagInfo), refAddBagInfo undef";
+throws_ok{ SLUB::LZA::SIPBuilderBagIt::create_slub_bagit("", \@addBagInfo); } qr/path .* is not directory/, "create_slub_bagit(\$bagPath, \$refAddBagInfo), bagPath is not a directory";
+
+# workaround bag-info.txt, tagmanifest-md5.txt, tagmanifest-sha512.txt:
+# overwrite the Bagging-Date value and the bag-info.txt checksum lines with 'x'
+# placeholders (presumably the reference bags carry the same placeholders - verify)
+workaroundBagInfoTXT($bag_output_path . '/bag-info.txt', 'Bagging-Date', 'xxxx-xx-xx');
+workaroundTagmanifest($bag_output_path . '/tagmanifest-md5.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt');
+workaroundTagmanifest($bag_output_path . '/tagmanifest-sha512.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt');
+
+# cmpFilesByIgnoringCRLF_LF() is our test routine comparing each generated file with the reference bag
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/data/00000001.tif', $valid_bag_path . '/data/00000001.tif', "create_slub_bagit() $bag: validate data/00000001.tif");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/meta/marc21.xml', $valid_bag_path . '/meta/marc21.xml', "create_slub_bagit() $bag: validate meta/marc21.xml");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/meta/rights.xml', $valid_bag_path . '/meta/rights.xml', "create_slub_bagit() $bag: validate meta/rights.xml");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/bag-info.txt', $valid_bag_path . '/bag-info.txt', "create_slub_bagit() $bag: validate bag-info.txt"); # workaround
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/bagit.txt', $valid_bag_path . '/bagit.txt', "create_slub_bagit() $bag: validate bagit.txt");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-md5.txt', $valid_bag_path . '/manifest-md5.txt', "create_slub_bagit() $bag: manifest-md5.txt");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-sha512.txt', $valid_bag_path . '/manifest-sha512.txt', "create_slub_bagit() $bag: manifest-sha512.txt");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-md5.txt', $valid_bag_path . '/tagmanifest-md5.txt', "create_slub_bagit() $bag: tagmanifest-md5.txt"); # workaround
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-sha512.txt', $valid_bag_path . '/tagmanifest-sha512.txt', "create_slub_bagit() $bag: tagmanifest-sha512.txt"); # workaround
+#
+# Bag 2 ('test2'): built with buildBagWithCopyOption() directly from the input directory
+$ie = 'test2';
+$bag = 'test2_bag';
+$valid_bag = $system_prefix .'valid_test2_bag';
+$ie_input_path = $input_path . '/' . $ie;
+$bag_output_path = $output_path . '/' . $bag;
+$data_output_path = $bag_output_path . '/data';
+$meta_output_path = $bag_output_path . '/meta';
+$valid_bag_path = $input_path . '/' . $valid_bag;
+
+ok(SLUB::LZA::SIPBuilderBagIt::buildBagWithCopyOption($ppn, $noppn, $ie_input_path, $input_rightsFilePath, \@addMetaFile, \@addBagInfo, $bag_output_path, $data_output_path, $meta_output_path), "buildBagWithCopyOption()");
+
+# workaround bag-info.txt, tagmanifest-md5.txt, tagmanifest-sha512.txt
+workaroundBagInfoTXT($bag_output_path . '/bag-info.txt', 'Bagging-Date', 'xxxx-xx-xx');
+workaroundTagmanifest($bag_output_path . '/tagmanifest-md5.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt');
+workaroundTagmanifest($bag_output_path . '/tagmanifest-sha512.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt');
+
+# cmpFilesByIgnoringCRLF_LF() is our test routine comparing each generated file with the reference bag
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/data/scans_tif/00000001.tif', $valid_bag_path . '/data/scans_tif/00000001.tif', "create_slub_bagit() $bag: validate data/scans_tif/00000001.tif");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/data/scans_tif/00000002.tif', $valid_bag_path . '/data/scans_tif/00000002.tif', "create_slub_bagit() $bag: validate data/scans_tif/00000002.tif");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/data/test.tiff', $valid_bag_path . '/data/test.tiff', "create_slub_bagit() $bag: validate data/test.tiff");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/meta/marc21.xml', $valid_bag_path . '/meta/marc21.xml', "create_slub_bagit() $bag: validate meta/marc21.xml");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/meta/rights.xml', $valid_bag_path . '/meta/rights.xml', "create_slub_bagit() $bag: validate meta/rights.xml");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/bag-info.txt', $valid_bag_path . '/bag-info.txt', "create_slub_bagit() $bag: validate bag-info.txt"); # workaround
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/bagit.txt', $valid_bag_path . '/bagit.txt', "create_slub_bagit() $bag: validate bagit.txt");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-md5.txt', $valid_bag_path . '/manifest-md5.txt', "create_slub_bagit() $bag: manifest-md5.txt");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-sha512.txt', $valid_bag_path . '/manifest-sha512.txt', "create_slub_bagit() $bag: manifest-sha512.txt");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-md5.txt', $valid_bag_path . '/tagmanifest-md5.txt', "create_slub_bagit() $bag: tagmanifest-md5.txt"); # workaround
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-sha512.txt', $valid_bag_path . '/tagmanifest-sha512.txt', "create_slub_bagit() $bag: tagmanifest-sha512.txt"); # workaround
+#
+# Bag 3 ('test3'): buildBagWithMoveOption() is given a copy of the input placed under tmp/
+$ie = 'test3';
+$bag = 'test3_bag';
+$valid_bag = $system_prefix . 'valid_test3_bag';
+$ie_input_path = $input_path . '/' . $ie;
+$bag_output_path = $output_path . '/' . $bag;
+$data_output_path = $bag_output_path . '/data';
+$meta_output_path = $bag_output_path . '/meta';
+$valid_bag_path = $input_path . '/' . $valid_bag;
+
+my $new_ie_input_path = $tmp_path->child($ie);
+File::Copy::Recursive::dircopy($ie_input_path, $new_ie_input_path) or die "Could not perform dircopy() of $ie_input_path to autogenerated $new_ie_input_path: $!";
+
+ok(SLUB::LZA::SIPBuilderBagIt::buildBagWithMoveOption($ppn, $noppn, $new_ie_input_path, $input_rightsFilePath, \@addMetaFile, \@addBagInfo, $bag_output_path, $data_output_path, $meta_output_path), "buildBagWithMoveOption()");
+
+# workaround bag-info.txt, tagmanifest-md5.txt, tagmanifest-sha512.txt
+workaroundBagInfoTXT($bag_output_path . '/bag-info.txt', 'Bagging-Date', 'xxxx-xx-xx');
+workaroundTagmanifest($bag_output_path . '/tagmanifest-md5.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt');
+workaroundTagmanifest($bag_output_path . '/tagmanifest-sha512.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt');
+
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/data/img/scans_tif/00000001.tif', $valid_bag_path . '/data/img/scans_tif/00000001.tif', "create_slub_bagit() $bag: validate data/img/scans_tif/00000001.tif");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/data/img/scans_tif/00000002.tif', $valid_bag_path . '/data/img/scans_tif/00000002.tif', "create_slub_bagit() $bag: validate data/img/scans_tif/00000002.tif");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/data/test.tiff', $valid_bag_path . '/data/test.tiff', "create_slub_bagit() $bag: validate data/test.tiff");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/meta/marc21.xml', $valid_bag_path . '/meta/marc21.xml', "create_slub_bagit() $bag: validate meta/marc21.xml");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/meta/rights.xml', $valid_bag_path . '/meta/rights.xml', "create_slub_bagit() $bag: validate meta/rights.xml");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/bag-info.txt', $valid_bag_path . '/bag-info.txt', "create_slub_bagit() $bag: validate bag-info.txt"); # workaround
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/bagit.txt', $valid_bag_path . '/bagit.txt', "create_slub_bagit() $bag: validate bagit.txt");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-md5.txt', $valid_bag_path . '/manifest-md5.txt', "create_slub_bagit() $bag: manifest-md5.txt");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-sha512.txt', $valid_bag_path . '/manifest-sha512.txt', "create_slub_bagit() $bag: manifest-sha512.txt");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-md5.txt', $valid_bag_path . '/tagmanifest-md5.txt', "create_slub_bagit() $bag: tagmanifest-md5.txt"); # workaround
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-sha512.txt', $valid_bag_path . '/tagmanifest-sha512.txt', "create_slub_bagit() $bag: tagmanifest-sha512.txt"); # workaround
+#
+# Bag 4 ('test4'): buildBagWithReplaceOption() is given a copy of the input placed under tmp/bagit;
+# the file and directory names contain non-ASCII characters
+$ie = 'test4';
+$bag = 'test4_bag';
+$valid_bag = $system_prefix . 'valid_test4_bag';
+$ie_input_path = $input_path . '/' . $ie;
+$bag_output_path = $output_path . '/' . $bag;
+$data_output_path = $bag_output_path . '/data';
+$meta_output_path = $bag_output_path . '/meta';
+$valid_bag_path = $input_path . '/' . $valid_bag;
+
+$new_ie_input_path = $output_path . '/' . $ie ;
+File::Copy::Recursive::dircopy($ie_input_path, $new_ie_input_path) or die "Could not perform dircopy() of $ie_input_path to autogenerated $new_ie_input_path: $!";
+
+ok(SLUB::LZA::SIPBuilderBagIt::buildBagWithReplaceOption($ppn, $noppn, $new_ie_input_path, $input_rightsFilePath, \@addMetaFile, \@addBagInfo, $bag_output_path, $data_output_path, $meta_output_path), "buildBagWithReplaceOption()");
+
+# workaround bag-info.txt, tagmanifest-md5.txt, tagmanifest-sha512.txt
+workaroundBagInfoTXT($bag_output_path . '/bag-info.txt', 'Bagging-Date', 'xxxx-xx-xx');
+workaroundTagmanifest($bag_output_path . '/tagmanifest-md5.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt');
+workaroundTagmanifest($bag_output_path . '/tagmanifest-sha512.txt', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'bag-info.txt');
+
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/data/dir_test_äöüÄÖÜß/00000001.tif', $valid_bag_path . '/data/dir_test_äöüÄÖÜß/00000001.tif', "create_slub_bagit() $bag: validate data/dir_test_äöüÄÖÜß/00000001.tif");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/data/test_äöüÄÖÜß.tiff', $valid_bag_path . '/data/test_äöüÄÖÜß.tiff', "create_slub_bagit() $bag: validate data/test_äöüÄÖÜß.tiff");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/meta/marc21.xml', $valid_bag_path . '/meta/marc21.xml', "create_slub_bagit() $bag: validate meta/marc21.xml");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/meta/rights.xml', $valid_bag_path . '/meta/rights.xml', "create_slub_bagit() $bag: validate meta/rights.xml");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/bag-info.txt', $valid_bag_path . '/bag-info.txt', "create_slub_bagit() $bag: validate bag-info.txt"); # workaround
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/bagit.txt', $valid_bag_path . '/bagit.txt', "create_slub_bagit() $bag: validate bagit.txt");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-md5.txt', $valid_bag_path . '/manifest-md5.txt', "create_slub_bagit() $bag: manifest-md5.txt");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/manifest-sha512.txt', $valid_bag_path . '/manifest-sha512.txt', "create_slub_bagit() $bag: manifest-sha512.txt");
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-md5.txt', $valid_bag_path . '/tagmanifest-md5.txt', "create_slub_bagit() $bag: tagmanifest-md5.txt"); # workaround
+cmpFilesByIgnoringCRLF_LF($bag_output_path . '/tagmanifest-sha512.txt', $valid_bag_path . '/tagmanifest-sha512.txt', "create_slub_bagit() $bag: tagmanifest-sha512.txt"); # workaround
+
+1;
-- 
GitLab