diff --git a/lib/Archive/BagIt/Base.pm b/lib/Archive/BagIt/Base.pm index 8c748523f7ba7d633bb1408a83f07927154dedbf..83a76adf9d70f31a33be92c3438e9583a10b0db5 100644 --- a/lib/Archive/BagIt/Base.pm +++ b/lib/Archive/BagIt/Base.pm @@ -15,6 +15,7 @@ use File::stat; use Digest::MD5; use Class::Load qw(load_class); use Carp; +use List::Util qw( uniq); # VERSION @@ -109,9 +110,8 @@ sub bag_info_by_key { my $info = $self->bag_info(); if (defined $searchkey) { foreach my $entry (@{$info}) { - my ($key, $value) = each %{$entry}; - if (defined $key && $key eq $searchkey) { - return $value; + if (exists $entry->{$searchkey}) { + return $entry->{$searchkey}; } } } @@ -307,55 +307,42 @@ sub _build_tagmanifest_files { } -sub _build_tagmanifest_entries { - my ($self) = @_; - - my @tagmanifests = @{$self->tagmanifest_files}; - my $tagmanifest_entries = {}; - foreach my $tagmanifest_file (@tagmanifests) { - die("Cannot open $tagmanifest_file: $!") unless (open(my $TAGMANIFEST,"<:encoding(utf8)", $tagmanifest_file)); - while (my $line = <$TAGMANIFEST>) { +sub __build_xxxmanifest_entries { + my ($self, $xxmanifestfiles) = @_; + my @xxmanifests = @{$xxmanifestfiles}; + my $xxmanifest_entries = {}; + my $bag_path=$self->bag_path(); + foreach my $xxmanifest_file (@xxmanifests) { + die("Cannot open $xxmanifest_file: $!") unless (open(my $XXMANIFEST,"<:encoding(utf8)", $xxmanifest_file)); + my $algo = $xxmanifest_file; + $algo =~ s#^($bag_path/).bagit/#$1#; # FIXME: only for dotbagit-variant, if dotbagit will be outdated, this should be removed + $algo =~ s#^$bag_path/##; + $algo =~ s#^tag##; + $algo =~ s#^manifest-([a-z0-9]+)\.txt$#$1#; + while (my $line = <$XXMANIFEST>) { chomp($line); my($digest,$file) = split(/\s+/, $line, 2); - $tagmanifest_entries->{$file} = $digest; + $xxmanifest_entries->{$algo}->{$file} = $digest; } - close($TAGMANIFEST); - + close($XXMANIFEST); } - return $tagmanifest_entries; + return $xxmanifest_entries; } -sub _build_manifest_entries { +sub _build_tagmanifest_entries { my ($self) = @_; + return $self->__build_xxxmanifest_entries($self->tagmanifest_files); +} - my @manifests = @{$self->manifest_files}; - my $manifest_entries = {}; - foreach my $manifest_file (@manifests) { - die("Cannot open $manifest_file: $!") unless (open (my $MANIFEST, "<:encoding(utf8)", $manifest_file)); - while (my $line = <$MANIFEST>) { - chomp($line); - my ($digest,$file); - ($digest, $file) = $line =~ /^([a-f0-9]+)\s+(.+)/; - if(!$file) { - die ("This is not a valid manifest file"); - } else { - print "file: $file \n" if $DEBUG; - $manifest_entries->{$file} = $digest; - } - } - close($MANIFEST); - } - - return $manifest_entries; - +sub _build_manifest_entries { + my ($self) = @_; + return $self->__build_xxxmanifest_entries($self->manifest_files); } sub _build_payload_files{ my($self) = @_; - my $payload_dir = $self->payload_path; my $payload_reldir = $self->rel_payload_path; - my @payload=(); File::Find::find( sub{ $File::Find::name = decode ('utf8', $File::Find::name); @@ -532,6 +519,96 @@ sub load_plugins { return 1; } + +sub _verify_XXX_manifests { + my ($self, $xxprefix, $xxmanifest_entries, $files, $return_all_errors) =@_; + # Read the manifest file + #use Data::Printer; + #p( $self); + #print Dumper($self->{entries}); + my %manifest = %{$xxmanifest_entries}; + my @payload = @{ $files }; + my %invalids; + my $bagit = $self->bag_path; + my $version = $self->bag_version(); + # Evaluate each file against the manifest + foreach my $alg (keys %{$xxmanifest_entries}) { + my $manifest_alg = $self->manifests->{$alg}; + if (! defined $manifest_alg) { + next; + # TODO: return Errormessage? + } + my $digestobj = $manifest_alg->algorithm(); + + my $xxfilename = "$bagit/$xxprefix-$alg.txt"; + foreach my $local_name (@payload) { + # local_name is relative to bagit base + my ($digest); + unless (exists $manifest{$alg}{"$local_name"}) { + #system ("tree $bagit"); + #use File::Slurp; + #my $vontent = read_file($xxfilename); + #print "Content: '$vontent'\n"; + #print "Alg=$alg\n"; + #use Data::Printer; + #p( %manifest); + die("file found which is not in $xxfilename: [$local_name] (bag-path:$bagit)"); + } + if (!-r "$bagit/$local_name") {die("Cannot open $bagit/$local_name");} + $digest = $digestobj->verify_file("$bagit/$local_name"); + print "digest " . $digestobj->name() . " of $bagit/$local_name: $digest\n" if $DEBUG; + unless ($digest eq $manifest{$alg}{$local_name}) { + if ($return_all_errors) { + $invalids{$local_name} = $digest; + } + else { + die("file: $bagit/$local_name invalid, digest ($alg) calculated=$digest, but expected=$manifest{$alg}{$local_name} in file '$xxfilename'"); + } + } + delete($manifest{$alg}{$local_name}); + } + } + if($return_all_errors && keys(%invalids) ) { + foreach my $invalid (keys(%invalids)) { + print "invalid: $invalid hash: ".$invalids{$invalid}."\n"; + } + die ("bag verify for bagit $version failed with invalid files"); + } + # Make sure there are no missing files + foreach my $alg (keys %manifest){ + my @localfiles = keys(%{ $manifest{$alg} }); + if (@localfiles) { + use Data::Printer; + p( $self); + die("Missing files in bag $bagit for algorithm=$alg", join( " file=", @localfiles)); + } + } + +} + +sub _verify_manifests { + my ($self, $alg, $return_all_errors) = @_; + $self->_verify_XXX_manifests( + "manifest", + $self->manifest_entries(), + $self->payload_files(), + $return_all_errors + ); +} + +sub _verify_tagmanifests { + my ($self, $alg, $return_all_errors) = @_; + # filter tagmanifest-files + my @non_payload_files = grep { $_ !~ m/tagmanifest-[a-z0-9]+\.txt/} @{ $self->non_payload_files }; + $self->_verify_XXX_manifests( + "tagmanifest", + $self->tagmanifest_entries, + \@non_payload_files, + $return_all_errors + ); +} + + =head2 verify_bag An interface to verify a bag. @@ -547,11 +624,12 @@ sub verify_bag { #like $return all errors rather than dying on first one my $bagit = $self->bag_path; my $version = $self->bag_version(); # to call trigger - my $manifest_file = $self->metadata_path."/manifest-".$self->forced_fixity_algorithm()->name().".txt"; # FIXME: use plugin instead + my $forced_fixity_alg = $self->forced_fixity_algorithm()->name(); + my $manifest_file = $self->metadata_path."/manifest-$forced_fixity_alg.txt"; # FIXME: use plugin instead my $payload_dir = $self->payload_path; my $return_all_errors = $opts->{return_all_errors}; - my %invalids; - my @payload = @{$self->payload_files}; + + die("$manifest_file is not a regular file for bagit $version") unless -f ($manifest_file); die("$payload_dir is not a directory") unless -d ($payload_dir); @@ -560,38 +638,14 @@ sub verify_bag { die ("Bag Version $version is unsupported"); } - # Read the manifest file - #print Dumper($self->{entries}); - my %manifest = %{$self->manifest_entries}; + # check forced fixity + $self->_verify_manifests($forced_fixity_alg, $return_all_errors); + $self->_verify_tagmanifests($forced_fixity_alg, $return_all_errors); + + + # TODO: check if additional algorithms are used + - # Evaluate each file against the manifest - my $digestobj = $self->forced_fixity_algorithm(); - foreach my $local_name (@payload) { # local_name is relative to bagit base - my ($digest); - unless ($manifest{"$local_name"}) { - die ("file found not in manifest: [$local_name] (bag-path:$bagit)"); - } - if (! -r "$bagit/$local_name" ) {die ("Cannot open $bagit/$local_name");} - $digest = $digestobj->verify_file( "$bagit/$local_name"); - print "digest of $bagit/$local_name: $digest\n" if $DEBUG; - unless ($digest eq $manifest{$local_name}) { - if($return_all_errors) { - $invalids{$local_name} = $digest; - } - else { - die ("file: $bagit/$local_name invalid"); - } - } - delete($manifest{$local_name}); - } - if($return_all_errors && keys(%invalids) ) { - foreach my $invalid (keys(%invalids)) { - print "invalid: $invalid hash: ".$invalids{$invalid}."\n"; - } - die ("bag verify for bagit $version failed with invalid files"); - } - # Make sure there are no missing files - if (keys(%manifest)) { die ("Missing files in bag".p(%manifest)); } return 1; } diff --git a/tmp/bagit/test/bag-info.txt b/tmp/bagit/test/bag-info.txt index 3964b4191b230fe5ce9e6d68a5b5b948b57c690f..746e31a9f3dbc86984924a072667bc50d57eb12c 100644 --- a/tmp/bagit/test/bag-info.txt +++ b/tmp/bagit/test/bag-info.txt @@ -1,6 +1,6 @@ SLUBArchiv-sipVersion: v2020.1 Author: Taras Schevchenko -Bagging-Date: 2020-02-25 +Bagging-Date: 2020-03-09 Bag-Software-Agent: Archive::BagIt <https://metacpan.org/pod/Archive::BagIt> Payload-Oxum: 23.1 Bag-Size: 23 B diff --git a/tmp/bagit/test/tagmanifest-md5.txt b/tmp/bagit/test/tagmanifest-md5.txt index 58ffb033203ceb4d9bc54c52fd7d0a33dde7fba8..04e2a1bbfddf84d466f8f5b6c391702bd4bd44cf 100644 --- a/tmp/bagit/test/tagmanifest-md5.txt +++ b/tmp/bagit/test/tagmanifest-md5.txt @@ -1,5 +1,5 @@ 78bfb3f0189a50e8652f8fcfffebeb12 manifest-sha512.txt -4f1e53d2b0c3a731c560078589ad3202 bag-info.txt +1e2916f186cc750d5dc68bc27e7efe21 bag-info.txt 7cb0edfcba877a10170214f4f97d0319 bagit.txt 4aa497234e99e2f9302d10bed31db634 manifest-md5.txt 8895d8bf80620a0c2f79969cac46782a meta/Fallbeispiel-13.xml diff --git a/tmp/bagit/test/tagmanifest-sha512.txt b/tmp/bagit/test/tagmanifest-sha512.txt index baaeefb8b5059f00e2e96120fa145ca3ead0f335..785841bd1a7a89e4503b5ece5ee4174ebe9af1dc 100644 --- a/tmp/bagit/test/tagmanifest-sha512.txt +++ b/tmp/bagit/test/tagmanifest-sha512.txt @@ -1,5 +1,5 @@ b1c9a4bc23f472db7fff27884e69567fd62ee2dc17b36909a373b80fda9b0bcf84491ce0bc163e00ce43e4f02e085a3d5315556604cf23f40e81a25d3c0a57a8 manifest-sha512.txt -95447138cd491c96d8f65baa9a95a974055e11a414f1ccf721472ed0174a12c7cdf3e5ff02eda2477bb8628279ce59e12382ce9b2ba430901c17c967d726b344 bag-info.txt +f21212570b7b64e142c866cafcb0c891ad7beb288f439b1489b68445b93e22450f07c13a7b0ac9ec2c9d7e9cfc91bb9102af2c71954f0b7e2eb67511bd8ff6eb bag-info.txt da429ec4f5ca60aebd25aa4c26ccf51ae87609129ae76f09683c810da4af4414277250ecc0253bf7871c7a3f9c5b3ed6918a5d5c092a7dc394051601eb8bb3ab bagit.txt 0e892c5219c5b1153f6cdbdcdc4c08d5fb7d0eeb63725d4ec19110ccc28ed59d0129ccdb29117d8094f5e8f04924db6dc55b8a0416216a60df46047d4ede4114 manifest-md5.txt a9c97cc5c5f4677472965fb988cc0e4eff57beb534fcdadb838b29226d212bb00a3d3ae5ca15390f41b2d0f2f57bf02bbb779f7db41f128c32893fd0f85038d1 meta/Fallbeispiel-13.xml