Skip to content
Snippets Groups Projects
Commit b852f283 authored by Andreas Romeyke's avatar Andreas Romeyke
Browse files

- added find_aips_by_file_pronom_id()

- added logic to select aips by pronom id
- minor indent
parent 68f8622b
No related branches found
No related tags found
No related merge requests found
...@@ -34,8 +34,6 @@ Examples: ...@@ -34,8 +34,6 @@ Examples:
'$0 search -s copyrighted' '$0 search -s copyrighted'
* Which AIPs were modified in 2021-05-31? * Which AIPs were modified in 2021-05-31?
'$0 search -m 2021-05-31' '$0 search -m 2021-05-31'
Which AIPs have at least one invalid file?
'$0 search --with-invalid-files'
* Which AIPs with mkv-files are in archive? * Which AIPs with mkv-files are in archive?
'$0 search --pronom-format=fmt/569' '$0 search --pronom-format=fmt/569'
...@@ -97,7 +95,6 @@ sub opt_spec { ...@@ -97,7 +95,6 @@ sub opt_spec {
[ 'with-slubarchiv-externalisilid' => 'also returns AIP slubarchiv-externalisilid'], [ 'with-slubarchiv-externalisilid' => 'also returns AIP slubarchiv-externalisilid'],
[ 'with-slubarchiv-externalworkflow' => 'also returns AIP slubarchiv-externalworkflow'], [ 'with-slubarchiv-externalworkflow' => 'also returns AIP slubarchiv-externalworkflow'],
[ 'with-slubarchiv-hasconservationreason' => 'also returns AIP slubarchiv-hasconservationreason'], [ 'with-slubarchiv-hasconservationreason' => 'also returns AIP slubarchiv-hasconservationreason'],
[ 'with-slubarchiv-lzaid' => 'also returns AIP slubarchiv-lzaid'],
[ 'with-slubarchiv-lzaid|with-lzaid' => 'also returns lzaid' ], [ 'with-slubarchiv-lzaid|with-lzaid' => 'also returns lzaid' ],
[ 'with-slubarchiv-migrated-aip|with-migrated-aip' => 'also returns AIP slubarchiv-migrated-aip'], [ 'with-slubarchiv-migrated-aip|with-migrated-aip' => 'also returns AIP slubarchiv-migrated-aip'],
[ 'with-slubarchiv-origin-ais|with-origin-ais' => 'also returns AIP slubarchiv-origin-ais'], [ 'with-slubarchiv-origin-ais|with-origin-ais' => 'also returns AIP slubarchiv-origin-ais'],
...@@ -123,8 +120,6 @@ sub line_result_mapper { ...@@ -123,8 +120,6 @@ sub line_result_mapper {
my ($partial_result, $opt) = @_; my ($partial_result, $opt) = @_;
my %line; my %line;
$line{aipid}= $partial_result->{_source}->{uuid}; $line{aipid}= $partial_result->{_source}->{uuid};
p($partial_result->{_source});
p($opt);
my @tmd = qw( my @tmd = qw(
External-Description External-Description
External-Identifier External-Identifier
...@@ -161,6 +156,67 @@ sub line_result_mapper { ...@@ -161,6 +156,67 @@ sub line_result_mapper {
\%line; \%line;
} }
# Collect the UUIDs of all AIPs that contain at least one file whose PREMIS
# format registry key phrase-matches the PRONOM id given in $opt->{pronom_id}.
#
# The lookup runs against the 'aipfiles' index; only the AIPUUID field of each
# hit is requested, and the hits are reduced to a de-duplicated list.
#
# Parameters:
#   $self - first element of @_ (unused in this sub)
#   $opt  - option hash ref; the keys 'pronom_id' and 'debug' are read
#   $args - third element of @_ (unused in this sub)
#
# Returns: the distinct AIPUUID values of all hits, in unspecified order
#          (in scalar context: their count).
sub find_aips_by_file_pronom_id {
    my ($self, $opt, $args) = @_;

    # 'use' is executed at compile time no matter where it is written, so it
    # is placed at the top of the sub instead of inside the debug branch.
    use Data::Printer;

    # index aipfiles needed
    # find all AIPs where pronom-id like XXX
    my $pronom_id = $opt->{pronom_id};

    # Dotted path of the field holding the format registry key.
    my $registry_key_field = join(".", qw(
        METS
        amdSec
        mets:amdSec_dict
        mets:techMD_dict
        mets:mdWrap_dict
        mets:xmlData_dict
        premis:object_dict
        premis:objectCharacteristics_dict
        premis:format_dict
        premis:formatRegistry_dict
        premis:formatRegistryKey
    ));

    # NOTE(review): no 'size' is set here. Elasticsearch's search API returns
    # only 10 hits by default, so unless query_elasticsearch() adds a size or
    # paginates, AIPs may be missing from the result - confirm.
    my $files_query = {
        query => {
            bool => {
                must => [
                    {
                        match_phrase => {
                            $registry_key_field => "$pronom_id"
                        }
                    }
                ],
            }
        },
        "_source" => {
            "includes" => 'AIPUUID'
        }
    };

    # Dump the query before sending it, so it is still visible when the
    # request itself fails.
    if (exists $opt->{debug}) {
        say "------------------------";
        say "query=",np( $files_query);
        say "------------------------";
    }

    my $files_response = SLUB::LZA::TA::Archivematica::Elasticsearch::query_elasticsearch(
        $SLUB::LZA::TA::config{elasticsearchprotocol},
        $SLUB::LZA::TA::config{elasticsearchhost},
        $SLUB::LZA::TA::config{elasticsearchport},
        'aipfiles',   # indexname
        $files_query, # query_hash ref
    );

    my %aips;
    foreach my $match (@{ $files_response->{hits}->{hits} // [] }) {
        my $aip = $match->{_source}->{AIPUUID};
        # A hit without AIPUUID would otherwise add an empty-string key (and
        # raise an "uninitialized" warning); skip it.
        next if !defined $aip;
        $aips{$aip} = 1;
    }
    return keys %aips;
}
sub execute { sub execute {
my ($self, $opt, $args) = @_; my ($self, $opt, $args) = @_;
if ($opt->{target_version}) { if ($opt->{target_version}) {
...@@ -169,56 +225,77 @@ sub execute { ...@@ -169,56 +225,77 @@ sub execute {
$SLUB::LZA::TA::config{elasticsearchhost}, $SLUB::LZA::TA::config{elasticsearchhost},
$SLUB::LZA::TA::config{elasticsearchport} $SLUB::LZA::TA::config{elasticsearchport}
); );
exit (0); exit(0);
}
my $aips_query;
my $aips_response;
#p($opt);
if ($opt->{format} eq 'pronom_id') {
$aips_query = SLUB::LZA::TA::Archivematica::Elasticsearch::prepare_query($opt);
# replace AIP query with all match
my @aips = find_aips_by_file_pronom_id(@_);
$aips_query->{query}->{bool}->{must}->{terms}->{uuid} = \@aips;
} else {
# only index aips needed
$aips_query = SLUB::LZA::TA::Archivematica::Elasticsearch::prepare_query($opt);
} }
my $query = SLUB::LZA::TA::Archivematica::Elasticsearch::prepare_query($opt); if (exists $opt->{debug}) {
my $response = SLUB::LZA::TA::Archivematica::Elasticsearch::query_elasticsearch( use Data::Printer;
say "------------------------";
say "query=",np( $aips_query);
say "------------------------";
}
#p($aips_query);
$aips_response = SLUB::LZA::TA::Archivematica::Elasticsearch::query_elasticsearch(
$SLUB::LZA::TA::config{elasticsearchprotocol}, $SLUB::LZA::TA::config{elasticsearchprotocol},
$SLUB::LZA::TA::config{elasticsearchhost}, $SLUB::LZA::TA::config{elasticsearchhost},
$SLUB::LZA::TA::config{elasticsearchport}, $SLUB::LZA::TA::config{elasticsearchport},
'aips', # indexname 'aips', # indexname
$query, # query_hash ref $aips_query, # query_hash ref
); );
my @result = map { my @result = map {
line_result_mapper($_, $opt); line_result_mapper($_, $opt);
} @{$response->{hits}->{hits} }; } @{$aips_response->{hits}->{hits}};
my @headers = sort keys %{$result[0]}; my @headers = sort keys %{$result[0]};
my $aip_only = List::Util::none {$_ =~ m/^with/} keys %{ $opt }; my $aip_only = List::Util::none {$_ =~ m/^with/} keys %{$opt};
if (!exists $opt->{output_format}) { if (!exists $opt->{output_format}) {
if ($aip_only) { if ($aip_only) {
$opt->{output_format} = 'output_as_csv' $opt->{output_format} = 'output_as_csv'
} else { }
else {
$opt->{output_format} = 'output_as_rsv'; $opt->{output_format} = 'output_as_rsv';
} }
} }
if ($opt->{output_format} eq 'output_as_csv') { if ($opt->{output_format} eq 'output_as_csv') {
binmode(STDOUT, ':encoding(UTF-8)'); binmode(STDOUT, ':encoding(UTF-8)');
say join(",", @headers); say join(",", @headers);
say join("\n", map { say join("\n", map {
my $line = $_; my $line = $_;
my $res_line = join( my $res_line = join(
",", ",",
map { $line->{$_} } @headers map {$line->{$_}} @headers
);
$res_line;
} @result
); );
} elsif ($opt->{output_format} eq 'output_as_rsv') { $res_line;
binmode(STDOUT, ':bytes'); } @result
print join(CHR_VALUE_TERMINATOR, @headers).CHR_VALUE_TERMINATOR; );
print CHR_ROW_TERMINATOR; }
print join("", map { elsif ($opt->{output_format} eq 'output_as_rsv') {
my $line = $_; binmode(STDOUT, ':bytes');
my $res_line = join("", map { $line->{$_}.CHR_VALUE_TERMINATOR } @headers print join(CHR_VALUE_TERMINATOR, @headers) . CHR_VALUE_TERMINATOR;
); print CHR_ROW_TERMINATOR;
$res_line.CHR_ROW_TERMINATOR; print join("", map {
} @result my $line = $_;
my $res_line = join("", map {$line->{$_} . CHR_VALUE_TERMINATOR} @headers
); );
binmode(STDOUT, ':encoding(UTF-8)'); $res_line . CHR_ROW_TERMINATOR;
} elsif ($opt->{output_format} eq 'output_as_raw') { } @result
say np($response); );
binmode(STDOUT, ':encoding(UTF-8)');
}
elsif ($opt->{output_format} eq 'output_as_raw') {
say np($aips_response);
} }
return 1; return 1;
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment