Skip to content
Snippets Groups Projects
Commit 10f9d776 authored by Andreas Romeyke
Browse files

- refactoring, extracted query-preparation to...

- refactoring, extracted query-preparation to SLUB::LZA::TA::Archivematica::Elasticsearch::PrepareQuery
parent b600ae90
No related branches found
No related tags found
No related merge requests found
......@@ -12,7 +12,6 @@ our @EXPORT = qw(
version_elasticsearch
query_elasticsearch_count
query_elasticsearch
prepare_query
);
our @EXPORT_OK = qw();
......@@ -61,101 +60,7 @@ sub query_elasticsearch_count($protocol, $host, $port, $index_name, $query_hash)
return $res->{hits}->{total};
}
# Build the Elasticsearch request body for an AIP search from the option
# hash reference $opt.
#
# Options that shape the request:
#   startrecord          1-based number of the first record; sent as the
#                        zero-based "from"
#   maxrecords           sent unchanged as "size"
#   aip                  phrase match on "uuid"                     (must)
#   lzaid                phrase match on the SLUBArchiv-lzaId field (must)
#   descriptive          multi_match over "transferMetadata.*"      (should)
#   creationdate_epochs  hash ref with "from" and "to"; becomes the range
#                        from <= created < to                       (must)
#   debug                dump both clause lists to STDERR
#
# Returns the request as a hash reference. When none of the options above
# adds anything, the return value is undef.
sub prepare_query ($opt) {
    my $request;    # deliberately left undef until an option fills it
    my @must_clauses;
    my @should_clauses;

    # Paging: startrecord counts from 1, Elasticsearch "from" counts from 0.
    $request->{from} = $opt->{startrecord} - 1 if exists $opt->{startrecord};
    $request->{size} = $opt->{maxrecords}      if exists $opt->{maxrecords};

    # Exact-phrase filters, listed as option name => index field.
    my @phrase_filters = (
        [ aip   => "uuid" ],
        [ lzaid => "transferMetadata.bim:bag-info_dict.bim:SLUBArchiv-lzaId" ],
    );
    for my $filter (@phrase_filters) {
        my ($option, $field) = @{$filter};
        next if not exists $opt->{$option};
        push @must_clauses, { "match_phrase" => { $field => "$opt->{$option}" } };
    }

    if (exists $opt->{descriptive}) {
        # https://opster.com/guides/elasticsearch/search-apis/elasticsearch-match-multi-match-and-match-phrase-queries/#Multi-match-query
        my %multi_match = (
            "query"  => "$opt->{descriptive}",
            "fields" => ["transferMetadata.*"],    # scan all metadata
        );
        push @should_clauses, { "multi_match" => \%multi_match };
    }

    if (exists $opt->{creationdate_epochs}) {
        my %created_window = (
            "gte" => $opt->{creationdate_epochs}->{from},
            "lt"  => $opt->{creationdate_epochs}->{to},
        );
        push @must_clauses, { "range" => { "created" => \%created_window } };
    }

    # The following options are ignored here. The legacy query expressions
    # are kept for reference only:
    #   source                  IE.sourceMD.content="<value>"
    #   modificationdate        FILE.objectCharacteristics.modificationDate==<value>
    #   with_format             FILE.generalFileCharacteristics.formatLibraryId==<value>
    #   without_format          FILE.generalFileCharacteristics.formatLibraryId!=<value>
    #   with_valid_files        FILE.fileValidation.isValid==true
    #   with_invalid_files      FILE.fileValidation.isValid==false
    #   with_passed_viruschecks FILE.fileVirusCheck.status==true
    #   with_failed_viruschecks FILE.fileVirusCheck.status==false
    #   with_missed_viruschecks FILE.fileVirusCheck.status==""

    # Only non-empty clause lists are attached to the bool query.
    $request->{query}->{bool}->{must}   = \@must_clauses   if @must_clauses;
    $request->{query}->{bool}->{should} = \@should_clauses if @should_clauses;

    if (exists $opt->{debug}) {
        use Data::Printer;
        say STDERR "\@must=", np(@must_clauses);
        say STDERR "\@should=", np(@should_clauses);
    }
    return $request;
}
# vim: set tabstop=4
......
package SLUB::LZA::TA::Archivematica::Elasticsearch::PrepareQuery;
use v5.36;
# Translate the option hash reference $opt into an Elasticsearch request
# body for searching AIPs.
#
# Options that shape the request:
#   startrecord          1-based number of the first record; sent as the
#                        zero-based "from"
#   maxrecords           sent unchanged as "size"
#   aip                  phrase match on "uuid"                     (must)
#   lzaid                phrase match on the SLUBArchiv-lzaId field (must)
#   descriptive          multi_match over "transferMetadata.*"      (should)
#   creationdate_epochs  hash ref with "from" and "to"; becomes the range
#                        from <= created < to                       (must)
#   debug                dump both clause lists to STDERR
#
# Returns the request as a hash reference. When none of the options above
# adds anything, the return value is undef.
sub prepare_aip_query ($opt) {
    my $request;    # deliberately left undef until an option fills it
    my @must_clauses;
    my @should_clauses;

    # Paging: startrecord counts from 1, Elasticsearch "from" counts from 0.
    $request->{from} = $opt->{startrecord} - 1 if exists $opt->{startrecord};
    $request->{size} = $opt->{maxrecords}      if exists $opt->{maxrecords};

    # Exact-phrase filters, listed as option name => index field.
    my @phrase_filters = (
        [ aip   => "uuid" ],
        [ lzaid => "transferMetadata.bim:bag-info_dict.bim:SLUBArchiv-lzaId" ],
    );
    for my $filter (@phrase_filters) {
        my ($option, $field) = @{$filter};
        next if not exists $opt->{$option};
        push @must_clauses, { "match_phrase" => { $field => "$opt->{$option}" } };
    }

    if (exists $opt->{descriptive}) {
        # https://opster.com/guides/elasticsearch/search-apis/elasticsearch-match-multi-match-and-match-phrase-queries/#Multi-match-query
        my %multi_match = (
            "query"  => "$opt->{descriptive}",
            "fields" => ["transferMetadata.*"],    # scan all metadata
        );
        push @should_clauses, { "multi_match" => \%multi_match };
    }

    if (exists $opt->{creationdate_epochs}) {
        my %created_window = (
            "gte" => $opt->{creationdate_epochs}->{from},
            "lt"  => $opt->{creationdate_epochs}->{to},
        );
        push @must_clauses, { "range" => { "created" => \%created_window } };
    }

    # The following options are ignored here. The legacy query expressions
    # are kept for reference only:
    #   source                  IE.sourceMD.content="<value>"
    #   modificationdate        FILE.objectCharacteristics.modificationDate==<value>
    #   with_format             FILE.generalFileCharacteristics.formatLibraryId==<value>
    #   without_format          FILE.generalFileCharacteristics.formatLibraryId!=<value>
    #   with_valid_files        FILE.fileValidation.isValid==true
    #   with_invalid_files      FILE.fileValidation.isValid==false
    #   with_passed_viruschecks FILE.fileVirusCheck.status==true
    #   with_failed_viruschecks FILE.fileVirusCheck.status==false
    #   with_missed_viruschecks FILE.fileVirusCheck.status==""

    # Only non-empty clause lists are attached to the bool query.
    $request->{query}->{bool}->{must}   = \@must_clauses   if @must_clauses;
    $request->{query}->{bool}->{should} = \@should_clauses if @should_clauses;

    if (exists $opt->{debug}) {
        use Data::Printer;
        say STDERR "\@must=", np(@must_clauses);
        say STDERR "\@should=", np(@should_clauses);
    }
    return $request;
}
# Build the Elasticsearch request that finds file records by PRONOM id.
#
# Arguments are positional: ($self, $opt, $args). Only $opt is read; the
# other two are accepted so that a caller can pass its own @_ through.
#
# When $opt has a "pronom_id" key, returns a hash reference holding
#   - a bool/must query with one match_phrase on the PREMIS
#     formatRegistryKey path,
#   - "size" fixed at 10000,
#   - "_source" limited to 'AIPUUID'.
# Without that key it returns nothing (empty list, undef in scalar context).
sub prepare_files_query {
    my ($self, $opt, $args) = @_;
    return if not exists $opt->{pronom_id};

    my $pronom_id = $opt->{pronom_id};

    # Dotted path through the METS/PREMIS structure down to the registry key.
    my $registry_key_field = join(
        ".", qw(
            METS
            amdSec
            mets:amdSec_dict
            mets:techMD_dict
            mets:mdWrap_dict
            mets:xmlData_dict
            premis:object_dict
            premis:objectCharacteristics_dict
            premis:format_dict
            premis:formatRegistry_dict
            premis:formatRegistryKey
        )
    );

    my @must_clauses = (
        { match_phrase => { $registry_key_field => "$pronom_id" } },
    );
    my %files_query = (
        query     => { bool => { must => \@must_clauses } },
        "size"    => 10000,
        "_source" => { "includes" => 'AIPUUID' },
    );
    return \%files_query;
}
1;
......@@ -4,6 +4,7 @@ use strict;
use warnings;
use feature qw(say);
use SLUB::LZA::TA::Archivematica::Elasticsearch;
use SLUB::LZA::TA::Archivematica::Elasticsearch::PrepareQuery;
use Data::Printer;
use Date::Calc qw(Date_to_Time);
use namespace::autoclean -except => qr{SLUB::LZA::TA::.*};
......@@ -89,7 +90,7 @@ sub validate_args {
sub execute {
my ($self, $opt, $args) = @_;
my $aips_query = SLUB::LZA::TA::Archivematica::Elasticsearch::prepare_query($opt);
my $aips_query = SLUB::LZA::TA::Archivematica::Elasticsearch::PrepareQuery::prepare_aip_query($opt);
if (exists $opt->{debug}) {
use Data::Printer;
say STDERR "query:";
......
......@@ -4,6 +4,7 @@ use strict;
use warnings;
use feature qw(say);
use SLUB::LZA::TA::Archivematica::Elasticsearch;
use SLUB::LZA::TA::Archivematica::Elasticsearch::PrepareQuery;
use Data::Printer;
use constant VALUE_TERMINATOR => 0xff;
use constant NULL => 0xfe;
......@@ -194,42 +195,7 @@ sub find_aips_by_file_pronom_id {
my ($self, $opt, $args) = @_;
# index aipfiles needed
# find all AIPs where pronom-id like XXX
my $pronom_id = $opt->{pronom_id};
my $files_query = {
query => {
bool => {
must =>
[
{
match_phrase => {
join(".", qw(
METS
amdSec
mets:amdSec_dict
mets:techMD_dict
mets:mdWrap_dict
mets:xmlData_dict
premis:object_dict
premis:objectCharacteristics_dict
premis:format_dict
premis:formatRegistry_dict
premis:formatRegistryKey
)
) => "$pronom_id"
#"METS.amdSec.mets:amdSec_dict.mets:techMD_dict.mets:mdWrap_dict.mets:xmlData_dict.premis:object_dict.premis:formatRegistry_dict.premis:formatRegistryKey" => "$pronom_id"
#"premis:formatRegistryKey" => "$pronom_id"
}
}
],
}
},
"size" => 10000,
"_source" => {
"includes" => 'AIPUUID'
}
};
my $files_query = SLUB::LZA::TA::Archivematica::Elasticsearch::PrepareQuery::prepare_files_query(@_);
my $files_response = SLUB::LZA::TA::Archivematica::Elasticsearch::query_elasticsearch(
$SLUB::LZA::TA::config{elasticsearchprotocol},
$SLUB::LZA::TA::config{elasticsearchhost},
......@@ -258,13 +224,13 @@ sub execute {
my $aips_response;
#p($opt);
if (exists $opt->{format} and $opt->{format} eq 'pronom_id') {
$aips_query = SLUB::LZA::TA::Archivematica::Elasticsearch::prepare_query($opt);
$aips_query = SLUB::LZA::TA::Archivematica::Elasticsearch::PrepareQuery::prepare_aip_query($opt);
# replace AIP query with all match
my @aips = find_aips_by_file_pronom_id(@_);
$aips_query->{query}->{bool}->{must}->{terms}->{uuid} = \@aips;
} else {
# only index aips needed
$aips_query = SLUB::LZA::TA::Archivematica::Elasticsearch::prepare_query($opt);
$aips_query = SLUB::LZA::TA::Archivematica::Elasticsearch::PrepareQuery::prepare_aip_query($opt);
}
if (exists $opt->{debug}) {
use Data::Printer;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment