Skip to content
Snippets Groups Projects
Commit c62e0adf authored by Andreas Romeyke's avatar Andreas Romeyke
Browse files

- init (adapted from Submission Application)

parent 8cb78c36
No related branches found
No related tags found
No related merge requests found
package SLUB::LZA::TA::Archivematica::Elasticsearch;
# Helper functions for querying the Elasticsearch index of Artefactual's Archivematica
# Andreas Romeyke (romeyke@slub-dresden.de)
# HINT: the server runs Elasticsearch v6.x.y, therefore this code uses the old (6.x) client
use v5.36;
use namespace::autoclean -except => qr{import};
use Search::Elasticsearch 6.81; # install with: cpanm Search::Elasticsearch~"<7.0"
use Search::Elasticsearch::Client::6_0;
use Exporter 'import';
our @EXPORT = qw(
version_elasticsearch
query_elasticsearch_count
query_elasticsearch
prepare_query
);
our @EXPORT_OK = qw();
#$Search::Elasticsearch::Error::DEBUG=2;
# Create a Search::Elasticsearch client for the single node reachable at
# "$protocol://$host:$port".
# The client is configured with the 'Static::NoPing' connection pool and the
# '6_0::Direct' client class.
sub _instantiate($protocol, $host, $port) {
    my $node_url    = "$protocol://$host:$port";
    my %client_args = (
        nodes    => $node_url,
        #trace_to => 'Stderr',
        #cnx_pool => 'Sniff',
        cxn_pool => 'Static::NoPing',
        client   => '6_0::Direct',
    );
    return Search::Elasticsearch->new(%client_args);
}
# Fetch the info document of the Elasticsearch node at
# "$protocol://$host:$port" and return the value stored under
# version -> number.
sub version_elasticsearch($protocol, $host, $port) {
    my $client       = _instantiate($protocol, $host, $port);
    my $version_info = $client->info->{version};
    return $version_info->{number};
}
# Run a search against index $index_name on the node at
# "$protocol://$host:$port" and return the client's search result.
#
# Parameters:
#   $protocol, $host, $port - forwarded to _instantiate() to build the client
#   $index_name             - passed as 'index' to the search call
#   $query_hash             - hashref used as the request body (may be undef)
#   $options                - optional hashref; { no_source => 1 } adds a
#                             '_source' entry to the body so that the complete
#                             source tree is not transferred with the hits
# Returns: the value returned by the client's search() call.
sub query_elasticsearch($protocol, $host, $port, $index_name, $query_hash, $options = undef) {
    # HINT: filter_paths not well supported
    # Shallow copy of the top-level keys, so that adding '_source' below has
    # no side effect on the caller's hash. Nested structures stay shared.
    my %local_query = %{ $query_hash // {} };
    # '// 0' avoids an "uninitialized value" warning when an options hash is
    # passed that has no 'no_source' key.
    if ((defined $options) and (($options->{no_source} // 0) == 1)) {
        # NOTE(review): this is the string "false", not a JSON boolean;
        # confirm the server treats it as intended.
        $local_query{_source} = "false"; # avoids transferring complete source tree in results
    }
    my $e = _instantiate($protocol, $host, $port);
    my $res = $e->search(
        index => $index_name,
        body  => \%local_query, #hashref
    );
    return $res;
}
# Run $query_hash against $index_name without requesting the document
# sources and return the 'total' entry of the result's 'hits' section.
sub query_elasticsearch_count($protocol, $host, $port, $index_name, $query_hash) {
    my %search_options = (no_source => 1);
    my $response = query_elasticsearch(
        $protocol, $host, $port, $index_name, $query_hash, \%search_options
    );
    my $hits = $response->{hits};
    return $hits->{total};
}
# Translate the option hash $opt into an Elasticsearch search body.
#
# Keys of $opt that generate a clause:
#   lzaid        - 'term' clause on transferMetadata.SLUBArchiv-lzaId.keyword,
#                  added to bool.must
#   descriptive  - 'match' clause on transferMetadata.dc, added to bool.should
#   creationdate - 'match' clause on created, added to bool.should
# Other keys:
#   debug        - if the key exists, $opt, the clause lists and the resulting
#                  query are printed via Data::Printer
#   source, modificationdate, with_format, with_valid_files,
#   with_invalid_files, with_passed_viruschecks, with_failed_viruschecks,
#   with_missed_viruschecks - accepted, but no clause is generated for them
#
# Returns: a hashref { query => { bool => { must => [...], should => [...] } } }
# containing only the non-empty clause lists, or undef if no clause was
# generated at all.
sub prepare_query ($opt) {
    my @must;
    my @should;
    # example query:
    # query => {
    #     bool => {
    #         must => [
    #             {
    #                 match => {
    #                     "transferMetadata.SLUBArchiv-externalWorkflow", "testcases",
    #                 },
    #             },
    #         ],
    #         should => [
    #             {
    #                 match => {
    #                     "transferMetadata.SLUBArchiv-externalId", "test-sip_2020-07-17_13-40-17_96152",
    #                 },
    #             }
    #         ]
    #     },
    # }
    #
    # Is a SLUB-specific ID in the archive?
    # curl -XGET -H 'Content-Type: application/json'
    # 'localhost:9200/aips/_search?pretty=true' -d '
    # {
    #   "query": {
    #     "term": {
    #       "transferMetadata.SLUBArchiv-lzaId.keyword":
    #         "SLUB:LZA:testworkflow:testcases:test-sip_2020-07-17_13-40-17_96152"
    #     }
    #   }
    # }'
    #
    # What is the "SLUBArchiv-exportToArchiveDate" value of all matches
    # curl -XGET -H 'Content-Type: application/json'
    # 'localhost:9200/aips/_search?pretty=true' -d '
    # {
    #   "query": {
    #     "match_all": {}
    #   },
    #   "_source": [
    #     "transferMetadata.SLUBArchiv-lzaId",
    #     "transferMetadata.SLUBArchiv-exportToArchiveDate"
    #   ]
    # }'
    #
    # Find all AIPs with a SLUB-specific rights status
    # curl -XGET -H 'Content-Type: application/json'
    # 'localhost:9200/aips/_search?pretty=true' -d '
    # {
    #   "query": {
    #     "term": {
    #       "transferMetadata.slubarchiv:rightsRecord_dict.slubarchiv:copyrightStatus.keyword": "copyrighted"
    #     }
    #   },
    #   "_source": ["uuid"]
    # }'
    #
    # Reporting for a given date range: AIP count,
    # number of archived files, size of archived data, etc.
    # curl -XGET -H 'Content-Type: application/json'
    # 'localhost:9200/aips/_search?pretty=true' -d '
    # {
    #   "query": {
    #     "range": {
    #       "created": {
    #         "gte": 1638313200,
    #         "lt": 1640991600
    #       }
    #     }
    #   },
    #   "aggs": {
    #     "total_size": {"sum": {"field": "size"}},
    #     "total_file_count": {"sum": {"field": "file_count"}}
    #   },
    #   "size": 0
    # }'
    #
    # Free Search - Find AIPs based on catalog data
    # included by producers
    # curl -XGET -H 'Content-Type: application/json'
    # 'localhost:9200/aips/_search?pretty=true' -d '
    # {
    #   "query": {
    #     "query_string": {
    #       "fields": [
    #         "transferMetadata.record_dict.datafield_dict.subfield",
    #         "transferMetadata.mods:mods_dict.mods:titleInfo_dict.mods:title"
    #       ],
    #       "query": "Riesaer Tageblatt und Anzeiger"
    #     }
    #   },
    #   "_source": ["uuid"]
    # }'

    if (exists $opt->{lzaid}) {
        push @must, {
            term => {
                "transferMetadata.SLUBArchiv-lzaId.keyword" => "$opt->{lzaid}",
            },
        };
    }
    if (exists $opt->{descriptive}) {
        push @should, {
            match => {
                "transferMetadata.dc" => "$opt->{descriptive}",
            },
        };
    }
    if (exists $opt->{creationdate}) {
        # A bare { created => ... } hash names no query type and therefore is
        # not a valid clause inside bool.should; wrap it in a 'match' clause
        # like the descriptive search above.
        # NOTE(review): assumes creationdate is a single value; a date range
        # would need a 'range' clause instead - confirm with callers.
        push @should, {
            match => {
                "created" => $opt->{creationdate},
            },
        };
    }

    # The following options do not generate a clause yet. The query
    # fragments kept in the original code are listed here as a reference:
    #   source                  : IE.sourceMD.content="$opt->{source}"
    #   modificationdate        : FILE.objectCharacteristics.modificationDate==$opt->{modificationdate}
    #   with_format             : FILE.generalFileCharacteristics.formatLibraryId==$opt->{'with_format'}
    #   without_format          : FILE.generalFileCharacteristics.formatLibraryId!=$opt->{'without_format'}
    #                             (unsupported by Rosetta <= 7.0)
    #   with_valid_files        : FILE.fileValidation.isValid==true
    #   with_invalid_files      : FILE.fileValidation.isValid==false
    #   with_passed_viruschecks : FILE.fileVirusCheck.status==true
    #   with_failed_viruschecks : FILE.fileVirusCheck.status==false
    #   with_missed_viruschecks : FILE.fileVirusCheck.status==""

    # $query stays undef unless at least one clause list is non-empty.
    my $query;
    if (scalar @must > 0) {
        $query->{query}->{bool}->{must} = [ @must ];
    }
    if (scalar @should > 0) {
        $query->{query}->{bool}->{should} = [ @should ];
    }
    if (exists $opt->{debug}) {
        # 'use' is processed at compile time, i.e. Data::Printer is loaded
        # whether or not the debug option is set.
        use Data::Printer;
        say "opt=", np( $opt);
        say "\@must=",np(@must);
        say "\@should=",np(@should);
        say "QUERY=",np( $query);
    }
    return $query;
}
# vim: set tabstop=4
1;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment