From a620b41e621259df64a5c2dcad46a30e3dafddac Mon Sep 17 00:00:00 2001 From: Andreas Romeyke <andreas.romeyke@slub-dresden.de> Date: Mon, 14 Apr 2025 19:06:48 +0200 Subject: [PATCH] - fixed bug: query twice, the first time to get the total hit count, the second time with size set to that count (disables pagination) --- lib/SLUB/LZA/TA/Command/sample_testing.pm | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/SLUB/LZA/TA/Command/sample_testing.pm b/lib/SLUB/LZA/TA/Command/sample_testing.pm index fe1ea1d..015d1c0 100644 --- a/lib/SLUB/LZA/TA/Command/sample_testing.pm +++ b/lib/SLUB/LZA/TA/Command/sample_testing.pm @@ -201,7 +201,7 @@ sub _execute { # next lines extend query with reporting $aips_query->{'_source'} = {'includes' => ['uuid', 'filePath', 'transferMetadata.bim:bag-info_dict.bim:SLUBArchiv-lzaId', 'created']}; - #p($aips_query); + $aips_query->{size} = 1; # we only need to find total size $aips_response = SLUB::LZA::TA::Archivematica::Elasticsearch::query_elasticsearch( $SLUB::LZA::TA::config{elasticsearch_protocol}, $SLUB::LZA::TA::config{elasticsearch_host}, @@ -218,6 +218,24 @@ sub _execute { if ($opt->{debug}) { say STDERR "found $max_found_aips AIPs, use only a $opt->{factor} subsample of $sample_size AIPs"; } + + $aips_query->{size} = $max_found_aips; + # query again + $aips_response = SLUB::LZA::TA::Archivematica::Elasticsearch::query_elasticsearch( + $SLUB::LZA::TA::config{elasticsearch_protocol}, + $SLUB::LZA::TA::config{elasticsearch_host}, + $SLUB::LZA::TA::config{elasticsearch_port}, + 'aips', # indexname + $aips_query, # query_hash ref + { + debug => $opt->{debug}, + + } + ); + my $max_found_aips2 = $aips_response->{hits}->{total}; + my $max_found_aips2_selected = scalar( @{ $aips_response->{hits}->{hits} } ); + die "different result set between two consecutive calls of ElasticSearch query" if ($max_found_aips != $max_found_aips2); + die "limited size in response of ElasticSearch query, expecting $max_found_aips, got 
$max_found_aips2_selected" if ($max_found_aips != $max_found_aips2_selected); # select first sample size aips my @sample_set = sort {$a->{uuid} cmp $b->{uuid}} List::Util::sample $sample_size, map {$_->{'_source'} } @{ $aips_response->{hits}->{hits} }; my @result = map { _check_aip($_, $opt) } @sample_set; -- GitLab