package SLUB::LZA::TA::Command::search;
use SLUB::LZA::TA -command;
use v5.36;
use SLUB::LZA::TA::Archivematica::Elasticsearch;
use SLUB::LZA::TA::Archivematica::Elasticsearch::PrepareQuery;
use SLUB::LZA::TA::Output;

use Date::Calc qw(Date_to_Time);
use namespace::autoclean -except => qr{SLUB::LZA::TA::.*};

# VERSION

# ABSTRACT: search IEs module for ta-tool

# Returns the one-line summary of this command.
sub abstract {
    my $summary = 'searches Archival Information System (AIS)';
    return $summary;
}

# Long help text of the search command.
# The heredoc is interpolating: $0 expands to the program name, "\\" yields a
# single backslash (shell line continuation inside the last example) and "\$"
# yields a literal dollar sign.
my $description=<<"DESCR";
Searches an AIS for descriptive or source metadata on behalf of the Technical Analyst.
Returns AIP id. Only the first 10 matches will be presented by default.

Examples:

  * Which AIP has this dc identifier?
    '$0 search -d SLUB:LZA:Kitodo:kitodo:422766'
  * Which AIPs were created in 2021-05-31?
    '$0 search -c 2021-05-31'
  * Which AIPs with mkv-files are in archive?
    '$0 search --pronom-id fmt/569'
  * Which AIP has this dc identifier? Print the AIP id, lzaid of matching AIP
    as Rows of String Values (RSV)
    '$0 search -d SLUB:LZA:Kitodo:kitodo:422766 --with-lzaid --output-as-rsv'
  * Find all AIPs with missing LDP entries
    '$0 search --only-ldp\\
      --with-ldp-collection --with-ldp-funder --with-ldp-lender\\
      --with-ldp-project --output-as-csv --maxrecords=10000\\
      | grep -E "(^,,)|(,,)|(,\$)"'

DESCR

# Returns the long help text (including usage examples) of this command.
# The grep example uses -E because the pattern relies on grouping and
# alternation, which plain grep would treat as literal characters.
sub description {
    return $description;
}
# Returns the option specification of the search command: the global options
# provided by SLUB::LZA::TA::common_global_opt_spec(), a spacer, and the
# options local to this command.
#
# The local options are grouped into three sections separated by spacers:
#   * output format selection (mutually exclusive via 'one_of'),
#   * search criteria and result window,
#   * '--with-*' switches selecting additional columns to return.
#
# Hidden 'one_of' groups (output-format, datemode, format, onlymode, ldpmode)
# make their member options mutually exclusive.
sub opt_spec {
    my @global_opts= SLUB::LZA::TA::common_global_opt_spec();
    my @local_opts = (
        [ 'output-format' => hidden => {
            one_of => [
                [ 'output-as-csv|C' => 'prints output as Comma Separated Values (CSV)' ],
                [ 'output-as-raw|R' => 'print raw hash output of elasticsearch response' ],
                [ 'output-as-rsv|r' => 'prints output as Rows of String Values (RSV) [default]' ],
            ],
        }
        ],
        [],
        [ 'aip|a=s' => 'search a specific AIP by given AIP id' ],
        [ 'datemode' => hidden => {
            one_of => [
                [ 'creationdate|c=s' => 'search based on creation date in format "YYYY-MM-DD"' ],
                [ 'creationdate-from=s' => 'search based on creation date ranges, beginning date in format "YYYY-MM-DD", implies "--creationdate-to"'],
                #[ 'modificationdate|m=s' => 'search based on modificationdate string' ]
            ]
          }
        ],
        # not part of 'datemode': it has to be combined with --creationdate-from
        [ 'creationdate-to=s' => 'search based on creation date ranges, end date in format "YYYY-MM-DD", implies "--creationdate-from"'],
        [ 'descriptive|d=s', 'search descriptive metadata, using exact match' ],
        [ 'fuzzy|f=s', 'search descriptive metadata, using phrase prefix match' ],
        [ 'lzaid|l=s', 'search a specific AIP by given LZA id' ],
        [ 'maxrecords=i', 'set maxrecords, default is 10', { default => 10 } ],
        [ 'startrecord=i', 'set startrecord, default is 1' ],
        [ 'format' => hidden => { one_of => [
            [ 'pronom-id|p=s' => 'with pronom format id' ],
            #['without-format=s' => 'without pronom format id'],
        ] } ],
        [ 'onlymode' => hidden => {
            one_of => [
                [ 'only-migrated', 'only if migrated from other AIS' ],
                [ 'only-updated', 'only if AIP is an AIP update' ],
                [ 'only-new|only-first-ingest', 'only if AIP is first ingest' ],
        ] } ],
        [ 'workflow|w=s' => 'LZA internal workflow name'],
        [ 'ldpmode' => hidden => {
            one_of => [
                [ 'only-ldp-saxon', 'only if AIP is LDP Saxon funded' ],
                [ 'only-ldp', 'only if AIP is LDP funded' ],
                [ 'only-ldp-without-saxon', 'only if AIP is LDP but not Saxon funded' ],
                [ 'no-ldp', 'only if AIP is not LDP funded' ],
            ] } ],
        [],
        [ 'with-creationdate' => 'also returns AIP creation date' ],
        [ 'with-external-description' => 'also returns AIP external-description'],
        [ 'with-external-identifier' => 'also returns AIP external-identifier'],
        [ 'with-filecount' => 'also returns AIPs file count' ],
        [ 'with-ldp-collection' => 'also returns AIP ldp-collection'],
        [ 'with-ldp-funder' => 'also returns AIP ldp-funder'],
        [ 'with-ldp-lender' => 'also returns AIP ldp-lender'],
        [ 'with-ldp-project' => 'also returns AIP ldp-project'],
        [ 'with-location' => 'also returns AIP location' ],
        [ 'with-path' => 'also returns AIP path' ],
        [ 'with-payload-size' => 'also returns original payload size in Bytes'],
        [ 'with-payload-filecount' => 'also returns original payload file count'],
        [ 'with-result-index' => 'also returns result index' ],
        [ 'with-score' => 'also returns score of Elastic Search match (higher => better)' ],
        [ 'with-size' => 'also returns AIP size in bytes (sum of all file sizes without inodes used for directories)' ],
        [ 'with-slubarchiv-archivalvaluedescription|with-archival-value-description' => 'also returns AIP slubarchiv-archivalvaluedescription'],
        [ 'with-slubarchiv-exporttoarchivedate|with-export2archive-date' => 'also returns AIP slubarchiv-exporttoarchivedate'],
        [ 'with-slubarchiv-externalid' => 'also returns AIP slubarchiv-externalid'],
        [ 'with-slubarchiv-externalisilid' => 'also returns AIP slubarchiv-externalisilid'],
        [ 'with-slubarchiv-externalworkflow' => 'also returns AIP slubarchiv-externalworkflow'],
        [ 'with-slubarchiv-hasconservationreason' => 'also returns AIP slubarchiv-hasconservationreason'],
        [ 'with-slubarchiv-lzaid|with-lzaid' => 'also returns lzaid' ],
        [ 'with-slubarchiv-migrated-aip|with-migrated-aip' => 'also returns AIP slubarchiv-migrated-aip'],
        [ 'with-slubarchiv-origin-ais|with-origin-ais' => 'also returns AIP slubarchiv-origin-ais'],
        [ 'with-slubarchiv-previous-aip|with-previous-aip' => 'also returns AIP slubarchiv-previous-aip'],
        [ 'with-slubarchiv-rightsversion' => 'also returns AIP slubarchiv-rightsversion'],
        [ 'with-slubarchiv-sipversion' => 'also returns AIP slubarchiv-sipversion'],
        [ 'with-status' => 'also returns AIP state in AIS' ],
        [ 'with-title' => 'also returns AIP title' ],
    );
    return (@global_opts, [], @local_opts);
}

# Validates the parsed command line of the search command and derives helper
# entries in $opt that later stages read.
#
# Parameters:
#   $self - command object, used to report problems via usage_error()
#   $opt  - parsed options (hash based), modified in place
#   $args - array ref of remaining positional arguments, must be empty
#
# Side effects on $opt:
#   * creationdate_epochs->{from,to} is set when a creation date or a creation
#     date range was given (start of the first day, end of the last day),
#   * with_payload_oxum is set when payload size or payload file count was
#     requested.
#
# Returns 1 on success; every validation failure is reported through
# $self->usage_error().
sub validate_args($self, $opt, $args) { ## no critic qw(CognitiveComplexity::ProhibitExcessCognitiveComplexity Subroutines::ProhibitExcessComplexity)
    SLUB::LZA::TA::common_global_validate($self, $opt, $args);
    # no args allowed but options!
    $self->usage_error("No args allowed") if @$args;
    my $from_epoch;
    my $to_epoch;
    if (exists $opt->{creationdate}) {
        # a single date covers the whole day
        $from_epoch = _date_to_epoch($opt->{creationdate}, 0, 0, 0);
        $to_epoch   = _date_to_epoch($opt->{creationdate}, 23, 59, 59);
        $self->usage_error('--creationdate expects date in format "YYYY-MM-DD"')
            unless (defined $from_epoch and defined $to_epoch);
    }
    if (exists $opt->{creationdate_from}) {
        $self->usage_error('--creationdate-from implies --creationdate-to') unless exists $opt->{creationdate_to};
        $from_epoch = _date_to_epoch($opt->{creationdate_from}, 0, 0, 0);
        $self->usage_error('--creationdate-from expects date in format "YYYY-MM-DD", got "'.$opt->{creationdate_from}.'"')
            unless defined $from_epoch;
    }
    if (exists $opt->{creationdate_to}) {
        $self->usage_error('--creationdate-to implies --creationdate-from') unless exists $opt->{creationdate_from};
        $to_epoch = _date_to_epoch($opt->{creationdate_to}, 23, 59, 59);
        $self->usage_error('--creationdate-to expects date in format "YYYY-MM-DD", got "'.$opt->{creationdate_to}.'"')
            unless defined $to_epoch;
    }
    if (defined $from_epoch and defined $to_epoch) {
        $self->usage_error('--creationdate-to should have a date newer than --creationdate-from') if ($from_epoch > $to_epoch);
        $opt->{creationdate_epochs}->{from} = $from_epoch;
        $opt->{creationdate_epochs}->{to} = $to_epoch;
    }
    if (exists $opt->{pronom_id}) {
        $self->usage_error("--pronom-id expects string which is conform to PUID structure as described in https://www.nationalarchives.gov.uk/aboutapps/pronom/puid.htm")
        unless ($opt->{pronom_id} =~ m/^(x-)?fmt\/[a-z0-9]+$/ );
    }
    if (exists $opt->{lzaid}) {
        my $rx_up = qr{[A-Za-z0-9_-]+}; # archive name & internal workflow
        my $rx_lw = qr{[a-z0-9_-]+};    # external workflow & external id
        $self->usage_error("--lzaid expects string which is conform to SLUBArchiv internal scheme")
            unless ($opt->{lzaid} =~ m/^$rx_up:$rx_up:$rx_up:$rx_lw:$rx_lw$/);
    }
    # both payload columns are flagged through the same with_payload_oxum entry
    if (exists $opt->{'with_payload_size'} or exists $opt->{'with_payload_filecount'}) {
        $opt->{'with_payload_oxum'} = 1;
    }
    return 1;
}

# Converts a date string in format "YYYY-MM-DD" plus a time of day into epoch
# seconds using Date_to_Time(). Returns undef if the string does not have the
# expected format or if Date_to_Time() rejects the values (for example
# "2021-02-30"), so that the caller can report a usage error instead of
# aborting with a raw exception.
sub _date_to_epoch($date, $hour, $min, $sec) {
    return undef unless (defined $date and $date =~ m/^(\d{4})-(\d{2})-(\d{2})$/); ## no critic qw(Subroutines::ProhibitExplicitReturnUndef)
    my ($year, $month, $day) = ($1, $2, $3);
    local $@; # keep a pending error of the caller untouched
    # eval yields undef when Date_to_Time() throws
    return scalar eval { Date_to_Time($year, $month, $day, $hour, $min, $sec) };
}


# Queries the 'aipfiles' index with the files query built from the given
# options and returns the distinct AIPUUID values of all hits (as a list, in
# no particular order).
sub find_aips_by_file_pronom_id($self, $opt, $args) {
    my $query = SLUB::LZA::TA::Archivematica::Elasticsearch::PrepareQuery::prepare_files_query($self, $opt, $args);
    my $response = SLUB::LZA::TA::Archivematica::Elasticsearch::query_elasticsearch(
        $SLUB::LZA::TA::config{elasticsearch_protocol},
        $SLUB::LZA::TA::config{elasticsearch_host},
        $SLUB::LZA::TA::config{elasticsearch_port},
        'aipfiles',
        $query,
        { debug => $opt->{debug} }
    );
    # hash keys collapse duplicates: several files may belong to the same AIP
    my %seen_uuid;
    for my $hit (@{$response->{hits}->{hits}}) {
        $seen_uuid{ $hit->{_source}->{AIPUUID} } = 1;
    }
    return keys %seen_uuid;
}

# Runs the search: builds the AIP query from the options, optionally narrows
# it by pronom id, queries the 'aips' index and prints the results.
# Returns 1.
sub execute($self, $opt, $args) {
    # the AIP query is needed in every case
    my $query = SLUB::LZA::TA::Archivematica::Elasticsearch::PrepareQuery::prepare_aip_query($opt);
    my $search_by_pronom_id = exists($opt->{format}) && $opt->{format} eq 'pronom_id';
    if ($search_by_pronom_id) {
        # index aipfiles is needed in addition: look up the AIPs owning files
        # with the requested pronom id and put their uuids into the query
        my @uuids = find_aips_by_file_pronom_id($self, $opt, $args);
        $query->{query}->{bool}->{must}->{terms}->{uuid} = \@uuids;
    }
    my $response = SLUB::LZA::TA::Archivematica::Elasticsearch::query_elasticsearch(
        $SLUB::LZA::TA::config{elasticsearch_protocol},
        $SLUB::LZA::TA::config{elasticsearch_host},
        $SLUB::LZA::TA::config{elasticsearch_port},
        'aips',
        $query,
        { debug => $opt->{debug} }
    );
    SLUB::LZA::TA::Output::print_results($response, $opt);
    return 1;
}

1;
