package SLUB::LZA::TA::Command::sample_testing;
use SLUB::LZA::TA -command;
use v5.36;
use utf8;
use SLUB::LZA::TA::Archivematica::Elasticsearch;
use SLUB::LZA::TA::Archivematica::Elasticsearch::PrepareQuery;
use SLUB::LZA::TA::Output;

use Archive::BagIt::Fast;
use Date::Calc qw(Date_to_Time Today Add_Delta_YM Add_Delta_YMD Day_of_Week);
use List::Util qw(sample);
use namespace::autoclean -except => qr{SLUB::LZA::TA::.*};

# VERSION

# ABSTRACT: sample testing module for ta-tool

sub abstract { return "tests samples of AIPs in current Archival Information System (AIS)";}
my $base_cmd = "$0 sample-test";
my $dummycmd = " "x length($base_cmd);
my $description=<<"DESCR";
Ask an AIS for a sample of AIPs and tests them.

Overview:

$base_cmd [--daily| --weekly | --monthly | --yearly] [--workflow WORKFLOW] [--factor FACTOR]

$dummycmd --version
$dummycmd --help

Examples:

  * Report sample testing statistics of AIPs last month for workflow Kitodo
    '$base_cmd --monthly --workflow Kitodo'
  * Report 1‰ sample statistics of AIPs last year
    '$base_cmd --yearly --factor 0.001'

A printable PDF version could be generated using ff. commands:
    '$base_cmd | asciidoctor-pdf - > report.pdf'

HINT: If you want lists, use the 'search' command instead!
HINT: ensure the ElasticSearch server allows to return >10.000 results if your archive is large

DESCR

sub description {
    return "$description"
}
sub opt_spec {
    my @global_opts= SLUB::LZA::TA::common_global_opt_spec();
    my @local_opts = (
        [ 'datemode' => hidden => {
            one_of => [
                [ 'daily|d'   => 'sample  based on last day'],
                [ 'weekly|W'  => 'sample  based on last week'],
                [ 'monthly|m' => 'sample  based on last month' ],
                [ 'yearly|y'  => 'sample  based on last year' ],
                [ 'ldpyearly' => 'sample  based on last LDP year 01.11. - 31.10.'],
                [ 'complete|c'=> 'sample  based on all AIPs (default)'],
                [ 'date-from=s' => 'sample  based on date range, beginning date in format "YYYY-MM-DD", implies "--date-to"'],
            ],
          },
        ],
        [ 'date-to=s' => 'report based on date range, end date in format "YYYY-MM-DD", implies "--date-from"'],
        [],
        [ 'output-format' => hidden => {
            one_of => [
                [ 'output-as-csv|C' => 'prints output as Comma Separated Values (CSV)' ],
                [ 'output-as-raw|R' => 'print raw hash output' ],
                [ 'output-as-rsv|r' => 'prints output as Raw Strings Values (RSV)' ],
                [ 'output-as-asciidoc|a' => 'prints output as AsciiDoc [default]' ],
            ],
        }
        ],
        [],
        [ 'factor|f=f' => 'use the given sampling factor (based on AIP count, default: 0.001)', {default => 0.001}],
        [],
    );
    return (@global_opts, [], @local_opts);
}
sub validate_args { ## no critic (CognitiveComplexity::ProhibitExcessCognitiveComplexity)
    my ($self, $opt, $args) = @_;
    SLUB::LZA::TA::common_global_validate($self, $opt, $args);
    # no args allowed but options!
    $self->usage_error("No args allowed") if @$args;
    my ($cyear, $cmonth, $cday) = Today();
    my ($from_year, $from_month, $from_day);
    my ($to_year, $to_month, $to_day);
    unless (exists $opt->{datemode}) {
        $opt->{datemode} = "complete";
        $opt->{complete} = 1;
    }
    my %date_recipe;
    $date_recipe{daily} = sub {
        ($from_year, $from_month, $from_day) = Add_Delta_YMD($cyear, $cmonth, $cday, 0, 0, -1);
        ($to_year, $to_month, $to_day) = ($from_year, $from_month, $from_day);
    };
    $date_recipe{weekly} = sub {
        ($from_year, $from_month, $from_day) = Add_Delta_YMD($cyear, $cmonth, $cday, 0, 0, -Day_of_Week($cyear, $cmonth, $cday) - 6);
        ($to_year, $to_month, $to_day) = Add_Delta_YMD($from_year, $from_month, $from_day, 0, 0, 6);
    };
    $date_recipe{monthly} = sub {
        ($from_year, $from_month, $from_day) = Add_Delta_YM($cyear, $cmonth, 1, 0, -1);
        ($to_year, $to_month, $to_day) = Add_Delta_YMD($from_year, $from_month, $from_day, 0, 1, -1);
    };
    $date_recipe{yearly} = sub {
        ($from_year, $from_month, $from_day) = Add_Delta_YM($cyear, 1, 1, -1, 0);
        ($to_year, $to_month, $to_day) = Add_Delta_YMD($from_year, $from_month, $from_day, 1, 0, -1);
    };
    $date_recipe{complete} = sub {
        $from_year = 2015;
        $from_month = 1;
        $from_day = 1;
        $to_year = $cyear;
        $to_month = $cmonth;
        $to_day = $cday;
    };
    $date_recipe{ldpyearly} = sub {
        ($from_year, $from_month, $from_day) = Add_Delta_YM($cyear, 1, 1, -1, -2);
        ($to_year, $to_month, $to_day) = Add_Delta_YMD($from_year, $from_month, $from_day, 1, 0, -1);
    };
    $date_recipe{date_from} = sub {
        $self->usage_error('--date-from implies --date-to"') unless exists $opt->{date_to};
        if ($opt->{date_from} =~ m/^(\d{4})-(\d{2})-(\d{2})$/) {
            ($from_year, $from_month, $from_day) = ($1, $2, $3);
        } else {
            $self->usage_error('--date-from expects date in format "YYYY-MM-DD", got "' . $opt->{date_from} . '"');
        }
    };
    $date_recipe{date_to} = sub {
        $self->usage_error('--date-to implies --date-from"') unless exists $opt->{date_from};
        if ($opt->{date_to} =~ m/^(\d{4})-(\d{2})-(\d{2})$/) {
            ($to_year, $to_month, $to_day) = ($1, $2, $3);
        } else {
            $self->usage_error('--date-to expects date in format "YYYY-MM-DD", got "', $opt->{date_to} . '"');
        }
    };
    foreach my $key (keys %{ $opt } ) {
        $date_recipe{$key}->() if (defined $date_recipe{$key} and ref $date_recipe{$key} eq 'CODE');
    }
    $opt->{output_format} = 'output_as_asciidoc' unless (exists $opt->{output_format});
    my $from_epoch = Date_to_Time($from_year, $from_month, $from_day, 0, 0, 0);
    my $to_epoch = Date_to_Time($to_year, $to_month, $to_day, 0, 0, 0);
    $self->usage_error('--date-to should have a date newer than --date-from') if ($from_epoch > $to_epoch);
    printf STDERR "reporting for period %04u-%02u-%02u … %04u-%02u-%02u\n", $from_year, $from_month, $from_day, $to_year, $to_month, $to_day;
    $opt->{creationdate_epochs}->{from} = $from_epoch;
    $opt->{creationdate_epochs}->{to} = $to_epoch;
    $opt->{creationdate_epochs}->{from_string} = sprintf("%04u-%02u-%02u", $from_year, $from_month, $from_day);
    $opt->{creationdate_epochs}->{to_string} = sprintf("%04u-%02u-%02u", $to_year, $to_month, $to_day);

    $self->usage_error('--factor FACTOR expects a FACTOR between 0 and 1, got ', $opt->{factor}) if ( $opt->{factor} > 1.0 or $opt->{factor} < 0.0);
    return 1;
}

sub _check_aip ($sample, $opt) {
    my $path = $sample->{filePath};
    my $is_valid=0;
    my $prefix_permanent = '/var/archivematica/sharedDirectory/www/AIPsStore';
    my $line;
    my $tries = 0;
    foreach my $current_permanent (@{$SLUB::LZA::TA::config{permanent_volume}}) {
        (my $remapped = $path) =~ s#^$prefix_permanent#$current_permanent#;
        if ($opt->{debug}) {
            say STDERR "try remap $path to $remapped";
        }
        if ($tries == 0) {
            $line->{localPath} = $remapped;
        } else {
            $line->{"localPath$tries"} = $remapped;
        }
        if (-d $remapped) {
            my $bag = Archive::BagIt::Fast->new($remapped);
            $bag->use_plugins('Archive::BagIt::Plugin::Algorithm::SHA512');
            $is_valid = $bag->verify_bag({ report_all_errors => 1 });
            if (!$is_valid) {$is_valid = 0;}
            last;
        } else {

            $is_valid = -1;
        }
        $tries++;
    }

    $line->{aipid}=$sample->{uuid};
    $line->{filePath}=$sample->{filePath};
    $line->{lzaid}=$sample->{transferMetadata}->[0]->{'bim:bag-info_dict'}->{'bim:SLUBArchiv-lzaId'};
    $line->{isvalid}=$is_valid ;
    $line->{checkdate}=localtime;
    $line->{creationdate}=localtime $sample->{created};
    return $line;
}

sub _execute {
    my ($self, $opt, $args) = @_;
    my $aips_query;
    my $aips_response;
    use Data::Printer;
    # only index aips needed
    $aips_query = SLUB::LZA::TA::Archivematica::Elasticsearch::PrepareQuery::prepare_aip_query($opt);
    # next lines extend query with reporting
    $aips_query->{'_source'} = {'includes' =>
        ['uuid', 'filePath', 'transferMetadata.bim:bag-info_dict.bim:SLUBArchiv-lzaId', 'created']};
    #p($aips_query);
    $aips_response = SLUB::LZA::TA::Archivematica::Elasticsearch::query_elasticsearch(
        $SLUB::LZA::TA::config{elasticsearch_protocol},
        $SLUB::LZA::TA::config{elasticsearch_host},
        $SLUB::LZA::TA::config{elasticsearch_port},
        'aips',      # indexname
        $aips_query, # query_hash ref
        {
            debug => $opt->{debug},
        }
    );

    my $max_found_aips = $aips_response->{hits}->{total};
    my $sample_size = int($max_found_aips * $opt->{factor} + 0.5);
    if ($opt->{debug}) {
        say STDERR "found $max_found_aips AIPs, use only a $opt->{factor} subsample of $sample_size AIPs";
    }
    # select first sample size aips
    my @sample_set = sort {$a->{uuid} cmp $b->{uuid}} List::Util::sample $sample_size, map {$_->{'_source'} } @{ $aips_response->{hits}->{hits} };
    my @result = map { _check_aip($_, $opt) } @sample_set;
    return \@result;
}



sub execute($self, $opt, $args) {
    my %results;
    $results{date} = sprintf("%04u-%02u-%02u", Today());
    $results{package} = __PACKAGE__;
    $results{from} = $opt->{creationdate_epochs}->{from_string};
    $results{to} = $opt->{creationdate_epochs}->{to_string};

    say STDERR "scanning volumes: ".join(" ", @{ $SLUB::LZA::TA::config{permanent_volume} });
    # we need only _source->filePath and AIPID (and LZA-ID?)
    my $res = _execute($self, $opt, $args);
    $results{lines} = $res;
    my ($headers, $table) = prepare_for_table($res);
                   print_humanreadable_report(\%results) if ($opt->{output_format} eq 'output_as_asciidoc');
    SLUB::LZA::TA::Output::RSV::print_results($table)    if ($opt->{output_format} eq 'output_as_rsv');
    SLUB::LZA::TA::Output::CSV::print_results($table)    if ($opt->{output_format} eq 'output_as_csv');
    SLUB::LZA::TA::Output::Raw::print_results(\%results) if ($opt->{output_format} eq 'output_as_raw');
    say STDERR "report is already sent to STDOUT.";
    return 1;
}

sub prepare_for_table($results) { ## no critic (CognitiveComplexity::ProhibitExcessCognitiveComplexity)
    my @table = @{ $results };
    my @headers = sort keys %{ $table[0] };
    return \@headers, \@table;
}

sub _print_aip($line) {
    my $aipid=$line->{aipid};
    #my $isvalid=($line->{isvalid}?"valid (✅)":"*invalid* (❎)");
    my $isvalid;
    if ($line->{isvalid} == 1) {
        $isvalid="valid";
    } elsif ($line->{isvalid} == 0) {
        $isvalid="invalid";
    } elsif ($line->{isvalid} == -1) {
        $isvalid="not accessible or not a dir";
    } else {
        $isvalid="unknown";
    }
    my $lzaid=$line->{lzaid}?"With lzaid '$line->{lzaid}'":"No lzaid defined";
    my $path=$line->{filePath};
    my $mapped = $line->{localPath} // "";
    foreach my $additional_key (grep {/^localPath\d/} keys %{ $line }) {
        my $additional_path = $line->{$additional_key};
        $mapped.= " or $additional_path";
    }
    say " * AIP `$aipid`";
    say " ** Is $isvalid";
    say " ** $lzaid";
    say " ** Path is `$path`";
    say " ** Mapped Path is $mapped";
    say " ** processed at $line->{checkdate}, created at $line->{creationdate}";
    return 1;
}

sub print_humanreadable_report ($results) { ## no critic (CognitiveComplexity::ProhibitExcessCognitiveComplexity)
    say <<"RPTHEADER";
:lang: en
:doctype: article
:date: $results->{date}
:icons:
:text-align: left
:generator: $0 ($results->{package})

= Report from $results->{from} to $results->{to}
RPTHEADER
    say "== Complete archive\n";

    say "=== Invalid AIPs";

    foreach my $line (grep {!($_->{is_valid})} @{$results->{lines} }) {
        _print_aip($line)
    }

    say "=== Valid AIPs";

    foreach my $line (grep {!($_->{is_valid})} @{$results->{lines} }) {
        _print_aip($line)
    }

    return 1;
}

1;
