package SLUB::LZA::SIPBuilderBagIt;
use strict;
use warnings;
use utf8;
use feature 'say';
no warnings "experimental::signatures";
use feature 'signatures';
use Archive::BagIt;
use Carp;
use Encode;
use File::Copy;
use File::Copy::Recursive qw(dircopy dirmove);
use File::Basename;
use File::Find;
use List::Util qw(none);
use LWP::UserAgent;          # to get MARC data
use MARC::Record;
use Path::Tiny;
use Term::ANSIColor;         # colored print
use XML::LibXML;
use XML::XPath;

our $VERBOSE = 0;            # print output switch 0 => on, 1 and larger = off
our $with_debug = 0;         # output debug infos and files

# catalogue infos
my $swb_url        = 'https://sru.bsz-bw.de/swb';
my $searchkey_swb  = 'pica.swn';
my $searchkey_k10p = 'pica.ppn';
my $recordschema_swb  = 'marcxmlvbos';
my $recordschema_k10p = 'marcxmlk10os';

# MARC21 -> bag-info.txt mapping
# reference: https://www.loc.gov/marc/bibliographic/bd20x24x.html
my $marc21_to_baginfo_mappings = [
    { tag => "245", code => "a", key => "Title" },
    { tag => "245", code => "b", key => "Title" },
    { tag => "245", code => "c", key => "Title" },
    { tag => "246", code => "a", key => "Title" },
    { tag => "246", code => "b", key => "Title" },
];

sub add_metadata($refAddBagInfo, $ppn, $noppn, $marc21) {
    # adding entries to bag-info key list
    push @{$refAddBagInfo}, {'External-Identifier' => "PPN:$ppn"} unless (! $ppn);
    push @{$refAddBagInfo}, {'External-Identifier' => "$noppn"} unless (! $noppn);
    if ($marc21) {
        foreach my $mapping (@{$marc21_to_baginfo_mappings}) {
            my $text = get_marc21_text_node($marc21, $mapping);
            push @{$refAddBagInfo}, {$mapping->{key} => $text} unless (! $text);
        }
    }
    return 1;
}

sub get_marc21_text_node($marc21, $mapping) {
    if(ref($mapping) ne 'HASH') { croak "not a mapping hash!"; }
    # retrieve value based on mapping using a xpath
    my $xpath = "//*[\@tag='" . $mapping->{tag} . "']/";
    $xpath .= "*[local-name()='subfield' and \@code='" . $mapping->{code} . "']/" unless (! exists $mapping->{code});
    $xpath .= "text()";
    # HINT: xpath example //*[@tag='245']/*[local-name()='subfield' and @code='a']/text()
    my $text = XML::XPath->new($marc21)->findnodes_as_string($xpath);

    return $text;
}

sub get_marc21_from_catalogue($ppn){
    if(! $_[0]){ croak "ppn not defined!"; }
    my $marc21;
    my $use_fallback;

    eval { # try SWB catalogue
        $marc21 = SLUB::LZA::SIPBuilderBagIt::get_marc21_from($swb_url, $ppn, $searchkey_swb, $recordschema_swb);
    };
    if ($@) { # error
        say "$@" unless (! $with_debug);
        if ($@ =~ m#ERROR: Did not get any <recordData/> for PPN#) {
            print_scalar_data("", "WARNING: '$ppn' not a SWB PPN, fallback to K10plus", "", "yellow");
            $use_fallback = 1;
        }
        else {
            die "$@"; # throw exception again
        }
    }

    if ($use_fallback) {
        eval { # try K10plus catalogue
            $marc21 = SLUB::LZA::SIPBuilderBagIt::get_marc21_from($swb_url, $ppn, $searchkey_k10p, $recordschema_k10p);
        };
        if ($@) { # error
            say "$@" unless (!$with_debug);
            if ($@ =~ m#ERROR: Did not get any <recordData/> for PPN#) {
                print_scalar_data("", "ERROR: '$ppn' also not a K10plus PPN", "", "red");
                exit 1;
            }
            else {
                die "$@"; # throw exception again
            }
        }
    }

    print_scalar_data("", "INFO: Found metadata for PPN '$ppn'", "", "white");
    return $marc21;
}

sub get_marc21_from($url, $ppn,$key,$schema){
    # example: ppn="457035137"  for "Der Fichtelberg"
    # specification SRU/SRW BSZ: https://wiki.k10plus.de/pages/viewpage.action?pageId=132874251
    my $ua = LWP::UserAgent->new;
    $ua->agent("MyApp/0.1 ");
    $ua->timeout(3600); #1h

    my $srubase        = $url; # host
    my $srusearchkey   = $key; # SRU search key
    my $sruvalue       = $ppn;
    my $srumaxrecords  = 1;
    my $srustartrecord = 1;
    my $sruschema      = $schema;
    my $sru            = "${srubase}?version=1.1&query=${srusearchkey}%3D${sruvalue}&operation=searchRetrieve&maximumRecords=${srumaxrecords}&startRecord=${srustartrecord}&recordSchema=${sruschema}";

    if ($with_debug) {say "catalog-URL='$sru'";}
    my $response = $ua->get($sru); # ask SWB for given PPN
    if ($response->is_success) {
        # parse ZiNG response, extract MARC-data
        my $xp = XML::XPath->new($response->decoded_content);
        my $parser = XML::LibXML->new();
        if ($with_debug) {
            say "write DEBUG_${ppn}_catalog_response.xml";
            SLUB::LZA::SIPBuilderBagIt::write_file("DEBUG_${ppn}_catalog_response.xml", $response->decoded_content);
        }
        my $recordData = $xp->findnodes_as_string('/*[local-name()="searchRetrieveResponse"]/*[local-name()="records"]/*[local-name()="record"]/*[local-name()="recordData"]/*');
        if (!$recordData) { croak("ERROR: Did not get any <recordData/> for PPN '$ppn' using '$sru'");}
        my $marc_doc = $parser->parse_string($recordData);
        add_marc21_schema_location($marc_doc);
        if ($with_debug) {
            say "write DEBUG_${ppn}_extracted_marc_record.xml";
            SLUB::LZA::SIPBuilderBagIt::write_file("DEBUG_${ppn}_extracted_marc_record.xml", $marc_doc);
        }
        return $marc_doc->serialize();
    }
    else {
        carp("Problem asking catalogue at $url using $ppn");
    }

    return;
}

sub add_marc21_schema_location($marc_doc) {
    if(ref($marc_doc) ne 'XML::LibXML::Document') { croak "marc xml document required!"; }
    my $root = $marc_doc->findnodes('/*[local-name()="record"]')->[0]; # get root element (XML::LibXML::Element)
    $root->setAttribute('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance'); # add xsi namespace (simple)
    $root->setAttribute('xsi:schemaLocation', 'http://www.loc.gov/MARC21/slim https://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd'); # add MARCXML schema location
    # HINT: method sets or replaces the node's attribute
    return 1;
}

sub write_file($filename, $value){
    # write data to file (UTF-8)
    open(my $fh, '>:encoding(UTF-8)', $filename) || (croak "Can't open '$filename', $!");
    print $fh $value;
    close($fh) || (croak "could not close file '$filename', $!");
    return 1;
}

sub generateBagName($file_date, $ppn, $noppn){
    $file_date =~ s/T/_/g; # replace 'T' with '_'
    $file_date =~ s/:/-/g; # replace ':' with '-'
    return (defined $ppn)? "PPN-${ppn}_${file_date}" : "ID-${noppn}_${file_date}";
}

sub createDir($path){
    if(! -d $path && ! -f $path){
        eval { path($path)->mkpath; };
        if ($@) { # error
            print_scalar_data("", "ERROR: Failed to create directory '$path'.", "", "red");
            die $@;
        }
    }
    return 1;
}

sub copyFilesToMeta($ppn, $metaPath,$rightsFilePath,$aRefAddMetaFile,$marc21){

    my @addMetaFile = @{ $aRefAddMetaFile };

    if(@addMetaFile){
        my $i = 1;
        foreach my $file(@addMetaFile){
            # check reserved file names
            my $meta_file_name = basename($file);
            if($meta_file_name eq "rights.xml" || $meta_file_name eq "marc21.xml"){
                foreach my $f(@addMetaFile){
                    # check if numbered xml file exists
                    my $f_name = basename($f);
                    while($f_name eq "$i.xml"){
                        $i++;
                    }
                }
                print_scalar_data("", "WARNING: Renaming " . $meta_file_name . " to $i.xml, because meta filename <rights.xml> or <marc21.xml> is reserved.", "", "yellow");
                print_scalar_data("", "INFO: Read Docu for more information at https://slubarchiv.slub-dresden.de/technische-standards-fuer-die-ablieferung-von-digitalen-dokumenten/", "", "white");
                copy($file, $metaPath) or die "Copy failed: $!";
                rename("$metaPath/$meta_file_name", "$metaPath/$i.xml");
                $i++;
            }else{
                copy($file, $metaPath) or die "Copy failed: $!";
            }
        }
    }

    # marc21.xml
    if ($ppn && $marc21) {
        write_file("$metaPath/marc21.xml", $marc21);
    }

    # rights.xml
    if(basename($rightsFilePath) ne "rights.xml"){
        my $rights_name = basename($rightsFilePath);
        print_scalar_data("", "WARNING: Renaming " . $rights_name . " to rights.xml, because meta filename <rights.xml> is required.", "", "yellow");
        print_scalar_data("", "INFO: Read Docu for more information at https://slubarchiv.slub-dresden.de/technische-standards-fuer-die-ablieferung-von-digitalen-dokumenten/", "", "white");
        copy($rightsFilePath, $metaPath) or die "Copy failed: $!";
        rename("$metaPath/$rights_name", "$metaPath/rights.xml");
    }else{
        copy($rightsFilePath, $metaPath) or die "Copy failed: $!";
    }

    return 1;
}

sub validateRightsXML($rightsFilePathString,$xsdName){
    my $rightsFilePath = path($rightsFilePathString);
    if (!$rightsFilePath->is_file) {
        die "rightsFilePath '$rightsFilePath' is not readable/does not exist";
    }
    my $rightsName = $rightsFilePath->basename();
    my $xsd_file = path(__FILE__)->parent->realpath->parent->child("xsd")->child($xsdName);    # absolute path
    if (! $xsd_file->is_file) {
        # retry, because assembled path for lib/
        $xsd_file = path(__FILE__)->parent->parent->parent->realpath->parent->child("xsd")->child($xsdName);
    }

    my $schema = XML::LibXML::Schema->new(location => $xsd_file);
    my $parser = XML::LibXML->new;
    my $doc    = $parser->parse_file($rightsFilePath);

    eval {
        $schema->validate($doc);
    };
    if ($@) { # error
        die "File $rightsName failed validation: $@";
    };
    return 1;
}

sub print_scalar_data($header,$p,$arrRef,$pColor){
    my $priority = 0;
    # header
    if($header ne ""){
        print "----------------------------------------------------------------------------------\n" if $priority >= $VERBOSE;
        printf "%" . length($header) . "s\n", colored($header, 'bold green') if $priority >= $VERBOSE;
        print "----------------------------------------------------------------------------------\n" if $priority >= $VERBOSE;
    }
    # paragraph
    if(($p ne "") && ($arrRef eq "")){
        printf "%" . length($p) . "s\n", colored($p, "bold $pColor") if $priority >= $VERBOSE;
    }

    # paragraph with converted array
    if(($p ne "") && ($arrRef ne "")){
        my $string = "";
        for my $i (0 .. $#$arrRef){
            if($i eq $#$arrRef){
                $string .= " $$arrRef[$i]";
            } else {
                $string .= " $$arrRef[$i],";
            }
        }
        my $body = $p . $string;
        printf "%" . length($body) . "s\n", colored($body, "bold $pColor") if $priority >= $VERBOSE;
    }

    return 1;
}

sub create_slub_bagit($bagPath,$refAddBagInfo){
    # construct bag
    if(! defined $bagPath) { croak "bagPath is not defined!"; }
    if(! -d $bagPath) { croak "path '$bagPath' is not directory!" }
    if(! defined $refAddBagInfo ) { croak "array of hashes not defined for bag-info.txt!"; }
    my $oArchiveBagIt = Archive::BagIt->make_bag($bagPath);
    $oArchiveBagIt->bag_info($refAddBagInfo);
    $oArchiveBagIt->store();
    print_scalar_data("", "SUCCESS: SLUB SIP at $bagPath build successfully!", "", "green");
    return 1;
}

sub buildBagWithCopyOption($ppn,$noppn,$ieDirectory,$rightsFilePath,$aRefAddMetaFile,$refAddBagInfo,$bagPath,$dataPath,$metaPath){
    my @addMetaFile = @{ $aRefAddMetaFile };
    my @addBagInfo = @{ $refAddBagInfo };
    my $marc21;

    # get descriptive metadata from catalog
    if ($ppn) {
        $marc21 = get_marc21_from_catalogue($ppn);
    }
    # create bag dir
    SLUB::LZA::SIPBuilderBagIt::createDir($bagPath);
    # create data dir
    SLUB::LZA::SIPBuilderBagIt::createDir($dataPath);
    # copy payload files to data
    print_scalar_data("", "INFO: copying original data...", "", "white");
    dircopy($ieDirectory, $dataPath) || die ("Error coping $ieDirectory to $dataPath");
    # create meta dir
    SLUB::LZA::SIPBuilderBagIt::createDir($metaPath);
    # copy not payload files to meta
    SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $marc21);
    # add metadata for bag-info.txt
    add_metadata(\@addBagInfo, $ppn, $noppn, $marc21);
    # metadata warnings
    SLUB::LZA::SIPBuilderBagIt::checkForTitle(\@addBagInfo);
    # create slub bagit
    print_scalar_data("", "INFO: building SIP...", "", "white");
    SLUB::LZA::SIPBuilderBagIt::create_slub_bagit($bagPath, \@addBagInfo);

    return 1;
}

sub buildBagWithMoveOption($ppn,$noppn,$ieDirectory,$rightsFilePath,$aRefAddMetaFile,$refAddBagInfo,$bagPath,$dataPath,$metaPath){
    my @addMetaFile = @{ $aRefAddMetaFile };
    my @addBagInfo = @{ $refAddBagInfo };
    my $marc21;

    # get descriptive metadata from catalog
    if ($ppn) {
        $marc21 = get_marc21_from_catalogue($ppn);
    }
    # create bag dir
    SLUB::LZA::SIPBuilderBagIt::createDir($bagPath);
    # move payloads to data dir (which is automatically renamed from IE Directory)
    print_scalar_data("", "INFO: moving original data...", "", "white");
    dirmove($ieDirectory, $dataPath) || die ("Error moving $ieDirectory to $dataPath");
    # create meta dir
    SLUB::LZA::SIPBuilderBagIt::createDir($metaPath);
    # copy not payload files to meta
    SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $marc21);
    # add metadata for bag-info.txt
    add_metadata(\@addBagInfo, $ppn, $noppn, $marc21);
    # metadata warnings
    SLUB::LZA::SIPBuilderBagIt::checkForTitle(\@addBagInfo);
    # create slub bagit
    print_scalar_data("", "INFO: building SIP...", "", "white");
    SLUB::LZA::SIPBuilderBagIt::create_slub_bagit($bagPath, \@addBagInfo);

    return 1;
}

sub buildBagWithReplaceOption($ppn,$noppn,$ieDirectory,$rightsFilePath,$aRefAddMetaFile,$refAddBagInfo,$bagPath,$dataPath,$metaPath){
    my @addMetaFile = @{ $aRefAddMetaFile };
    my @addBagInfo = @{ $refAddBagInfo };
    my $marc21;

    # get descriptive metadata from catalog
    if ($ppn) {
        $marc21 = get_marc21_from_catalogue($ppn);
    }
    # create bag dir
    SLUB::LZA::SIPBuilderBagIt::createDir($bagPath);
    # move payloads to data dir (which is automatically renamed from IE Directory)
    print_scalar_data("", "INFO: moving original data...", "", "white");
    dirmove($ieDirectory, $dataPath) || die ("Error moving $ieDirectory to $dataPath");
    # create meta dir
    SLUB::LZA::SIPBuilderBagIt::createDir($metaPath);
    # copy not payload files to meta
    SLUB::LZA::SIPBuilderBagIt::copyFilesToMeta($ppn, $metaPath, $rightsFilePath, \@addMetaFile, $marc21);
    # add metadata for bag-info.txt
    add_metadata(\@addBagInfo, $ppn, $noppn, $marc21);
    # metadata warnings
    SLUB::LZA::SIPBuilderBagIt::checkForTitle(\@addBagInfo);
    # create slub bagit
    print_scalar_data("", "INFO: building SIP...", "", "white");
    SLUB::LZA::SIPBuilderBagIt::create_slub_bagit($bagPath, \@addBagInfo);

    return 1;
}

sub checkForTitle($key_hash_list_aref){
    # collect all bag-info keys for easy check
    my @keys;
    for my $hash (@{$key_hash_list_aref}) {
        push @keys, keys %{$hash};
    }
    # check for title
    if (none { $_ =~ m/^[Tt][Ii][Tt][Ll][Ee]/ } @keys) {
        print_scalar_data("", "WARNING: No Title given. Please consider adding a human-readable title with '--add_key_value=Title:YOUR-TITLE'", "", "yellow");
        return 0;
    }
    return 1;
}
1;