diff --git a/perl/exit_strategy.pl b/perl/exit_strategy.pl
index 28cf20fcd572722c4aa2fb377d58b991e240b99f..c1ef3fbe91d8a70a7d7026cc966ee90a9e60c159 100644
--- a/perl/exit_strategy.pl
+++ b/perl/exit_strategy.pl
@@ -15,12 +15,13 @@
 #
 ###############################################################################
 
-use 5.14.0;
+use 5.28.0;
 use strict;
 use warnings;
 use Carp;
-use File::Basename;
+use Path::Tiny;
 use File::Find;
+use File::Sort qw(sort_file);
 use XML::XPath;
 use XML::XPath::XMLParser;
 
@@ -148,15 +149,15 @@ sub write_prepare_insert {
 ###############################################################################
 sub write_addsql {
     my $refhash = $_[0];
-    my $ieid = basename($refhash->{"filename"},qw/.xml/);
+    my $ieid = path($refhash->{"filename"})->basename(qw/.xml/);
     say "BEGIN;";
     say "EXECUTE aip_plan ('$ieid');";
     # FIXME if multiple locations exists
-    my $iefile = basename($refhash->{"filename"});
+    my $iefile = path($refhash->{"filename"})->basename();
     say "EXECUTE ie_plan ('$ieid', '$iefile', '$sourcetype');";
     foreach my $location (@{$refhash->{"files"}}) {
-        my $file = basename($location); # FIXME if multiple locations
-        my $dir = dirname($location);
+
+        my $file = path($location)->basename(); # FIXME if multiple locations
         say "EXECUTE file_plan ('$ieid', '$file');";
         say "EXECUTE locat_plan ('$ieid', '$file', '$location', '$sourcetype' );";
     }
@@ -292,49 +293,66 @@ sub parse_iexml {
 #
-# this function gets an array reference with all possible files of given regEx
-# and returns an array reference with reduced files using only highest V-value
+# Reads the list of IE files from $files_sorted and appends every entry which
+# is not followed by a newer version of the same file to $files_truncated.
+# Both arguments are Path::Tiny objects, the lists hold one path per line.
+# Always returns 1.
+# HINT, it only operates on sorted file list with fake versions (with zero-filled prefixes)
 ################################################################################
-sub find_newest_iefile_version ($) {
-    my $files = $_[0];
-    #say "$files=";
-    #say Dumper($files);
-    my %fileshash;
-    foreach my $file (@{ $files } ) {
-        $file=~m/^(.+?V)(\d+)(-IE\d+\.xml)$/;
-        my ($prefix, $version, $suffix) = ($1, $2, $3);
-        if (defined $fileshash{$suffix}) {
-            my ($stored_version, $stored_prefix) = @{ $fileshash{$suffix} };
-            if ($version > $stored_version) {
-                carp "replaced $stored_version with $version of $suffix";
-                my @tmp = ($version, $prefix);
-                $fileshash{$suffix} = \@tmp;
-            }
-        } else {
-            my @tmp = ($version, $prefix);
-            $fileshash{$suffix} = \@tmp;
-        }
-    }
-    # build new array
-    my @newfiles = sort { $a eq $b } map {
-        my $suffix=$_;
-        my ($version, $prefix) = @{ $fileshash{ $suffix } };
-        join ("", $prefix, $version, $suffix);
-    } (keys %fileshash);
-    #say "filtered $files=";
-    #say Dumper(\@newfiles);
-    return \@newfiles;
+sub find_newest_iefile_version ($$) {
+    my $files_sorted    = shift;
+    my $files_truncated = shift;
+    # make sure the result exists, even if there is nothing to append
+    $files_truncated->touch();
+    my $fh = $files_sorted->filehandle("<");
+    my ($last_entry, $last_prefix, $last_version, $last_suffix);
+    while (my $entry = <$fh>) {
+        chomp $entry;
+        my ($prefix, $version, $suffix) =
+            $entry =~ m/^(.+?V)(\d+)(-IE\d+\.xml)$/;
+        if (!defined $suffix) {
+            # do not fall back to the captures of an older match
+            carp "ignored unexpected entry '$entry'";
+            next;
+        }
+        if (defined $last_entry) {
+            # the previous entry is outdated if the current entry is a
+            # newer version of the same file
+            my $is_outdated = ($last_prefix eq $prefix)
+                && ($last_suffix eq $suffix)
+                && ($last_version < $version);
+            if (!$is_outdated) {
+                $files_truncated->append($last_entry . "\n");
+            }
+        }
+        ($last_entry, $last_prefix, $last_version, $last_suffix) =
+            ($entry, $prefix, $version, $suffix);
+    }
+    close $fh;
+    # the last entry is the newest of its group (undef on empty input)
+    if (defined $last_entry) {
+        $files_truncated->append($last_entry . "\n");
+    }
+    return 1;
 }
 
 # begin closure
 {
-    my @files;
+    my $tmp_ies_dir = Path::Tiny->tempdir( TEMPLATE => "exitstrategy_XXXXXXXXXXX", CLEANUP => 1);
+    my $tmp_ies_unsorted_file = $tmp_ies_dir->child("unsorted_ies");
+    my $tmp_ies_sorted_file = $tmp_ies_dir->child("sorted_ies");
+    my $tmp_ies_truncated_file = $tmp_ies_dir->child("truncated_ies");
     ###############################################################################
     # call back function to File::Find
     #
     ###############################################################################
     sub process_sip () {
         my $file=$File::Find::name;
-        if ($file =~ m/V\d+-IE\d+\.xml$/) {
-            push @files, $file;
+        if ($file =~ m/V(\d+)(-IE\d+\.xml)$/) {
+            # fake name to use alphabetical sort
+            my ($version, $suffix) = ($1, $2);
+            my $fakeversion = sprintf("%05i", $version);
+            # anchored at the end, so only the version of the file name is replaced
+            $file =~ s/V\d+-IE\d+\.xml$/V$fakeversion$suffix/;
+            $tmp_ies_unsorted_file->append($file . "\n");
         }
         return;
     }
@@ -348,15 +366,27 @@ sub find_newest_iefile_version ($) {
         write_database_creation();
         write_tables_creation();
         write_prepare_insert();
+        $tmp_ies_unsorted_file->touch();
         find(\&process_sip, $dir);
-        # find newest version of files
-        my @sorted_files = sort {$a eq $b} @files;
-        my $files = find_newest_iefile_version ( \@sorted_files );
-        foreach my $file (@{ $files }) {
-            my $ret = parse_iexml($file);
+        # /permanent_storage/2020/04/02/IE201080/V1-FL201091.xml
+        # /permanent_storage/2020/04/02/IE201080/V2-FL201091.xml
+        sort_file({
+            I => $tmp_ies_unsorted_file->absolute()->stringify,
+            o => $tmp_ies_sorted_file->absolute()->stringify,
+        });
+        find_newest_iefile_version ($tmp_ies_sorted_file, $tmp_ies_truncated_file );
+        # now operate on truncated
+        my $fh = $tmp_ies_truncated_file->openr();
+        my $count=0;
+        while (my $file = <$fh>) {
+            chomp $file;
+            $count++;
+            $file =~ s/V0*(\d+-IE\d+\.xml)$/V$1/; # revert fake version
+            my $ret = parse_iexml($file);
             write_addsql($ret);
         }
         write_index_creation();
+        warn "processed $count uniq IEs\n";
     } else {
         die "no directory given on commandline"
     }