Skip to content
Snippets Groups Projects
Commit 9fb93a76 authored by Andreas Romeyke
Browse files

- refactored to use files to hold IEs for processing, hopefully it reduced the...

- refactored to hold IEs in files during processing, which should hopefully reduce the large memory footprint
parent 9d47d7d6
No related branches found
No related tags found
No related merge requests found
...@@ -15,12 +15,13 @@ ...@@ -15,12 +15,13 @@
# #
############################################################################### ###############################################################################
use 5.14.0; use 5.28.0;
use strict; use strict;
use warnings; use warnings;
use Carp; use Carp;
use File::Basename; use Path::Tiny;
use File::Find; use File::Find;
use File::Sort qw(sort_file);
use XML::XPath; use XML::XPath;
use XML::XPath::XMLParser; use XML::XPath::XMLParser;
...@@ -148,15 +149,15 @@ sub write_prepare_insert { ...@@ -148,15 +149,15 @@ sub write_prepare_insert {
############################################################################### ###############################################################################
sub write_addsql { sub write_addsql {
my $refhash = $_[0]; my $refhash = $_[0];
my $ieid = basename($refhash->{"filename"},qw/.xml/); my $ieid = path($refhash->{"filename"})->basename(qw/.xml/);
say "BEGIN;"; say "BEGIN;";
say "EXECUTE aip_plan ('$ieid');"; say "EXECUTE aip_plan ('$ieid');";
# FIXME if multiple locations exists # FIXME if multiple locations exists
my $iefile = basename($refhash->{"filename"}); my $iefile = path($refhash->{"filename"})->basename();
say "EXECUTE ie_plan ('$ieid', '$iefile', '$sourcetype');"; say "EXECUTE ie_plan ('$ieid', '$iefile', '$sourcetype');";
foreach my $location (@{$refhash->{"files"}}) { foreach my $location (@{$refhash->{"files"}}) {
my $file = basename($location); # FIXME if multiple locations
my $dir = dirname($location); my $file = path($location)->basename(); # FIXME if multiple locations
say "EXECUTE file_plan ('$ieid', '$file');"; say "EXECUTE file_plan ('$ieid', '$file');";
say "EXECUTE locat_plan ('$ieid', '$file', '$location', '$sourcetype' );"; say "EXECUTE locat_plan ('$ieid', '$file', '$location', '$sourcetype' );";
} }
...@@ -292,49 +293,50 @@ sub parse_iexml { ...@@ -292,49 +293,50 @@ sub parse_iexml {
# #
# this function gets an array reference with all possible files of given regEx # this function gets an array reference with all possible files of given regEx
# and returns an array reference with reduced files using only highest V-value # and returns an array reference with reduced files using only highest V-value
# HINT, it only operates on sorted file list with fake versions (with zero-filled prefixes)
################################################################################ ################################################################################
sub find_newest_iefile_version ($) { sub find_newest_iefile_version ($$) {
my $files = $_[0]; my $files_sorted = shift;
#say "$files="; my $files_truncated = shift;
#say Dumper($files); my $fh = $files_sorted->filehandle("<");
my %fileshash; my $last_entry;
foreach my $file (@{ $files } ) { while(<$fh>) {
$file=~m/^(.+?V)(\d+)(-IE\d+\.xml)$/; my $entry = $_;
my ($prefix, $version, $suffix) = ($1, $2, $3); $entry =~ m/^(.+?V)(\d+)(-IE\d+\.xml)$/;
if (defined $fileshash{$suffix}) { if (!defined $last_entry) {
my ($stored_version, $stored_prefix) = @{ $fileshash{$suffix} }; $last_entry = $entry;
if ($version > $stored_version) {
carp "replaced $stored_version with $version of $suffix";
my @tmp = ($version, $prefix);
$fileshash{$suffix} = \@tmp;
} }
my ($prefix, $version, $suffix) = ($1, $2, $3);
$last_entry =~ m/^(.+?V)(\d+)(-IE\d+\.xml)$/;
my ($last_prefix, $last_version, $last_suffix) = ($1, $2, $3);
if (($last_prefix eq $prefix ) && ($last_suffix eq $suffix) && ($last_version < $version)) {
} else { } else {
my @tmp = ($version, $prefix); $files_truncated->append($last_entry);
$fileshash{$suffix} = \@tmp;
} }
$last_entry = $entry;
} }
# build new array $files_truncated->append($last_entry);
my @newfiles = sort { $a eq $b } map { return 1;
my $suffix=$_;
my ($version, $prefix) = @{ $fileshash{ $suffix } };
join ("", $prefix, $version, $suffix);
} (keys %fileshash);
#say "filtered $files=";
#say Dumper(\@newfiles);
return \@newfiles;
} }
# begin closure # begin closure
{ {
my @files; my $tmp_ies_dir = Path::Tiny->tempdir( TEMPLATE => "exitstrategy_XXXXXXXXXXX", CLEANUP => 1);
my $tmp_ies_unsorted_file = $tmp_ies_dir->child("unsorted_ies");
my $tmp_ies_sorted_file = $tmp_ies_dir->child("sorted_ies");
my $tmp_ies_truncated_file = $tmp_ies_dir->child("truncated_ies");
############################################################################### ###############################################################################
# call back function to File::Find # call back function to File::Find
# #
############################################################################### ###############################################################################
sub process_sip () { sub process_sip () {
my $file=$File::Find::name; my $file=$File::Find::name;
if ($file =~ m/V\d+-IE\d+\.xml$/) { if ($file =~ m/V(\d+)-IE\d+\.xml$/) {
push @files, $file; # fake name to use alphabetical sort
my $version = $1;
my $fakeversion = sprintf("%05i",$version);
$file =~s/V(\d+)-IE/V$fakeversion-IE/;
$tmp_ies_unsorted_file -> append( $file."\n");
} }
return; return;
} }
...@@ -348,15 +350,27 @@ sub find_newest_iefile_version ($) { ...@@ -348,15 +350,27 @@ sub find_newest_iefile_version ($) {
write_database_creation(); write_database_creation();
write_tables_creation(); write_tables_creation();
write_prepare_insert(); write_prepare_insert();
$tmp_ies_unsorted_file->touch();
find(\&process_sip, $dir); find(\&process_sip, $dir);
# find newest version of files # /permanent_storage/2020/04/02/IE201080/V1-FL201091.xml
my @sorted_files = sort {$a eq $b} @files; # /permanent_storage/2020/04/02/IE201080/V2-FL201091.xml
my $files = find_newest_iefile_version ( \@sorted_files ); sort_file({
foreach my $file (@{ $files }) { I => $tmp_ies_unsorted_file->absolute()->stringify,
my $ret = parse_iexml($file); o => $tmp_ies_sorted_file->absolute()->stringify,
});
find_newest_iefile_version ($tmp_ies_sorted_file, $tmp_ies_truncated_file );
# now operate on truncated
my $fh = $tmp_ies_truncated_file->openr();
my $count=0;
while( <$fh> ) {
chomp;
$count++;
s/V(0*)(\d+-IE)/V$2/; # revert fake version
my $ret = parse_iexml($_);
write_addsql($ret); write_addsql($ret);
} }
write_index_creation(); write_index_creation();
warn "processed $count uniq IEs\n;"
} else { } else {
die "no directory given on commandline" die "no directory given on commandline"
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment