Skip to content
Snippets Groups Projects
Commit da922e08 authored by Andreas Romeyke's avatar Andreas Romeyke
Browse files

- improved UI experience

- prune depth search if IE-XML is found
parent a79401b9
Branches
Tags
No related merge requests found
......@@ -284,13 +284,13 @@ sub parse_iexml ($$) {
if (defined $value) {
$value =~ s/\n/ /g;
$value =~ s/'/\\'/g;
}
check_if_db_conform($value, $filename);
my @pair;
push @pair, $key;
push @pair, $value;
push @dcrecords, \@pair;
}
}
############################################
# get right representation ID (has a dnx-section with <key id=label>LOCAL</key>)
my $compiled_xpath_amdsecs = '/mets:mets/mets:amdSec';
......@@ -390,12 +390,54 @@ sub find_newest_iefile_version ($$$) {
return $cnt_truncated;
}
# Scan a directory tree for IE files named V<version>-IE<id>.xml and record
# their paths.
#
# Parameters:
#   $dir                   - root directory handed to File::Find
#   $tmp_ies_unsorted_file - object whose append() method receives one path
#                            per call, terminated by "\n" (presumably the
#                            Path::Tiny file created by the caller - confirm)
#
# Returns: the number of IE files that were appended.
#
# The tree is walked twice. The first walk only counts the directories on the
# first two levels below $dir (plus $dir itself) so that the progress bar of
# the second walk has a known maximum. The second walk collects the IE files.
sub searching_ie_files ($$) {
    my $dir                   = shift;
    my $tmp_ies_unsorted_file = shift;
    my $cnt_unsorted_files    = 0;

    # Depth of the entry File::Find is currently visiting, counted in path
    # components relative to $dir (0 for $dir itself). splitdir() returns a
    # list, so the components are counted explicitly instead of relying on
    # what the call happens to yield in scalar context.
    my $current_depth = sub {
        my $relpath = $File::Find::name;
        $relpath =~ s{^\Q$dir\E/?}{};
        my @components = File::Spec->splitdir($relpath);
        return scalar @components;
    };

    # Pass 1: count directories down to depth 2; nothing deeper is visited.
    my $first_two_levels_of_dirs = 0;
    my $wanted_twolevel_dircount = sub {
        if ($current_depth->() >= 2) {
            $File::Find::prune = 1;
        }
        if (-d $_) {
            $first_two_levels_of_dirs++;
        }
        return;
    };
    find($wanted_twolevel_dircount, $dir);

    my $progressbar = Time::Progress->new(
        min       => 0,
        max       => $first_two_levels_of_dirs,
        smoothing => 1,
    );
    my $dircount = 0;

    # Pass 2: collect the IE files, advancing the progress bar once for every
    # directory that was counted in pass 1.
    my $wanted_process_sip = sub {
        if (-f $_ and $_ =~ m/V(\d+)-IE\d+\.xml$/) {
            my $version = $1;
            my $file    = $File::Find::name;
            # Fake name to use alphabetical sort: the version is zero-padded
            # to five digits so that e.g. V2 sorts before V10.
            my $fakeversion = sprintf("%05i", $version);
            $file =~ s/V(\d+)-IE/V$fakeversion-IE/;
            $tmp_ies_unsorted_file->append($file . "\n");
            $cnt_unsorted_files++;
            # NOTE(review): File::Find documents $File::Find::prune as a way
            # to stop descending into the directory being visited. Here it is
            # set while visiting a plain file, which may have no effect -
            # confirm whether sibling directories were meant to be skipped.
            $File::Find::prune = 1;
        }
        elsif (-d $_) {
            if ($current_depth->() <= 2) {
                my $status = $progressbar->report("find IE files: %40b ETA: %E \r", $dircount++);
                print $status;
            }
        }
        return;
    };
    find($wanted_process_sip, $dir);

    # Terminate the progress line, which was only ever ended with "\r".
    say "";
    return $cnt_unsorted_files;
}
my $tmp_ies_dir = Path::Tiny->tempdir( TEMPLATE => "exitstrategy_XXXXXXXXXXX", CLEANUP => 1);
my $tmp_ies_unsorted_file = $tmp_ies_dir->child("unsorted_ies");
my $tmp_ies_sorted_file = $tmp_ies_dir->child("sorted_ies");
my $tmp_ies_truncated_file = $tmp_ies_dir->child("truncated_ies");
my $cnt_unsorted_files=0;
###############################################################################
###############################################################################
############# main ############################################################
......@@ -410,32 +452,16 @@ sub find_newest_iefile_version ($$$) {
if ($#ARGV_tail != 1) {
die "you need a SQL-file and a directory as argument\n";
}
# File::Find "wanted" callback that records IE files.
#
# Reads the path of the current entry from $File::Find::name. If it ends in
# V<version>-IE<id>.xml, the path (with a zero-padded version) is appended as
# one line to $tmp_ies_unsorted_file and $cnt_unsorted_files is incremented;
# both variables are declared at file level, outside this sub.
# Takes no arguments and returns nothing.
sub process_sip () {
    my $path = $File::Find::name;

    # Anything that is not a versioned IE file is ignored.
    return unless $path =~ m/V(\d+)-IE\d+\.xml$/;

    # Fake name to use alphabetical sort: pad the version to five digits.
    my $padded = sprintf("%05i", $1);
    $path =~ s/V(\d+)-IE/V$padded-IE/;

    $tmp_ies_unsorted_file->append($path . "\n");
    $cnt_unsorted_files++;
    return;
}
if (defined $recovery) { warn "recovery enabled for XML processing\n"; }
my $sqlfile = shift @ARGV_tail;
if($sqlfile !~ m/[A-Za-z0-9]+\.sql$/) {die "SQL file should be named like 'foo.sql', but was '$sqlfile'\n";}
my $dir = shift @ARGV_tail;
open(my $fh, ">:encoding(UTF-8)", "$sqlfile") || die "could not open file '$sqlfile' for writing, $!";
if (defined $dir && -d "$dir") {
say "preparing SQL";
write_database_creation($fh);
write_tables_creation($fh);
write_prepare_insert($fh);
$tmp_ies_unsorted_file->touch();
say "searching IE files";
find(\&process_sip, $dir);
my $cnt_unsorted_files = searching_ie_files($dir, $tmp_ies_unsorted_file);
# /permanent_storage/2020/04/02/IE201080/V1-FL201091.xml
# /permanent_storage/2020/04/02/IE201080/V2-FL201091.xml
say "sorting IE files";
......@@ -447,22 +473,27 @@ sub find_newest_iefile_version ($$$) {
# now operate on truncated
my $fh_truncated_IEs = $tmp_ies_truncated_file->openr();
my $count=0;
my $p=Time::Progress->new(min => 0, max => $cnt_truncated_files);
my $progressbar =Time::Progress->new(min => 0, max => $cnt_truncated_files, smoothing => 1);
open(my $fh, ">:encoding(UTF-8)", "$sqlfile") || die "could not open file '$sqlfile' for writing, $!";
write_database_creation($fh);
write_tables_creation($fh);
write_prepare_insert($fh);
while( <$fh_truncated_IEs> ) {
chomp;
print $p->report("parse IE files: %40b ETA: %E \r", $count++);
s/V(0*)(\d+-IE)/V$2/; # revert fake version
print $progressbar->report("parse IE files: %40b ETA: %E \r", $count++);
s/V0*(\d+-IE)/V$1/; # revert fake version
my $ret = parse_iexml($_, $recovery);
write_addsql($fh, $ret);
}
say "";
write_index_creation($fh);
close ($fh);
say "processed $count uniq IEs";
} else {
die "no directory given on commandline"
}
print "\n";
close ($fh);
1;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment