# Patch for perl/exit_strategy.pl
#
# Summary of the change:
#   * documents and implements a --help option that prints an inline usage
#     text and exits;
#   * loads Pod::Usage; a failed GetOptions() call now ends in pod2usage(2);
#   * removes the sub find_newest_iefile_version;
#   * moves the main program to column 0 and checks for the directory
#     argument only after option handling;
#   * implements the SQL dump mode: the database file is handed to
#     "/usr/bin/sqlite3 FILE .dump" and the script exits afterwards.
#
# Notes on the added ("+") lines:
#   * The dump option is registered as "enable-sqldump|enable_sqldump" so the
#     spelling shown in the help text works and the underscore spelling
#     keeps working.
#   * Empty output lines are written with `say ""`. A bare `say` prints $_,
#     which is undef once the `while (<$fh_unsorted_IEs>)` loop has hit EOF.
#   * The close() on the sqlite3 pipe reports $? as well as $!, because a
#     non-zero exit of the child is signalled through $? only.
#
# NOTE(review): the copy of this patch under review had its newlines and
# indentation collapsed. Indentation and the position of blank context
# lines below are reconstructed (hunk line counts are consistent), so
# regenerate the patch against the real tree before applying it.
diff --git a/perl/exit_strategy.pl b/perl/exit_strategy.pl
index f1b080f4c52c508ee6249d86ed806444d0462404..8e5aa819f039ae5f1240d0de4bee1ab836ffc638 100644
--- a/perl/exit_strategy.pl
+++ b/perl/exit_strategy.pl
@@ -30,6 +30,9 @@
 #
 # perl ./exit_strategy.pl --recover rosetta_exit_strategy/tmp.db /permanent/
 #
+#
+# call help with:
+# perl ./exit_strategy.pl --help
 ###############################################################################
 
 
@@ -47,6 +50,7 @@ use XML::LibXML::XPathContext;
 use DBD::SQLite;
 use DBI;
 use Getopt::Long;
+use Pod::Usage;
 use constant DEBUG => 0; # no debug
 use IO::Handle;
 
@@ -390,51 +394,6 @@ sub check_if_db_conform ($string, $filename) {
     }
 }
 
-###############################################################################
-# because ExLibris Rosetta produces filenames of following format:
-# V\d+-IE\d+\.xml
-# e.G.:
-# V1-IE23891.xml
-# V1-IE94621.xml
-# V2-IE23891.xml
-# …
-# we must find the relevant file with highest V-value, in example the file
-# "V2-IE23891.xml"
-#
-# this function gets an array reference with all possible files of given regEx
-# and returns an array reference with reduced files using only highest V-value
-# HINT, it only operates on sorted file list with fake versions (with zero-filled prefixes)
-################################################################################
-sub find_newest_iefile_version ($files_sorted, $files_truncated, $cnt_files) {
-    my $cnt_truncated = 0;
-    my $fh = $files_sorted->filehandle("<");
-    my $last_entry;
-    # FIXME, how many lines?
-    my $p = Time::Progress->new(min=>0, max=> $cnt_files);
-    my $i=0;
-    while(<$fh>) {
-        my $entry = $_;
-        $entry =~ m/^(.+?V)(\d+)(-IE\d+\.xml)$/;
-        if (!defined $last_entry) {
-            $last_entry = $entry;
-        }
-        my ($prefix, $version, $suffix) = ($1, $2, $3);
-        $last_entry =~ m/^(.+?V)(\d+)(-IE\d+\.xml)$/;
-        my ($last_prefix, $last_version, $last_suffix) = ($1, $2, $3);
-        if (($last_prefix eq $prefix ) && ($last_suffix eq $suffix) && ($last_version < $version)) {
-        } else {
-            $files_truncated->append($last_entry);
-            $cnt_truncated++;
-        }
-        print $p->report("find newest IE files: %40b ETA: %E \r", $i++);
-        $last_entry = $entry;
-    }
-    $files_truncated->append($last_entry);
-    $cnt_truncated++;
-    say "";
-    return $cnt_truncated;
-}
-
 sub searching_ie_files ($dir, $tmp_ies_unsorted_file) {
     my $cnt_unsorted_files = 0;
     my $first_two_levels_of_dirs = 0;
@@ -479,65 +438,85 @@ sub searching_ie_files ($dir, $tmp_ies_unsorted_file) {
     return $cnt_unsorted_files;
 }
 
-
-
 
 ###############################################################################
 ###############################################################################
 ############# main ############################################################
 ###############################################################################
 ###############################################################################
-my $tmp_ies_dir = Path::Tiny->tempdir( TEMPLATE => "exitstrategy_XXXXXXXXXXX", CLEANUP => 1);
+my $tmp_ies_dir = Path::Tiny->tempdir(TEMPLATE => "exitstrategy_XXXXXXXXXXX", CLEANUP => 1);
 my $tmp_ies_unsorted_file = $tmp_ies_dir->child("unsorted_ies");
-    my $flag_recovery = undef;
-    my $flag_sqldump = undef;
-    my $db_filename = $db_name.".db";
-    my @ARGV_tail;
-    GetOptions(
-        "recovery" => \$flag_recovery,
-        "sqlitedb-file=s" => \$db_filename,
-        "enable_sqldump" => \$flag_sqldump,
-        '<>' => sub {push @ARGV_tail, @_;}
-    );
-    if ($#ARGV_tail < 0) {
-        die "you need a directory as argument\n";
-    }
-    if (defined $flag_recovery) { warn "recovery enabled for XML processing\n"; }
-
-    my $dir = shift @ARGV_tail;
+my $flag_recovery = undef;
+my $flag_sqldump = undef;
+my $db_filename = $db_name . ".db";
+my @ARGV_tail;
+GetOptions(
+    "help|?" => sub {
+        say <<"HELP";
+call $0 with following options
+    --help                | this help
+    --recovery            | set special recovery flag
+    --sqlitedb-file=FILE  | set database to file FILE
+    --enable-sqldump      | dumps a given database as SQL to STDOUT
+
+    create an exit-DB:
+        $0 [--recovery] [--sqlitedb-file=FILE] DIR
+
+    dump an exit-DB as SQL
+        $0 [--sqlitedb-file=FILE] --enable-sqldump
+HELP
+        exit;
+    },
+    "recovery" => \$flag_recovery,
+    "sqlitedb-file=s" => \$db_filename,
+    "enable-sqldump|enable_sqldump" => \$flag_sqldump, # hyphen as documented in the help text, underscore kept as alias
+    '<>' => sub {push @ARGV_tail, @_;}
+) or pod2usage(2);
+
+if (defined $flag_recovery) {warn "recovery enabled for XML processing\n";}
+if (defined $flag_sqldump) {
+    open my $cmd, '|-', '/usr/bin/sqlite3', $db_filename, '.dump' or die "Failed to dump DB: $!\n";
+    close $cmd or die "Failed to dump DB (sqlite3 wait status $?): $!\n"; # a failing child sets \$?, not \$!
+    exit;
+}
+if ($#ARGV_tail < 0) {
+    die "you need a directory as argument\n";
+}
 
-    if (defined $dir && -d "$dir") {
-        say "preparing SQL";
-        $tmp_ies_unsorted_file->touch();
-        say "searching IE files";
-        my $cnt_unsorted_files = searching_ie_files($dir, $tmp_ies_unsorted_file);
-        # /permanent_storage/2020/04/02/IE201080/V1-FL201091.xml
-        # /permanent_storage/2020/04/02/IE201080/V2-FL201091.xml
-        my $fh_unsorted_IEs = $tmp_ies_unsorted_file->openr();
-        my $count=0;
-        my $progressbar =Time::Progress->new(min => 0, max => $cnt_unsorted_files, smoothing => 1);
-        my $dbh = DBI->connect("dbi:SQLite:dbname=$db_filename", "", "", {
-            RaiseError => 1,
-            sqlite_unicode => 1,
-        }) or die "could not connect to database (file '$db_filename')", $DBI::errstr;
-        write_database_creation($dbh);
-        write_tables_creation($dbh);
-        write_prepare_insert($dbh);
-        write_index_creation($dbh);
-        while( <$fh_unsorted_IEs> ) {
-            chomp;
-            print $progressbar->report("parse IE files: %40b ETA: %E \r", $count++);
-            s/V0*(\d+-IE)/V$1/; # revert fake version
-            my $ret = parse_iexml($_, $flag_recovery);
-            write_addsql($dbh, $ret);
-        }
-        say "";
-        $dbh->disconnect or warn("disconnecting problems, ", $dbh->errstr);
-        say "processed $count uniq IEs";
-    } else {
-        die "no directory given on commandline"
+my $dir = shift @ARGV_tail;
+
+if (defined $dir && -d "$dir") {
+    say "preparing SQL";
+    $tmp_ies_unsorted_file->touch();
+    say "searching IE files";
+    my $cnt_unsorted_files = searching_ie_files($dir, $tmp_ies_unsorted_file);
+    # /permanent_storage/2020/04/02/IE201080/V1-FL201091.xml
+    # /permanent_storage/2020/04/02/IE201080/V2-FL201091.xml
+    my $fh_unsorted_IEs = $tmp_ies_unsorted_file->openr();
+    my $count = 0;
+    my $progressbar = Time::Progress->new(min => 0, max => $cnt_unsorted_files, smoothing => 1);
+    my $dbh = DBI->connect("dbi:SQLite:dbname=$db_filename", "", "", {
+        RaiseError => 1,
+        sqlite_unicode => 1,
+    }) or die "could not connect to database (file '$db_filename')", $DBI::errstr;
+    write_database_creation($dbh);
+    write_tables_creation($dbh);
+    write_prepare_insert($dbh);
+    write_index_creation($dbh);
+    while (<$fh_unsorted_IEs>) {
+        chomp;
+        print $progressbar->report("parse IE files: %40b ETA: %E \r", $count++);
+        s/V0*(\d+-IE)/V$1/; # revert fake version
+        my $ret = parse_iexml($_, $flag_recovery);
+        write_addsql($dbh, $ret);
     }
-    print "\n";
-
+    say ""; # end the progress line; a bare `say` would print $_ (undef after the loop)
+    $dbh->disconnect or warn("disconnecting problems, ", $dbh->errstr);
+    say "processed $count uniq IEs";
+}
+else {
+    die "no directory given on commandline"
+}
+say ""; # trailing empty line, as the removed `print "\n"` produced
 
 1;