From effe3162f8269cf4ec67f7b37c183ca6bc086484 Mon Sep 17 00:00:00 2001
From: Andreas Romeyke <art1@andreas-romeyke.de>
Date: Wed, 4 Aug 2021 11:54:03 +0200
Subject: [PATCH] - added CLI option "--continue" - improved transactions
 around calls of write_addsql() for each IE

---
 perl/exit_strategy.pl | 65 +++++++++++++++++++++++++++++--------------
 1 file changed, 44 insertions(+), 21 deletions(-)

diff --git a/perl/exit_strategy.pl b/perl/exit_strategy.pl
index 7fb4ad0..b5c8aba 100644
--- a/perl/exit_strategy.pl
+++ b/perl/exit_strategy.pl
@@ -43,6 +43,7 @@ no warnings "experimental::signatures";
 use utf8; # for debugging output
 use Carp;
 use Path::Tiny;
+use File::Basename qw(basename);
 use File::Find;
 use XML::LibXML;
 use Time::Progress;
@@ -79,8 +80,12 @@ SQL_PRAGMA_WAL
   my $sql2=<<"SQL_PRAGMA_ASYNC";
 PRAGMA synchronous = OFF;
 SQL_PRAGMA_ASYNC
+  my $sql3=<<"SQL_PRAGMA_TEMP"; # temp_store takes DEFAULT|FILE|MEMORY; unknown words fall back to DEFAULT
+PRAGMA temp_store = DEFAULT;
+SQL_PRAGMA_TEMP
   my $sth1 = $dbh->prepare($sql1); $sth1->execute() or die "sql problem detected", $dbh->errstr;
   my $sth2 = $dbh->prepare($sql2); $sth2->execute() or die "sql problem detected", $dbh->errstr;
+  my $sth3 = $dbh->prepare($sql3); $sth3->execute() or die "sql problem detected", $dbh->errstr;
   return;
 }
 
@@ -212,21 +217,21 @@ SQL_DC_PLAN
 #     $ret{"dcrecords"} = \@dcrecords;
 ###############################################################################
 sub write_addsql ($dbh, $plans, $refhash) {
-  my $iefile = path($refhash->{"filename"})->basename();
+  my $iefile = basename($refhash->{"filename"});
   my ($ieid,$ieversion);
   if ($iefile =~ m/^V(\d+)-(IE\d*)\.xml$/) {
     $ieversion=$1; $ieid=$2;
   } else {
     die "Could not detect PID and Version from IEFile '$iefile'\n";
   }
-  $dbh->begin_work;
+
   # start SQL insert
   $plans->{aip}->execute($ieid, $ieversion)  or die "sql problem detected", $dbh->errstr;
   # FIXME if multiple locations exists
 
   $plans->{ie}->execute( $ieid, $ieversion, $iefile, $sourcetype)  or die "sql problem detected", $dbh->errstr;
   foreach my $location (@{$refhash->{"files"}}) {
-    my $file = path($location)->basename(); # FIXME if multiple locations
+    my $file = basename($location); # FIXME if multiple locations
     $plans->{file}->execute($ieid, $ieversion, $file)  or die "sql problem detected", $dbh->errstr;
     $plans->{locat}->execute($ieid, $ieversion, $file, $location, $sourcetype)  or die "sql problem detected", $dbh->errstr;
   }
@@ -236,7 +241,6 @@ sub write_addsql ($dbh, $plans, $refhash) {
     $dcvalue=~tr/'/"/;
     $plans->{dc}->execute($ieid, $ieversion, $dckey, $dcvalue)  or die "sql problem detected", $dbh->errstr;
   }
-  $dbh->commit;
   return 1;
 }
 
@@ -477,16 +481,18 @@ my $tmp_ies_dir = Path::Tiny->tempdir(TEMPLATE => "exitstrategy_XXXXXXXXXXX", CL
 my $tmp_ies_unsorted_file = $tmp_ies_dir->child("unsorted_ies");
 my $flag_recovery = undef;
 my $flag_sqldump = undef;
+my $flag_continue = undef;
 my $db_filename = $db_name . ".db";
 my @ARGV_tail;
 GetOptions(
     "help|?"          => sub {
       say <<"HELP";
 call $0 with following options
-        --help | this help
-        --recovery | set special recovery flag
-        --sqlitedb-file=FILE | set database to file FILE
-        --enable-sqldump | dumps a given database as SQL to STDOUT
+        --help ............... this help
+        --recovery ........... set special recovery flag
+        --continue ........... tries to add IEs to existing database, ignores IEs which already exist (dangerous!)
+        --sqlitedb-file=FILE . set database to file FILE
+        --enable-sqldump ..... dumps a given database as SQL to STDOUT
 
         create an exit-DB:
         $0 [--recovery] [--sqlitedb-file=FILE] DIR [DIR…]
@@ -496,6 +502,7 @@ call $0 with following options
 HELP
       exit;
     },
+    "continue"        => \$flag_continue,
     "recovery"        => \$flag_recovery,
     "sqlitedb-file=s" => \$db_filename,
     "enable-sqldump"  => \$flag_sqldump,
@@ -512,17 +519,23 @@ if ($#ARGV_tail < 0) {
   die "you need at least a directory as argument\n";
 }
 
-say "preparing SQL";
-my $dbh = DBI->connect("dbi:SQLite:dbname=$db_filename", "", "", {
-    RaiseError     => 1,
-    sqlite_unicode => 1,
-}) or die "could not connect to database (file '$db_filename')", $DBI::errstr;
-write_database_creation($dbh);
-write_tables_creation($dbh);
-write_prepare_insert($dbh);
-write_index_creation($dbh);
-$dbh->disconnect or warn("disconnecting problems, ", $dbh->errstr);
-
+# The existing database is reused only when --continue was given AND the
+# database file is already present; otherwise the setup routines are run.
+my $reuse_existing_db = (defined $flag_continue) && (-e $db_filename);
+if ($reuse_existing_db) {
+  say "using existing DB (continue mode!)";
+} else {
+  say "preparing SQL";
+  my $dbh = DBI->connect("dbi:SQLite:dbname=$db_filename", "", "", {
+      RaiseError     => 1,
+      sqlite_unicode => 1,
+  }) or die "could not connect to database (file '$db_filename')", $DBI::errstr;
+  write_database_creation($dbh);
+  write_tables_creation($dbh);
+  write_prepare_insert($dbh);
+  write_index_creation($dbh);
+  $dbh->disconnect or warn("disconnecting problems, ", $dbh->errstr);
+}
 if (0==@ARGV_tail ){
     die "no directory given on commandline"
 }
@@ -541,7 +554,7 @@ my $cnt_unsorted_files = searching_ie_files(\@dirs, $tmp_ies_unsorted_file);
 my $fh_unsorted_IEs = $tmp_ies_unsorted_file->openr();
 my $count = 0;
 my $progressbar = Time::Progress->new(min => 0, max => $cnt_unsorted_files, smoothing => 1);
-$dbh = DBI->connect("dbi:SQLite:dbname=$db_filename", "", "", {
+my $dbh = DBI->connect("dbi:SQLite:dbname=$db_filename", "", "", {
     RaiseError     => 1,
     sqlite_unicode => 1,
 }) or die "could not connect to database (file '$db_filename')", $DBI::errstr;
@@ -552,8 +565,18 @@ while (<$fh_unsorted_IEs>) {
   print $progressbar->report("parse IE files:       %40b  ETA: %E   \r", $count++);
   s/V0*(\d+-IE)/V$1/; # revert fake version
   my $ret = parse_iexml($_, $flag_recovery);
-  write_addsql($dbh, $plans, $ret);
+  $dbh->begin_work; # one transaction per IE, so a failing IE is skipped as a whole
+  my $ie_ok = eval {
+    write_addsql($dbh, $plans, $ret);
+    $dbh->commit; # inside eval: a failing commit must be rolled back as well
+    1;
+  };
+  if (!$ie_ok) {
+    say "Rollback, because ", $@ || "unknown error"; # report before rollback can clobber $@
+    eval { $dbh->rollback; 1 } or warn "rollback failed: $@"; # rollback may throw, too
+  }
 }
+
 $dbh->disconnect or warn("disconnecting problems, ", $dbh->errstr);
 say "\rprocessed $count uniq IEs                                                                                      ";
 say "";
-- 
GitLab