From dff59cb9facae2af0bb13dc060fdf8c62b11ecb6 Mon Sep 17 00:00:00 2001
From: Andreas Romeyke <art1@andreas-romeyke.de>
Date: Thu, 22 Jul 2021 10:02:29 +0200
Subject: [PATCH] - changed to use SQLite

---
 perl/exit_strategy.pl | 224 +++++++++++++++++++++++-------------------
 1 file changed, 123 insertions(+), 101 deletions(-)

diff --git a/perl/exit_strategy.pl b/perl/exit_strategy.pl
index 7b1519d..4fc8eae 100644
--- a/perl/exit_strategy.pl
+++ b/perl/exit_strategy.pl
@@ -51,6 +51,8 @@ use File::Sort qw(sort_file);
 use XML::LibXML;
 use Time::Progress;
 use XML::LibXML::XPathContext;
+use DBD::SQLite;
+use DBI;
 use Getopt::Long;
 use constant DEBUG => 0; # no debug
 
@@ -78,83 +80,68 @@ sub write_database_creation ($fh) {
 }
 
 # write tables creation;:
-sub write_tables_creation ($fh) {
-  # Transactions for tables creation
-  say $fh "BEGIN;";
-
-  # SEQUENCE
-  say $fh "/* create SEQUENCE generator */";
-  say $fh "CREATE SEQUENCE serial START 1;";
+# Create the five tables (aip, metadatafile, dc, sourcedatafile, sourcedatalocat); dies on SQL errors, returns 1.
+sub write_tables_creation ($dbh) {
+  my $sql1=<<"SQL_CREATE1";
+  /* create AIP table */
+  CREATE TABLE aip (
+    id INTEGER,
+    ie_id TEXT NOT NULL UNIQUE,
+    PRIMARY KEY(id AUTOINCREMENT)
+  );
+SQL_CREATE1
+  my $sql2=<<"SQL_CREATE2";
+  /* create IEFILE table */
+  CREATE TABLE metadatafile (
+    id INTEGER,
+    aip_id INTEGER NOT NULL REFERENCES aip (id),
+    location TEXT NOT NULL,
+    sourcetype TEXT NOT NULL,
+    PRIMARY KEY(id AUTOINCREMENT)
+  );
+SQL_CREATE2
+  my $sql3=<<"SQL_CREATE3";
+  /* create DC table */
+  CREATE TABLE dc (
+    id INTEGER,
+    aip_id INTEGER NOT NULL REFERENCES aip (id),
+    element TEXT NOT NULL,
+    value TEXT NOT NULL,
+    PRIMARY KEY(id AUTOINCREMENT)
+  );
+SQL_CREATE3
+  my $sql4=<<"SQL_CREATE4";
+  /* create FILE table */
+  CREATE TABLE sourcedatafile (
+    id INTEGER,
+    aip_id INTEGER NOT NULL REFERENCES aip (id),
+    name TEXT NOT NULL,
+    PRIMARY KEY(id AUTOINCREMENT)
+  );
+SQL_CREATE4
+  my $sql5=<<"SQL_CREATE5";
+  /* create LOCAT table */
+  CREATE TABLE sourcedatalocat (
+    id INTEGER,
+    file_id INTEGER NOT NULL REFERENCES sourcedatafile (id),
+    location TEXT NOT NULL,
+    sourcetype TEXT NOT NULL,
+    PRIMARY KEY(id AUTOINCREMENT)
+  );
+SQL_CREATE5
+  # do() prepares and executes in one call and returns undef on failure, so a failing prepare is reported too
+  foreach my $sql ($sql1, $sql2, $sql3, $sql4, $sql5) {
+    $dbh->do($sql) or die "sql problem detected: " . $dbh->errstr;
+  }
 
-  # AIP
-  say $fh "/* create AIP table */";
-  say $fh "CREATE TABLE aip (";
-  say $fh "\tid INT PRIMARY KEY DEFAULT nextval('serial'),";
-  say $fh "\tie_id VARCHAR(30) NOT NULL UNIQUE";
-  say $fh ");";
-  # IEFILE
-  say $fh "/* create IEFILE table */";
-  say $fh "CREATE TABLE metadatafile (";
-  say $fh "\tid INT PRIMARY KEY DEFAULT nextval('serial'),";
-  say $fh "\taip_id INT NOT NULL REFERENCES aip (id),";
-  say $fh "\tlocation VARCHAR(1024) NOT NULL,";
-  say $fh "\tsourcetype VARCHAR(30) NOT NULL";
-  say $fh ");";
-  # DC
-  say $fh "/* create DC table */";
-  say $fh "CREATE TABLE dc (";
-  say $fh "\tid INT PRIMARY KEY DEFAULT nextval('serial'),";
-  say $fh "\taip_id INT NOT NULL REFERENCES aip (id),";
-  say $fh "\telement VARCHAR(30) NOT NULL,";
-  say $fh "\tvalue VARCHAR(8192) NOT NULL";
-  say $fh ");";
-  # FILE
-  say $fh "/* create FILE table */";
-  say $fh "CREATE TABLE sourcedatafile (";
-  say $fh "\tid INT PRIMARY KEY DEFAULT nextval('serial'), ";
-  say $fh "\taip_id INT NOT NULL REFERENCES aip (id),";
-  say $fh "\tname VARCHAR(1024) NOT NULL";
-  say $fh ");";
-  # LOCAT
-  say $fh "/* create LOCAT table */";
-  say $fh "CREATE TABLE sourcedatalocat (";
-  say $fh "\tid INT PRIMARY KEY DEFAULT nextval('serial'),";
-  say $fh "\tfile_id INT NOT NULL REFERENCES sourcedatafile (id),";
-  say $fh "\tlocation VARCHAR(1024) NOT NULL,";
-  say $fh "\tsourcetype VARCHAR(30) NOT NULL";
-  say $fh ");";
-  #end transaction
-  say $fh "COMMIT;";
-  return;
+  return 1;
 }
 
 ###############################################################################
 # Prepare SQL INSERT Statements for AIPs
 ###############################################################################
-sub write_prepare_insert ($fh) {
-  say $fh "BEGIN;";
-  say $fh "PREPARE aip_plan (varchar) AS";
-  say $fh "  INSERT INTO aip (ie_id) VALUES (\$1);";
-  say $fh "PREPARE ie_plan (varchar, varchar, varchar) AS";
-  say $fh "  INSERT INTO metadatafile (aip_id, location, sourcetype) VALUES (";
-  say $fh "    (SELECT id FROM aip WHERE aip.ie_id=\$1), \$2, \$3";
-  say $fh "  );";
-  say $fh "PREPARE file_plan (varchar, varchar) AS";
-  say $fh "  INSERT INTO sourcedatafile (aip_id, name) VALUES (";
-  say $fh "    (SELECT id FROM aip WHERE aip.ie_id=\$1), \$2";
-  say $fh "  );";
-  say $fh "PREPARE locat_plan (varchar, varchar, varchar, varchar) AS";
-  say $fh "  INSERT INTO sourcedatalocat (file_id, location, sourcetype) VALUES (";
-  say $fh "    (SELECT sourcedatafile.id FROM sourcedatafile,aip WHERE";
-  say $fh "    sourcedatafile.aip_id=aip.id AND aip.ie_id=\$1 AND";
-  say $fh "    sourcedatafile.name=\$2), \$3, \$4";
-  say $fh "  );";
-  say $fh "PREPARE dc_plan (varchar, varchar, varchar) AS";
-  say $fh "  INSERT INTO dc (aip_id, element, value) VALUES (";
-  say $fh "    (SELECT id FROM aip WHERE aip.ie_id=\$1), \$2, \$3";
-  say $fh "  );";
-  say $fh "COMMIT;";
-  return;
+sub write_prepare_insert ($dbh) { # no-op stub, still called from main; $dbh is not used
+  return 1; # nothing is prepared here: write_addsql sets up its own INSERT statements
 }
 
 
@@ -179,27 +166,55 @@ sub write_prepare_insert ($fh) {
 #     $ret{"files"} = \@files;
 #     $ret{"dcrecords"} = \@dcrecords;
 ###############################################################################
-sub write_addsql ($fh, $refhash) {
+# Insert one parsed IE (aip row, metadata file, source files, DC records); dies on SQL errors, returns 1.
+sub write_addsql ($dbh, $refhash) {
   my $ieid = path($refhash->{"filename"})->basename(qw/.xml/);
-  say $fh "BEGIN;";
-  say $fh "EXECUTE aip_plan ('$ieid');";
+  # standard DBI '?' placeholders, bound in order of appearance
+  my $sql_aip_plan = 'INSERT INTO aip (ie_id) VALUES (?)';
+  my $sql_ie_plan = <<'SQL_IE_PLAN';
+    INSERT INTO metadatafile (aip_id, location, sourcetype) VALUES (
+      (SELECT id FROM aip WHERE aip.ie_id=?), ?, ?
+    );
+SQL_IE_PLAN
+  my $sql_file_plan = 'INSERT INTO sourcedatafile (aip_id, name) VALUES ((SELECT id FROM aip WHERE aip.ie_id=?), ?)';
+  my $sql_locat_plan = <<'SQL_LOCAT_PLAN';
+    INSERT INTO sourcedatalocat (file_id, location, sourcetype) VALUES (
+      (SELECT sourcedatafile.id FROM sourcedatafile,aip WHERE
+      sourcedatafile.aip_id=aip.id AND aip.ie_id=? AND
+      sourcedatafile.name=?), ?, ?
+    );
+SQL_LOCAT_PLAN
+  my $sql_dc_plan = <<'SQL_DC_PLAN';
+    INSERT INTO dc (aip_id, element, value) VALUES (
+      (SELECT id FROM aip WHERE aip.ie_id=?), ?, ?
+    );
+SQL_DC_PLAN
+  # prepare_cached returns the same statement handle on later calls instead of re-preparing for every IE
+  my $run = sub {
+    my ($sql, @bind) = @_;
+    my $sth = $dbh->prepare_cached($sql) or die "sql problem detected: " . $dbh->errstr;
+    $sth->execute(@bind) or die "sql problem detected: " . $dbh->errstr;
+  };
+  # one transaction per IE, like the BEGIN/COMMIT pair of the SQL-file output;
+  # skipped when the caller has already switched AutoCommit off
+  my $own_txn = $dbh->{AutoCommit};
+  if ($own_txn) { $dbh->begin_work or die "sql problem detected: " . $dbh->errstr; }
+  $run->($sql_aip_plan, $ieid);
   # FIXME if multiple locations exists
   my $iefile = path($refhash->{"filename"})->basename();
-  say $fh "EXECUTE ie_plan ('$ieid', '$iefile', '$sourcetype');";
+  $run->($sql_ie_plan, $ieid, $iefile, $sourcetype);
   foreach my $location (@{$refhash->{"files"}}) {
     my $file = path($location)->basename(); # FIXME if multiple locations
-    say $fh "EXECUTE file_plan ('$ieid', '$file');";
-    say $fh "EXECUTE locat_plan ('$ieid', '$file', '$location', '$sourcetype' );";
+    $run->($sql_file_plan, $ieid, $file);
+    $run->($sql_locat_plan, $ieid, $file, $location, $sourcetype);
   }
   foreach my $dcpair   (@{$refhash->{"dcrecords"}}) {
     my ($dckey,$dcvalue) = @{$dcpair};
-    # quote ' in dcvalue
-    $dcvalue=~tr/'/"/;
-    say $fh "EXECUTE dc_plan ( '$ieid', '$dckey', '$dcvalue');";
+    # bound values need no quote rewriting, so $dcvalue is stored unchanged
+    $run->($sql_dc_plan, $ieid, $dckey, $dcvalue);
   }
-  say $fh "COMMIT;";
-  say $fh "\n";
-  return;
+  if ($own_txn) { $dbh->commit or die "sql problem detected: " . $dbh->errstr; }
+  return 1;
 }
 
 
@@ -207,11 +222,15 @@ sub write_addsql ($fh, $refhash) {
 ###############################################################################
 # add INDEX and other TRICKs to increase performance
 ###############################################################################
-sub write_index_creation($fh) {
-  say $fh "-- BEGIN;";
-  say $fh "-- CREATE UNIQUE INDEX aip_index on aip (ie_id);";
-  say $fh "-- COMMIT;";
-  return;
+# Hook for indexes and other performance tweaks; currently changes nothing in the database.
+# Always returns 1. $dbh is accepted so callers need not change once a statement is added.
+sub write_index_creation($dbh) {
+  # Deliberately does not prepare/execute a comment-only statement: that would
+  # only work as long as the driver tolerates empty SQL.
+  # The UNIQUE constraint on aip.ie_id already makes SQLite index that column.
+  # To add an index, run it through $dbh->do(...) and die on a false result, e.g.
+  #   CREATE UNIQUE INDEX aip_index on aip (ie_id);
+  return 1;
 }
 
 ###############################################################################
@@ -456,18 +475,21 @@ sub searching_ie_files ($dir, $tmp_ies_unsorted_file) {
 ############# main ############################################################
 ###############################################################################
 ###############################################################################
-  my $recovery = undef;
+  my $flag_recovery = undef; # set by --recovery; handed to parse_iexml for every IE file
+  my $flag_sqldump = undef; # set by --enable_sqldump; NOTE(review): no use of this flag is visible in this patch
+  my $db_filename = $db_name.".db"; # default database file ($db_name is defined outside this hunk); override with --sqlitedb-file
   my @ARGV_tail;
   GetOptions(
-      "recovery" => \$recovery,
+      "recovery" => \$flag_recovery,
+      "sqlitedb-file=s" => \$db_filename,
+      "enable_sqldump" => \$flag_sqldump,
       '<>' => sub {push @ARGV_tail, @_;}
   );
-  if ($#ARGV_tail != 1) {
-      die "you need  a SQL-file and a directory as argument\n";
+  if ($#ARGV_tail < 0) { # no positional argument left after option parsing
+      die "you need  a directory as argument\n";
   }
-  if (defined $recovery) { warn "recovery enabled for XML processing\n"; }
-  my $sqlfile = shift @ARGV_tail;
-  if($sqlfile !~ m/[A-Za-z0-9]+\.sql$/) {die "SQL file should be named like 'foo.sql', but was '$sqlfile'\n";}
+  if (defined $flag_recovery) { warn "recovery enabled for XML processing\n"; }
+
   my $dir = shift @ARGV_tail;
 
   if (defined $dir && -d "$dir") {
@@ -487,20 +509,20 @@ sub searching_ie_files ($dir, $tmp_ies_unsorted_file) {
     my $fh_truncated_IEs = $tmp_ies_truncated_file->openr();
     my $count=0;
     my $progressbar =Time::Progress->new(min => 0, max => $cnt_truncated_files, smoothing => 1);
-    open(my $fh, ">:encoding(UTF-8)", "$sqlfile") || die "could not open file '$sqlfile' for writing, $!";
-    write_database_creation($fh);
-    write_tables_creation($fh);
-    write_prepare_insert($fh);
+    my $dbh = DBI->connect("dbi:SQLite:dbname=$db_filename", "", "") or die "could not connect to database (file '$db_filename')", $DBI::errstr;
+    write_database_creation($dbh); # NOTE(review): this sub is not touched by the patch and still declares ($fh) - confirm it works with a DBI handle
+    write_tables_creation($dbh); # creates the tables; dies on SQL errors
+    write_prepare_insert($dbh); # currently a no-op stub
     while( <$fh_truncated_IEs> ) {
       chomp;
       print $progressbar->report("parse IE files:       %40b  ETA: %E   \r", $count++);
       s/V0*(\d+-IE)/V$1/; # revert fake version
-      my $ret = parse_iexml($_, $recovery);
-      write_addsql($fh, $ret);
+      my $ret = parse_iexml($_, $flag_recovery); # $_ is the IE file path read from the list
+      write_addsql($dbh, $ret); # inserts the rows for this IE; dies on SQL errors
     }
     say "";
-    write_index_creation($fh);
-    close ($fh);
+    write_index_creation($dbh); # currently creates no index
+    $dbh->disconnect or warn("disconnecting problems, ", $dbh->errstr); # a failed disconnect is only reported, not fatal
     say "processed $count uniq IEs";
   } else {
     die "no directory given on commandline"
-- 
GitLab