From dff59cb9facae2af0bb13dc060fdf8c62b11ecb6 Mon Sep 17 00:00:00 2001 From: Andreas Romeyke <art1@andreas-romeyke.de> Date: Thu, 22 Jul 2021 10:02:29 +0200 Subject: [PATCH] - changed to use SQLite --- perl/exit_strategy.pl | 224 +++++++++++++++++++++++------------------- 1 file changed, 123 insertions(+), 101 deletions(-) diff --git a/perl/exit_strategy.pl b/perl/exit_strategy.pl index 7b1519d..4fc8eae 100644 --- a/perl/exit_strategy.pl +++ b/perl/exit_strategy.pl @@ -51,6 +51,8 @@ use File::Sort qw(sort_file); use XML::LibXML; use Time::Progress; use XML::LibXML::XPathContext; +use DBD::SQLite; +use DBI; use Getopt::Long; use constant DEBUG => 0; # no debug @@ -78,83 +80,68 @@ sub write_database_creation ($fh) { } # write tables creation;: -sub write_tables_creation ($fh) { - # Transactions for tables creation - say $fh "BEGIN;"; - - # SEQUENCE - say $fh "/* create SEQUENCE generator */"; - say $fh "CREATE SEQUENCE serial START 1;"; +sub write_tables_creation ($dbh) { + my $sql1=<<"SQL_CREATE1"; + /* create AIP table */ + CREATE TABLE aip ( + id INTEGER, + ie_id TEXT NOT NULL UNIQUE, + PRIMARY KEY(id AUTOINCREMENT) + ); +SQL_CREATE1 + my $sql2=<<"SQL_CREATE2"; + /* create IEFILE table */ + CREATE TABLE metadatafile ( + id INTEGER, + aip_id INTEGER NOT NULL REFERENCES aip (id), + location TEXT NOT NULL, + sourcetype TEXT NOT NULL, + PRIMARY KEY(id AUTOINCREMENT) + ); +SQL_CREATE2 + my $sql3=<<"SQL_CREATE3"; + /* create DC table */ + CREATE TABLE dc ( + id INTEGER, + aip_id INTEGER NOT NULL REFERENCES aip (id), + element TEXT NOT NULL, + value TEXT NOT NULL, + PRIMARY KEY(id AUTOINCREMENT) + ); +SQL_CREATE3 + my $sql4=<<"SQL_CREATE4"; + /* create FILE table */ + CREATE TABLE sourcedatafile ( + id INTEGER, + aip_id INTEGER NOT NULL REFERENCES aip (id), + name TEXT NOT NULL, + PRIMARY KEY(id AUTOINCREMENT) + ); +SQL_CREATE4 + my $sql5=<<"SQL_CREATE5"; + /* create LOCAT table */ + CREATE TABLE sourcedatalocat ( + id INTEGER, + file_id INTEGER NOT NULL REFERENCES sourcedatafile (id), + location TEXT NOT NULL, + sourcetype TEXT NOT NULL, + PRIMARY KEY(id AUTOINCREMENT) + ); +SQL_CREATE5 + my $sth1 = $dbh->prepare($sql1); $sth1->execute() or die "sql problem detected", $dbh->errstr; + my $sth2 = $dbh->prepare($sql2); $sth2->execute() or die "sql problem detected", $dbh->errstr; + my $sth3 = $dbh->prepare($sql3); $sth3->execute() or die "sql problem detected", $dbh->errstr; + my $sth4 = $dbh->prepare($sql4); $sth4->execute() or die "sql problem detected", $dbh->errstr; + my $sth5 = $dbh->prepare($sql5); $sth5->execute() or die "sql problem detected", $dbh->errstr; - # AIP - say $fh "/* create AIP table */"; - say $fh "CREATE TABLE aip ("; - say $fh "\tid INT PRIMARY KEY DEFAULT nextval('serial'),"; - say $fh "\tie_id VARCHAR(30) NOT NULL UNIQUE"; - say $fh ");"; - # IEFILE - say $fh "/* create IEFILE table */"; - say $fh "CREATE TABLE metadatafile ("; - say $fh "\tid INT PRIMARY KEY DEFAULT nextval('serial'),"; - say $fh "\taip_id INT NOT NULL REFERENCES aip (id),"; - say $fh "\tlocation VARCHAR(1024) NOT NULL,"; - say $fh "\tsourcetype VARCHAR(30) NOT NULL"; - say $fh ");"; - # DC - say $fh "/* create DC table */"; - say $fh "CREATE TABLE dc ("; - say $fh "\tid INT PRIMARY KEY DEFAULT nextval('serial'),"; - say $fh "\taip_id INT NOT NULL REFERENCES aip (id),"; - say $fh "\telement VARCHAR(30) NOT NULL,"; - say $fh "\tvalue VARCHAR(8192) NOT NULL"; - say $fh ");"; - # FILE - say $fh "/* create FILE table */"; - say $fh "CREATE TABLE sourcedatafile ("; - say $fh "\tid INT PRIMARY KEY DEFAULT nextval('serial'), "; - say $fh "\taip_id INT NOT NULL REFERENCES aip (id),"; - say $fh "\tname VARCHAR(1024) NOT NULL"; - say $fh ");"; - # LOCAT - say $fh "/* create LOCAT table */"; - say $fh "CREATE TABLE sourcedatalocat ("; - say $fh "\tid INT PRIMARY KEY DEFAULT nextval('serial'),"; - say $fh "\tfile_id INT NOT NULL REFERENCES sourcedatafile (id),"; - say $fh "\tlocation VARCHAR(1024) NOT NULL,"; - say $fh "\tsourcetype VARCHAR(30) NOT NULL"; - say $fh ");"; - #end transaction - say $fh "COMMIT;"; - return; + return 1; } ############################################################################### # Prepare SQL INSERT Statements for AIPs ############################################################################### -sub write_prepare_insert ($fh) { - say $fh "BEGIN;"; - say $fh "PREPARE aip_plan (varchar) AS"; - say $fh " INSERT INTO aip (ie_id) VALUES (\$1);"; - say $fh "PREPARE ie_plan (varchar, varchar, varchar) AS"; - say $fh " INSERT INTO metadatafile (aip_id, location, sourcetype) VALUES ("; - say $fh " (SELECT id FROM aip WHERE aip.ie_id=\$1), \$2, \$3"; - say $fh " );"; - say $fh "PREPARE file_plan (varchar, varchar) AS"; - say $fh " INSERT INTO sourcedatafile (aip_id, name) VALUES ("; - say $fh " (SELECT id FROM aip WHERE aip.ie_id=\$1), \$2"; - say $fh " );"; - say $fh "PREPARE locat_plan (varchar, varchar, varchar, varchar) AS"; - say $fh " INSERT INTO sourcedatalocat (file_id, location, sourcetype) VALUES ("; - say $fh " (SELECT sourcedatafile.id FROM sourcedatafile,aip WHERE"; - say $fh " sourcedatafile.aip_id=aip.id AND aip.ie_id=\$1 AND"; - say $fh " sourcedatafile.name=\$2), \$3, \$4"; - say $fh " );"; - say $fh "PREPARE dc_plan (varchar, varchar, varchar) AS"; - say $fh " INSERT INTO dc (aip_id, element, value) VALUES ("; - say $fh " (SELECT id FROM aip WHERE aip.ie_id=\$1), \$2, \$3"; - say $fh " );"; - say $fh "COMMIT;"; - return; +sub write_prepare_insert ($dbh) { + return 1; } @@ -179,27 +166,55 @@ sub write_prepare_insert ($fh) { # $ret{"files"} = \@files; # $ret{"dcrecords"} = \@dcrecords; ############################################################################### -sub write_addsql ($fh, $refhash) { +sub write_addsql ($dbh, $refhash) { my $ieid = path($refhash->{"filename"})->basename(qw/.xml/); - say $fh "BEGIN;"; - say $fh "EXECUTE aip_plan ('$ieid');"; + my $sql_aip_plan=<<"SQL_AIP_PLAN"; + INSERT INTO aip (ie_id) VALUES (\$1); +SQL_AIP_PLAN + my $sql_ie_plan=<<"SQL_IE_PLAN"; + INSERT INTO metadatafile (aip_id, location, sourcetype) VALUES ( + (SELECT id FROM aip WHERE aip.ie_id=\$1), \$2, \$3 + ); +SQL_IE_PLAN + my $sql_file_plan=<<"SQL_FILE_PLAN"; + INSERT INTO sourcedatafile (aip_id, name) VALUES ( + (SELECT id FROM aip WHERE aip.ie_id=\$1), \$2 + ); +SQL_FILE_PLAN + my $sql_locat_plan=<<"SQL_LOCAT_PLAN"; + INSERT INTO sourcedatalocat (file_id, location, sourcetype) VALUES ( + (SELECT sourcedatafile.id FROM sourcedatafile,aip WHERE + sourcedatafile.aip_id=aip.id AND aip.ie_id=\$1 AND + sourcedatafile.name=\$2), \$3, \$4 + ); +SQL_LOCAT_PLAN + my $sql_dc_pan=<<"SQL_DC_PLAN"; + INSERT INTO dc (aip_id, element, value) VALUES ( + (SELECT id FROM aip WHERE aip.ie_id=\$1), \$2, \$3 + ); +SQL_DC_PLAN + my $sth_aip_plan = $dbh->prepare($sql_aip_plan); + my $sth_ie_plan = $dbh->prepare($sql_ie_plan); + my $sth_file_plan = $dbh->prepare($sql_file_plan); + my $sth_locat_plan = $dbh->prepare($sql_locat_plan); + my $sth_dc_plan = $dbh->prepare($sql_dc_pan); + # start SQL insert + $sth_aip_plan->execute($ieid) or die "sql problem detected", $dbh->errstr; # FIXME if multiple locations exists my $iefile = path($refhash->{"filename"})->basename(); - say $fh "EXECUTE ie_plan ('$ieid', '$iefile', '$sourcetype');"; + $sth_ie_plan->execute( $ieid, $iefile, $sourcetype) or die "sql problem detected", $dbh->errstr; foreach my $location (@{$refhash->{"files"}}) { my $file = path($location)->basename(); # FIXME if multiple locations - say $fh "EXECUTE file_plan ('$ieid', '$file');"; - say $fh "EXECUTE locat_plan ('$ieid', '$file', '$location', '$sourcetype' );"; + $sth_file_plan->execute($ieid, $file) or die "sql problem detected", $dbh->errstr; + $sth_locat_plan->execute($ieid, $file, $location, $sourcetype) or die "sql problem detected", $dbh->errstr; } foreach my $dcpair (@{$refhash->{"dcrecords"}}) { my ($dckey,$dcvalue) = @{$dcpair}; # quote ' in dcvalue $dcvalue=~tr/'/"/; - say $fh "EXECUTE dc_plan ( '$ieid', '$dckey', '$dcvalue');"; + $sth_dc_plan->execute($ieid, $dckey, $dcvalue) or die "sql problem detected", $dbh->errstr; } - say $fh "COMMIT;"; - say $fh "\n"; - return; + return 1; } @@ -207,11 +222,15 @@ sub write_addsql ($fh, $refhash) { ############################################################################### # add INDEX and other TRICKs to increase performance ############################################################################### -sub write_index_creation($fh) { - say $fh "-- BEGIN;"; - say $fh "-- CREATE UNIQUE INDEX aip_index on aip (ie_id);"; - say $fh "-- COMMIT;"; - return; +sub write_index_creation($dbh) { + my $sql=<<"SQL_INDEX"; + -- BEGIN; + -- CREATE UNIQUE INDEX aip_index on aip (ie_id); + -- COMMIT; +SQL_INDEX + my $sth = $dbh->prepare($sql); + $sth->execute() or die "sql problem detected", $dbh->errstr; + return 1; } ############################################################################### @@ -456,18 +475,21 @@ sub searching_ie_files ($dir, $tmp_ies_unsorted_file) { ############# main ############################################################ ############################################################################### ############################################################################### - my $recovery = undef; + my $flag_recovery = undef; + my $flag_sqldump = undef; + my $db_filename = $db_name.".db"; my @ARGV_tail; GetOptions( - "recovery" => \$recovery, + "recovery" => \$flag_recovery, + "sqlitedb-file=s" => \$db_filename, + "enable_sqldump" => \$flag_sqldump, '<>' => sub {push @ARGV_tail, @_;} ); - if ($#ARGV_tail != 1) { - die "you need a SQL-file and a directory as argument\n"; + if ($#ARGV_tail < 0) { + die "you need a directory as argument\n"; } - if (defined $recovery) { warn "recovery enabled for XML processing\n"; } - my $sqlfile = shift @ARGV_tail; - if($sqlfile !~ m/[A-Za-z0-9]+\.sql$/) {die "SQL file should be named like 'foo.sql', but was '$sqlfile'\n";} + if (defined $flag_recovery) { warn "recovery enabled for XML processing\n"; } + my $dir = shift @ARGV_tail; if (defined $dir && -d "$dir") { @@ -487,20 +509,20 @@ sub searching_ie_files ($dir, $tmp_ies_unsorted_file) { my $fh_truncated_IEs = $tmp_ies_truncated_file->openr(); my $count=0; my $progressbar =Time::Progress->new(min => 0, max => $cnt_truncated_files, smoothing => 1); - open(my $fh, ">:encoding(UTF-8)", "$sqlfile") || die "could not open file '$sqlfile' for writing, $!"; - write_database_creation($fh); - write_tables_creation($fh); - write_prepare_insert($fh); + my $dbh = DBI->connect("dbi:SQLite:dbname=$db_filename", "", "") or die "could not connect to database (file '$db_filename')", $DBI::errstr; + write_database_creation($dbh); + write_tables_creation($dbh); + write_prepare_insert($dbh); while( <$fh_truncated_IEs> ) { chomp; print $progressbar->report("parse IE files: %40b ETA: %E \r", $count++); s/V0*(\d+-IE)/V$1/; # revert fake version - my $ret = parse_iexml($_, $recovery); - write_addsql($fh, $ret); + my $ret = parse_iexml($_, $flag_recovery); + write_addsql($dbh, $ret); } say ""; - write_index_creation($fh); - close ($fh); + write_index_creation($dbh); + $dbh->disconnect or warn("disconnecting problems, ", $dbh->errstr); say "processed $count uniq IEs"; } else { die "no directory given on commandline" -- GitLab