Skip to content
Snippets Groups Projects
Commit dff59cb9 authored by Andreas Romeyke
Browse files

- changed to use SQLite

parent e3cea4b1
No related branches found
No related tags found
No related merge requests found
...@@ -51,6 +51,8 @@ use File::Sort qw(sort_file); ...@@ -51,6 +51,8 @@ use File::Sort qw(sort_file);
use XML::LibXML; use XML::LibXML;
use Time::Progress; use Time::Progress;
use XML::LibXML::XPathContext; use XML::LibXML::XPathContext;
use DBD::SQLite;
use DBI;
use Getopt::Long; use Getopt::Long;
use constant DEBUG => 0; # no debug use constant DEBUG => 0; # no debug
...@@ -78,83 +80,68 @@ sub write_database_creation ($fh) { ...@@ -78,83 +80,68 @@ sub write_database_creation ($fh) {
} }
# write tables creation;: # write tables creation;:
# Create the tables aip, metadatafile, dc, sourcedatafile and sourcedatalocat
# on the given database handle.
#
#   $dbh - connected DBI database handle
#
# Returns 1 when all five CREATE TABLE statements succeeded; dies with the
# driver's error text as soon as one of them fails.
sub write_tables_creation ($dbh) {
    # Tables are listed so that a table is created before the tables that
    # reference it (aip before its children, sourcedatafile before
    # sourcedatalocat).
    my @create_statements = (
        <<'SQL_CREATE1',
/* create AIP table */
CREATE TABLE aip (
    id INTEGER,
    ie_id TEXT NOT NULL UNIQUE,
    PRIMARY KEY(id AUTOINCREMENT)
);
SQL_CREATE1
        <<'SQL_CREATE2',
/* create IEFILE table */
CREATE TABLE metadatafile (
    id INTEGER,
    aip_id INTEGER NOT NULL REFERENCES aip (id),
    location TEXT NOT NULL,
    sourcetype TEXT NOT NULL,
    PRIMARY KEY(id AUTOINCREMENT)
);
SQL_CREATE2
        <<'SQL_CREATE3',
/* create DC table */
CREATE TABLE dc (
    id INTEGER,
    aip_id INTEGER NOT NULL REFERENCES aip (id),
    element TEXT NOT NULL,
    value TEXT NOT NULL,
    PRIMARY KEY(id AUTOINCREMENT)
);
SQL_CREATE3
        <<'SQL_CREATE4',
/* create FILE table */
CREATE TABLE sourcedatafile (
    id INTEGER,
    aip_id INTEGER NOT NULL REFERENCES aip (id),
    name TEXT NOT NULL,
    PRIMARY KEY(id AUTOINCREMENT)
);
SQL_CREATE4
        <<'SQL_CREATE5',
/* create LOCAT table */
CREATE TABLE sourcedatalocat (
    id INTEGER,
    file_id INTEGER NOT NULL REFERENCES sourcedatafile (id),
    location TEXT NOT NULL,
    sourcetype TEXT NOT NULL,
    PRIMARY KEY(id AUTOINCREMENT)
);
SQL_CREATE5
    );

    foreach my $create_sql (@create_statements) {
        # do() yields undef on failure, so test definedness rather than truth.
        defined $dbh->do($create_sql)
            or die "sql problem detected: ", $dbh->errstr;
    }
    return 1;
}
############################################################################### ###############################################################################
# Prepare SQL INSERT Statements for AIPs # Prepare SQL INSERT Statements for AIPs
############################################################################### ###############################################################################
# Kept as a hook for callers: it performs no database work, because the
# INSERT statements are prepared at the place where they are executed.
#
#   $dbh - database handle (not used)
#
# Always returns 1.
sub write_prepare_insert ($dbh) {
    return 1;
}
...@@ -179,27 +166,55 @@ sub write_prepare_insert ($fh) { ...@@ -179,27 +166,55 @@ sub write_prepare_insert ($fh) {
# $ret{"files"} = \@files; # $ret{"files"} = \@files;
# $ret{"dcrecords"} = \@dcrecords; # $ret{"dcrecords"} = \@dcrecords;
############################################################################### ###############################################################################
# Store one parsed AIP in the database.
#
#   $dbh     - connected DBI database handle
#   $refhash - hash reference with the keys
#                filename  : path of the IE XML file,
#                files     : array reference of file locations,
#                dcrecords : array reference of [element, value] pairs
#
# One row goes to aip and one to metadatafile; every entry of "files" adds a
# row to sourcedatafile and to sourcedatalocat; every entry of "dcrecords"
# adds a row to dc. The file-level $sourcetype is stored with the
# metadatafile and sourcedatalocat rows.
#
# When the handle is in AutoCommit mode, all inserts of the AIP run inside a
# single transaction, so the AIP is stored completely or not at all. A handle
# that already has a transaction open is left under the caller's control.
#
# Returns 1 on success, dies with the driver's error text otherwise.
sub write_addsql ($dbh, $refhash) {
    my $ieid = path($refhash->{"filename"})->basename(qw/.xml/);
    # FIXME if multiple locations exists
    my $iefile = path($refhash->{"filename"})->basename();

    my %sql_for = (
        aip => <<'SQL_AIP_PLAN',
INSERT INTO aip (ie_id) VALUES (?);
SQL_AIP_PLAN
        ie => <<'SQL_IE_PLAN',
INSERT INTO metadatafile (aip_id, location, sourcetype) VALUES (
    (SELECT id FROM aip WHERE aip.ie_id=?), ?, ?
);
SQL_IE_PLAN
        file => <<'SQL_FILE_PLAN',
INSERT INTO sourcedatafile (aip_id, name) VALUES (
    (SELECT id FROM aip WHERE aip.ie_id=?), ?
);
SQL_FILE_PLAN
        locat => <<'SQL_LOCAT_PLAN',
INSERT INTO sourcedatalocat (file_id, location, sourcetype) VALUES (
    (SELECT sourcedatafile.id FROM sourcedatafile,aip WHERE
        sourcedatafile.aip_id=aip.id AND aip.ie_id=? AND
        sourcedatafile.name=?), ?, ?
);
SQL_LOCAT_PLAN
        dc => <<'SQL_DC_PLAN',
INSERT INTO dc (aip_id, element, value) VALUES (
    (SELECT id FROM aip WHERE aip.ie_id=?), ?, ?
);
SQL_DC_PLAN
    );

    # prepare_cached hands back the same statement handle on later calls, so
    # the five statements are not prepared again for every AIP.
    my $insert = sub {
        my ($plan, @bind_values) = @_;
        my $sth = $dbh->prepare_cached($sql_for{$plan})
            or die "sql problem detected: ", $dbh->errstr;
        $sth->execute(@bind_values)
            or die "sql problem detected: ", $dbh->errstr;
        return;
    };

    my $own_transaction = $dbh->{AutoCommit};
    if ($own_transaction) {
        $dbh->begin_work
            or die "sql problem detected: ", $dbh->errstr;
    }

    my $stored = eval {
        $insert->('aip', $ieid);
        $insert->('ie', $ieid, $iefile, $sourcetype);
        foreach my $location (@{ $refhash->{"files"} }) {
            my $file = path($location)->basename();    # FIXME if multiple locations
            $insert->('file',  $ieid, $file);
            $insert->('locat', $ieid, $file, $location, $sourcetype);
        }
        foreach my $dcpair (@{ $refhash->{"dcrecords"} }) {
            my ($dckey, $dcvalue) = @{$dcpair};
            # The value is bound as a parameter, so apostrophes need no
            # escaping and are stored unchanged.
            $insert->('dc', $ieid, $dckey, $dcvalue);
        }
        1;
    };
    if (!$stored) {
        my $error = $@;
        if ($own_transaction) {
            # A failing rollback must not hide the original error.
            eval { $dbh->rollback; 1 } or warn "rollback failed\n";
        }
        die $error;
    }

    if ($own_transaction) {
        $dbh->commit
            or die "sql problem detected: ", $dbh->errstr;
    }
    return 1;
}
...@@ -207,11 +222,15 @@ sub write_addsql ($fh, $refhash) { ...@@ -207,11 +222,15 @@ sub write_addsql ($fh, $refhash) {
############################################################################### ###############################################################################
# add INDEX and other TRICKs to increase performance # add INDEX and other TRICKs to increase performance
############################################################################### ###############################################################################
# Hook for index creation and similar tuning after all rows are inserted.
#
#   $dbh - connected DBI database handle
#
# The statement sent to the database consists solely of SQL comments, i.e.
# the index creation itself is disabled.
# NOTE(review): aip.ie_id is declared UNIQUE in the table definition, which
# presumably makes a separate unique index redundant - confirm before
# enabling the statement.
#
# Returns 1 on success; dies with the driver's error text when the statement
# cannot be prepared or executed.
sub write_index_creation ($dbh) {
    my $index_sql = <<'SQL_INDEX';
-- BEGIN;
-- CREATE UNIQUE INDEX aip_index on aip (ie_id);
-- COMMIT;
SQL_INDEX
    # Check prepare separately so a failure reports the driver's message
    # rather than a method call on an undefined value.
    my $sth = $dbh->prepare($index_sql)
        or die "sql problem detected: ", $dbh->errstr;
    $sth->execute()
        or die "sql problem detected: ", $dbh->errstr;
    return 1;
}
############################################################################### ###############################################################################
...@@ -456,18 +475,21 @@ sub searching_ie_files ($dir, $tmp_ies_unsorted_file) { ...@@ -456,18 +475,21 @@ sub searching_ie_files ($dir, $tmp_ies_unsorted_file) {
############# main ############################################################ ############# main ############################################################
############################################################################### ###############################################################################
############################################################################### ###############################################################################
# Command line handling:
#   --recovery            enable recovery for XML processing
#   --sqlitedb-file=FILE  SQLite database file (default: $db_name with ".db" appended)
#   --enable_sqldump      sets $flag_sqldump
# The first non-option argument is taken as the directory to process.
my $flag_recovery = undef;
my $flag_sqldump  = undef;
my $db_filename   = $db_name . ".db";
my @ARGV_tail;
GetOptions(
    "recovery"        => \$flag_recovery,
    "sqlitedb-file=s" => \$db_filename,
    "enable_sqldump"  => \$flag_sqldump,
    # Getopt::Long may pass an object that stringifies to the argument;
    # keep plain strings.
    '<>' => sub { push @ARGV_tail, map { "$_" } @_; },
) or die "invalid command line options\n";    # GetOptions has already printed the details
if (!@ARGV_tail) {
    die "you need a directory as argument\n";
}
if (defined $flag_recovery) {
    warn "recovery enabled for XML processing\n";
}
my $dir = shift @ARGV_tail;
if (defined $dir && -d "$dir") { if (defined $dir && -d "$dir") {
...@@ -487,20 +509,20 @@ sub searching_ie_files ($dir, $tmp_ies_unsorted_file) { ...@@ -487,20 +509,20 @@ sub searching_ie_files ($dir, $tmp_ies_unsorted_file) {
my $fh_truncated_IEs = $tmp_ies_truncated_file->openr(); my $fh_truncated_IEs = $tmp_ies_truncated_file->openr();
my $count=0; my $count=0;
my $progressbar =Time::Progress->new(min => 0, max => $cnt_truncated_files, smoothing => 1); my $progressbar =Time::Progress->new(min => 0, max => $cnt_truncated_files, smoothing => 1);
open(my $fh, ">:encoding(UTF-8)", "$sqlfile") || die "could not open file '$sqlfile' for writing, $!"; my $dbh = DBI->connect("dbi:SQLite:dbname=$db_filename", "", "") or die "could not connect to database (file '$db_filename')", $DBI::errstr;
write_database_creation($fh); write_database_creation($dbh);
write_tables_creation($fh); write_tables_creation($dbh);
write_prepare_insert($fh); write_prepare_insert($dbh);
while( <$fh_truncated_IEs> ) { while( <$fh_truncated_IEs> ) {
chomp; chomp;
print $progressbar->report("parse IE files: %40b ETA: %E \r", $count++); print $progressbar->report("parse IE files: %40b ETA: %E \r", $count++);
s/V0*(\d+-IE)/V$1/; # revert fake version s/V0*(\d+-IE)/V$1/; # revert fake version
my $ret = parse_iexml($_, $recovery); my $ret = parse_iexml($_, $flag_recovery);
write_addsql($fh, $ret); write_addsql($dbh, $ret);
} }
say ""; say "";
write_index_creation($fh); write_index_creation($dbh);
close ($fh); $dbh->disconnect or warn("disconnecting problems, ", $dbh->errstr);
say "processed $count uniq IEs"; say "processed $count uniq IEs";
} else { } else {
die "no directory given on commandline" die "no directory given on commandline"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment