Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
R
rosettaExitStrategy
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Digital Preservation
rosettaExitStrategy
Commits
dff59cb9
Commit
dff59cb9
authored
3 years ago
by
Andreas Romeyke
Browse files
Options
Downloads
Patches
Plain Diff
- changed to use SQLite
parent
e3cea4b1
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
perl/exit_strategy.pl
+123
-101
123 additions, 101 deletions
perl/exit_strategy.pl
with
123 additions
and
101 deletions
perl/exit_strategy.pl
+
123
−
101
View file @
dff59cb9
...
@@ -51,6 +51,8 @@ use File::Sort qw(sort_file);
...
@@ -51,6 +51,8 @@ use File::Sort qw(sort_file);
use
XML::
LibXML
;
use
XML::
LibXML
;
use
Time::
Progress
;
use
Time::
Progress
;
use
XML::LibXML::
XPathContext
;
use
XML::LibXML::
XPathContext
;
use
DBD::
SQLite
;
use
DBI
;
use
Getopt::
Long
;
use
Getopt::
Long
;
use
constant
DEBUG
=>
0
;
# no debug
use
constant
DEBUG
=>
0
;
# no debug
...
@@ -78,83 +80,68 @@ sub write_database_creation ($fh) {
...
@@ -78,83 +80,68 @@ sub write_database_creation ($fh) {
}
}
# write tables creation;:
# write tables creation;:
sub
write_tables_creation
($fh) {
sub
write_tables_creation
($dbh) {
# Transactions for tables creation
my
$sql1
=
<<"SQL_CREATE1";
say
$fh
"
BEGIN;
";
/* create AIP table */
CREATE TABLE aip (
# SEQUENCE
id INTEGER,
say
$fh
"
/* create SEQUENCE generator */
";
ie_id TEXT NOT NULL UNIQUE,
say
$fh
"
CREATE SEQUENCE serial START 1;
";
PRIMARY KEY(id AUTOINCREMENT)
);
SQL_CREATE1
my
$sql2
=
<<"SQL_CREATE2";
/* create IEFILE table */
CREATE TABLE metadatafile (
id INTEGER,
aip_id INTEGER NOT NULL REFERENCES aip (id),
location TEXT NOT NULL,
sourcetype TEXT NOT NULL,
PRIMARY KEY(id AUTOINCREMENT)
);
SQL_CREATE2
my
$sql3
=
<<"SQL_CREATE3";
/* create DC table */
CREATE TABLE dc (
id INTEGER,
aip_id INTEGER NOT NULL REFERENCES aip (id),
element TEXT NOT NULL,
value TEXT NOT NULL,
PRIMARY KEY(id AUTOINCREMENT)
);
SQL_CREATE3
my
$sql4
=
<<"SQL_CREATE4";
/* create FILE table */
CREATE TABLE sourcedatafile (
id INTEGER,
aip_id INTEGER NOT NULL REFERENCES aip (id),
name TEXT NOT NULL,
PRIMARY KEY(id AUTOINCREMENT)
);
SQL_CREATE4
my
$sql5
=
<<"SQL_CREATE5";
/* create LOCAT table */
CREATE TABLE sourcedatalocat (
id INTEGER,
file_id INTEGER NOT NULL REFERENCES sourcedatafile (id),
location TEXT NOT NULL,
sourcetype TEXT NOT NULL,
PRIMARY KEY(id AUTOINCREMENT)
);
SQL_CREATE5
my
$sth1
=
$dbh
->
prepare
(
$sql1
);
$sth1
->
execute
()
or
die
"
sql problem detected
",
$dbh
->
errstr
;
my
$sth2
=
$dbh
->
prepare
(
$sql2
);
$sth2
->
execute
()
or
die
"
sql problem detected
",
$dbh
->
errstr
;
my
$sth3
=
$dbh
->
prepare
(
$sql3
);
$sth3
->
execute
()
or
die
"
sql problem detected
",
$dbh
->
errstr
;
my
$sth4
=
$dbh
->
prepare
(
$sql4
);
$sth4
->
execute
()
or
die
"
sql problem detected
",
$dbh
->
errstr
;
my
$sth5
=
$dbh
->
prepare
(
$sql5
);
$sth5
->
execute
()
or
die
"
sql problem detected
",
$dbh
->
errstr
;
# AIP
return
1
;
say
$fh
"
/* create AIP table */
";
say
$fh
"
CREATE TABLE aip (
";
say
$fh
"
\t
id INT PRIMARY KEY DEFAULT nextval('serial'),
";
say
$fh
"
\t
ie_id VARCHAR(30) NOT NULL UNIQUE
";
say
$fh
"
);
";
# IEFILE
say
$fh
"
/* create IEFILE table */
";
say
$fh
"
CREATE TABLE metadatafile (
";
say
$fh
"
\t
id INT PRIMARY KEY DEFAULT nextval('serial'),
";
say
$fh
"
\t
aip_id INT NOT NULL REFERENCES aip (id),
";
say
$fh
"
\t
location VARCHAR(1024) NOT NULL,
";
say
$fh
"
\t
sourcetype VARCHAR(30) NOT NULL
";
say
$fh
"
);
";
# DC
say
$fh
"
/* create DC table */
";
say
$fh
"
CREATE TABLE dc (
";
say
$fh
"
\t
id INT PRIMARY KEY DEFAULT nextval('serial'),
";
say
$fh
"
\t
aip_id INT NOT NULL REFERENCES aip (id),
";
say
$fh
"
\t
element VARCHAR(30) NOT NULL,
";
say
$fh
"
\t
value VARCHAR(8192) NOT NULL
";
say
$fh
"
);
";
# FILE
say
$fh
"
/* create FILE table */
";
say
$fh
"
CREATE TABLE sourcedatafile (
";
say
$fh
"
\t
id INT PRIMARY KEY DEFAULT nextval('serial'),
";
say
$fh
"
\t
aip_id INT NOT NULL REFERENCES aip (id),
";
say
$fh
"
\t
name VARCHAR(1024) NOT NULL
";
say
$fh
"
);
";
# LOCAT
say
$fh
"
/* create LOCAT table */
";
say
$fh
"
CREATE TABLE sourcedatalocat (
";
say
$fh
"
\t
id INT PRIMARY KEY DEFAULT nextval('serial'),
";
say
$fh
"
\t
file_id INT NOT NULL REFERENCES sourcedatafile (id),
";
say
$fh
"
\t
location VARCHAR(1024) NOT NULL,
";
say
$fh
"
\t
sourcetype VARCHAR(30) NOT NULL
";
say
$fh
"
);
";
#end transaction
say
$fh
"
COMMIT;
";
return
;
}
}
###############################################################################
###############################################################################
# Prepare SQL INSERT Statements for AIPs
# Prepare SQL INSERT Statements for AIPs
###############################################################################
###############################################################################
sub
write_prepare_insert
($fh) {
sub
write_prepare_insert
($dbh) {
say
$fh
"
BEGIN;
";
return
1
;
say
$fh
"
PREPARE aip_plan (varchar) AS
";
say
$fh
"
INSERT INTO aip (ie_id) VALUES (
\$
1);
";
say
$fh
"
PREPARE ie_plan (varchar, varchar, varchar) AS
";
say
$fh
"
INSERT INTO metadatafile (aip_id, location, sourcetype) VALUES (
";
say
$fh
"
(SELECT id FROM aip WHERE aip.ie_id=
\$
1),
\$
2,
\$
3
";
say
$fh
"
);
";
say
$fh
"
PREPARE file_plan (varchar, varchar) AS
";
say
$fh
"
INSERT INTO sourcedatafile (aip_id, name) VALUES (
";
say
$fh
"
(SELECT id FROM aip WHERE aip.ie_id=
\$
1),
\$
2
";
say
$fh
"
);
";
say
$fh
"
PREPARE locat_plan (varchar, varchar, varchar, varchar) AS
";
say
$fh
"
INSERT INTO sourcedatalocat (file_id, location, sourcetype) VALUES (
";
say
$fh
"
(SELECT sourcedatafile.id FROM sourcedatafile,aip WHERE
";
say
$fh
"
sourcedatafile.aip_id=aip.id AND aip.ie_id=
\$
1 AND
";
say
$fh
"
sourcedatafile.name=
\$
2),
\$
3,
\$
4
";
say
$fh
"
);
";
say
$fh
"
PREPARE dc_plan (varchar, varchar, varchar) AS
";
say
$fh
"
INSERT INTO dc (aip_id, element, value) VALUES (
";
say
$fh
"
(SELECT id FROM aip WHERE aip.ie_id=
\$
1),
\$
2,
\$
3
";
say
$fh
"
);
";
say
$fh
"
COMMIT;
";
return
;
}
}
...
@@ -179,27 +166,55 @@ sub write_prepare_insert ($fh) {
...
@@ -179,27 +166,55 @@ sub write_prepare_insert ($fh) {
# $ret{"files"} = \@files;
# $ret{"files"} = \@files;
# $ret{"dcrecords"} = \@dcrecords;
# $ret{"dcrecords"} = \@dcrecords;
###############################################################################
###############################################################################
sub
write_addsql
($
f
h, $refhash) {
sub
write_addsql
($
db
h, $refhash) {
my
$ieid
=
path
(
$refhash
->
{"
filename
"})
->
basename
(
qw/.xml/
);
my
$ieid
=
path
(
$refhash
->
{"
filename
"})
->
basename
(
qw/.xml/
);
say
$fh
"
BEGIN;
";
my
$sql_aip_plan
=
<<"SQL_AIP_PLAN";
say
$fh
"
EXECUTE aip_plan ('
$ieid
');
";
INSERT INTO aip (ie_id) VALUES (\$1);
SQL_AIP_PLAN
my
$sql_ie_plan
=
<<"SQL_IE_PLAN";
INSERT INTO metadatafile (aip_id, location, sourcetype) VALUES (
(SELECT id FROM aip WHERE aip.ie_id=\$1), \$2, \$3
);
SQL_IE_PLAN
my
$sql_file_plan
=
<<"SQL_FILE_PLAN";
INSERT INTO sourcedatafile (aip_id, name) VALUES (
(SELECT id FROM aip WHERE aip.ie_id=\$1), \$2
);
SQL_FILE_PLAN
my
$sql_locat_plan
=
<<"SQL_LOCAT_PLAN";
INSERT INTO sourcedatalocat (file_id, location, sourcetype) VALUES (
(SELECT sourcedatafile.id FROM sourcedatafile,aip WHERE
sourcedatafile.aip_id=aip.id AND aip.ie_id=\$1 AND
sourcedatafile.name=\$2), \$3, \$4
);
SQL_LOCAT_PLAN
my
$sql_dc_pan
=
<<"SQL_DC_PLAN";
INSERT INTO dc (aip_id, element, value) VALUES (
(SELECT id FROM aip WHERE aip.ie_id=\$1), \$2, \$3
);
SQL_DC_PLAN
my
$sth_aip_plan
=
$dbh
->
prepare
(
$sql_aip_plan
);
my
$sth_ie_plan
=
$dbh
->
prepare
(
$sql_ie_plan
);
my
$sth_file_plan
=
$dbh
->
prepare
(
$sql_file_plan
);
my
$sth_locat_plan
=
$dbh
->
prepare
(
$sql_locat_plan
);
my
$sth_dc_plan
=
$dbh
->
prepare
(
$sql_dc_pan
);
# start SQL insert
$sth_aip_plan
->
execute
(
$ieid
)
or
die
"
sql problem detected
",
$dbh
->
errstr
;
# FIXME if multiple locations exists
# FIXME if multiple locations exists
my
$iefile
=
path
(
$refhash
->
{"
filename
"})
->
basename
();
my
$iefile
=
path
(
$refhash
->
{"
filename
"})
->
basename
();
say
$fh
"
EXECUTE ie_plan ('
$ieid
'
,
'
$iefile
'
,
'
$sourcetype
');
"
;
$sth_ie_plan
->
execute
(
$ieid
,
$iefile
,
$sourcetype
)
or
die
"
sql problem detected
",
$dbh
->
errstr
;
foreach
my
$location
(
@
{
$refhash
->
{"
files
"}})
{
foreach
my
$location
(
@
{
$refhash
->
{"
files
"}})
{
my
$file
=
path
(
$location
)
->
basename
();
# FIXME if multiple locations
my
$file
=
path
(
$location
)
->
basename
();
# FIXME if multiple locations
say
$fh
"
EXECUTE file_plan ('
$ieid
'
,
'
$file
');
"
;
$sth_file_plan
->
execute
(
$ieid
,
$file
)
or
die
"
sql problem detected
",
$dbh
->
errstr
;
say
$fh
"
EXECUTE locat_plan ('
$ieid
'
,
'
$file
'
,
'
$location
'
,
'
$sourcetype
' );
"
;
$sth_locat_plan
->
execute
(
$ieid
,
$file
,
$location
,
$sourcetype
)
or
die
"
sql problem detected
",
$dbh
->
errstr
;
}
}
foreach
my
$dcpair
(
@
{
$refhash
->
{"
dcrecords
"}})
{
foreach
my
$dcpair
(
@
{
$refhash
->
{"
dcrecords
"}})
{
my
(
$dckey
,
$dcvalue
)
=
@
{
$dcpair
};
my
(
$dckey
,
$dcvalue
)
=
@
{
$dcpair
};
# quote ' in dcvalue
# quote ' in dcvalue
$dcvalue
=~
tr/'/"/
;
$dcvalue
=~
tr/'/"/
;
say
$fh
"
EXECUTE dc_plan ( '
$ieid
'
,
'
$dckey
'
,
'
$dcvalue
');
"
;
$sth_dc_plan
->
execute
(
$ieid
,
$dckey
,
$dcvalue
)
or
die
"
sql problem detected
",
$dbh
->
errstr
;
}
}
say
$fh
"
COMMIT;
";
return
1
;
say
$fh
"
\n
";
return
;
}
}
...
@@ -207,11 +222,15 @@ sub write_addsql ($fh, $refhash) {
...
@@ -207,11 +222,15 @@ sub write_addsql ($fh, $refhash) {
###############################################################################
###############################################################################
# add INDEX and other TRICKs to increase performance
# add INDEX and other TRICKs to increase performance
###############################################################################
###############################################################################
sub
write_index_creation
($fh) {
sub
write_index_creation
($dbh) {
say
$fh
"
-- BEGIN;
";
my
$sql
=
<<"SQL_INDEX";
say
$fh
"
-- CREATE UNIQUE INDEX aip_index on aip (ie_id);
";
-- BEGIN;
say
$fh
"
-- COMMIT;
";
-- CREATE UNIQUE INDEX aip_index on aip (ie_id);
return
;
-- COMMIT;
SQL_INDEX
my
$sth
=
$dbh
->
prepare
(
$sql
);
$sth
->
execute
()
or
die
"
sql problem detected
",
$dbh
->
errstr
;
return
1
;
}
}
###############################################################################
###############################################################################
...
@@ -456,18 +475,21 @@ sub searching_ie_files ($dir, $tmp_ies_unsorted_file) {
...
@@ -456,18 +475,21 @@ sub searching_ie_files ($dir, $tmp_ies_unsorted_file) {
############# main ############################################################
############# main ############################################################
###############################################################################
###############################################################################
###############################################################################
###############################################################################
my
$recovery
=
undef
;
my
$flag_recovery
=
undef
;
my
$flag_sqldump
=
undef
;
my
$db_filename
=
$db_name
.
"
.db
";
my
@ARGV_tail
;
my
@ARGV_tail
;
GetOptions
(
GetOptions
(
"
recovery
"
=>
\
$recovery
,
"
recovery
"
=>
\
$flag_recovery
,
"
sqlitedb-file=s
"
=>
\
$db_filename
,
"
enable_sqldump
"
=>
\
$flag_sqldump
,
'
<>
'
=>
sub
{
push
@ARGV_tail
,
@_
;}
'
<>
'
=>
sub
{
push
@ARGV_tail
,
@_
;}
);
);
if
(
$#ARGV_tail
!=
1
)
{
if
(
$#ARGV_tail
<
0
)
{
die
"
you need a
SQL-file and a
directory as argument
\n
";
die
"
you need a directory as argument
\n
";
}
}
if
(
defined
$recovery
)
{
warn
"
recovery enabled for XML processing
\n
";
}
if
(
defined
$flag_recovery
)
{
warn
"
recovery enabled for XML processing
\n
";
}
my
$sqlfile
=
shift
@ARGV_tail
;
if
(
$sqlfile
!~
m/[A-Za-z0-9]+\.sql$/
)
{
die
"
SQL file should be named like 'foo.sql', but was '
$sqlfile
'
\n
";}
my
$dir
=
shift
@ARGV_tail
;
my
$dir
=
shift
@ARGV_tail
;
if
(
defined
$dir
&&
-
d
"
$dir
")
{
if
(
defined
$dir
&&
-
d
"
$dir
")
{
...
@@ -487,20 +509,20 @@ sub searching_ie_files ($dir, $tmp_ies_unsorted_file) {
...
@@ -487,20 +509,20 @@ sub searching_ie_files ($dir, $tmp_ies_unsorted_file) {
my
$fh_truncated_IEs
=
$tmp_ies_truncated_file
->
openr
();
my
$fh_truncated_IEs
=
$tmp_ies_truncated_file
->
openr
();
my
$count
=
0
;
my
$count
=
0
;
my
$progressbar
=
Time::
Progress
->
new
(
min
=>
0
,
max
=>
$cnt_truncated_files
,
smoothing
=>
1
);
my
$progressbar
=
Time::
Progress
->
new
(
min
=>
0
,
max
=>
$cnt_truncated_files
,
smoothing
=>
1
);
open
(
my
$fh
,
"
>:encoding(UTF-8)
",
"
$sqlfile
")
||
die
"
could not
open
file '
$
sql
file
' for writing, $!
"
;
my
$dbh
=
DBI
->
connect
("
dbi:SQLite:dbname=
$db_filename
",
"",
"
")
or
die
"
could not
connect to database (
file '
$
db_
file
name
')
",
$
DBI::
errstr
;
write_database_creation
(
$
f
h
);
write_database_creation
(
$
db
h
);
write_tables_creation
(
$
f
h
);
write_tables_creation
(
$
db
h
);
write_prepare_insert
(
$
f
h
);
write_prepare_insert
(
$
db
h
);
while
(
<
$fh_truncated_IEs
>
)
{
while
(
<
$fh_truncated_IEs
>
)
{
chomp
;
chomp
;
print
$progressbar
->
report
("
parse IE files: %40b ETA: %E
\r
",
$count
++
);
print
$progressbar
->
report
("
parse IE files: %40b ETA: %E
\r
",
$count
++
);
s/V0*(\d+-IE)/V$1/
;
# revert fake version
s/V0*(\d+-IE)/V$1/
;
# revert fake version
my
$ret
=
parse_iexml
(
$_
,
$recovery
);
my
$ret
=
parse_iexml
(
$_
,
$
flag_
recovery
);
write_addsql
(
$
f
h
,
$ret
);
write_addsql
(
$
db
h
,
$ret
);
}
}
say
"";
say
"";
write_index_creation
(
$
f
h
);
write_index_creation
(
$
db
h
);
close
(
$fh
);
$dbh
->
disconnect
or
warn
("
disconnecting problems,
",
$dbh
->
errstr
);
say
"
processed
$count
uniq IEs
";
say
"
processed
$count
uniq IEs
";
}
else
{
}
else
{
die
"
no directory given on commandline
"
die
"
no directory given on commandline
"
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment