Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
tools for technical analysts
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Harbor Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Digital Preservation
tools for technical analysts
Commits
9f74b321
Verified
Commit
9f74b321
authored
11 months ago
by
Andreas Romeyke
Browse files
Options
Downloads
Patches
Plain Diff
- init
parent
f327a807
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
lib/SLUB/LZA/TA/Command/sample_testing.pm
+276
-0
276 additions, 0 deletions
lib/SLUB/LZA/TA/Command/sample_testing.pm
with
276 additions
and
0 deletions
lib/SLUB/LZA/TA/Command/sample_testing.pm
0 → 100644
+
276
−
0
View file @
9f74b321
package
SLUB::LZA::TA::Command::
sample_testing
;
use
SLUB::LZA::
TA
-
command
;
use
v5
.36
;
use
utf8
;
use
SLUB::LZA::TA::Archivematica::
Elasticsearch
;
use
SLUB::LZA::TA::Archivematica::Elasticsearch::
PrepareQuery
;
use
SLUB::LZA::TA::
Output
;
use
Archive::BagIt::
Fast
;
use
Date::
Calc
qw(Date_to_Time Today Add_Delta_YM Add_Delta_YMD Day_of_Week)
;
use
List::
Util
qw(sample)
;
use
namespace::
autoclean
-
except
=>
qr{SLUB::LZA::TA::.*}
;
# VERSION
# ABSTRACT: sample testing module for ta-tool
sub
abstract
{
return
"
tests samples of AIPs in current Archival Information System (AIS)
";}
my
$base_cmd
=
"
$0 sample-test
";
my
$dummycmd
=
"
"
x
length
(
$base_cmd
);
my
$description
=
<<"DESCR";
Ask an AIS for a sample of AIPs and tests them.
Overview:
$base_cmd [--daily| --weekly | --monthly | --yearly] [--workflow WORKFLOW] [--factor FACTOR]
$dummycmd --version
$dummycmd --help
Examples:
* Report sample testing statistics of AIPs last month for workflow Kitodo
'$base_cmd --monthly --workflow Kitodo'
* Report 1‰ sample statistics of AIPs last year
'$base_cmd --yearly --factor 0.001'
A printable PDF version could be generated using ff. commands:
'$base_cmd | asciidoctor-pdf - > report.pdf'
HINT: If you want lists, use the 'search' command instead!
HINT: ensure the ElasticSearch server allows to return >10.000 results if your archive is large
DESCR
sub
description
{
return
"
$description
"
}
sub
opt_spec
{
my
@global_opts
=
SLUB::LZA::TA::
common_global_opt_spec
();
my
@local_opts
=
(
[
'
datemode
'
=>
hidden
=>
{
one_of
=>
[
[
'
daily|d
'
=>
'
sample based on last day
'],
[
'
weekly|W
'
=>
'
sample based on last week
'],
[
'
monthly|m
'
=>
'
sample based on last month
'
],
[
'
yearly|y
'
=>
'
sample based on last year
'
],
[
'
ldpyearly
'
=>
'
sample based on last LDP year 01.11. - 31.10.
'],
[
'
complete|c
'
=>
'
sample based on all AIPs (default)
'],
[
'
date-from=s
'
=>
'
sample based on date range, beginning date in format "YYYY-MM-DD", implies "--date-to"
'],
],
},
],
[
'
date-to=s
'
=>
'
report based on date range, end date in format "YYYY-MM-DD", implies "--date-from"
'],
[]
,
[
'
output-format
'
=>
hidden
=>
{
one_of
=>
[
[
'
output-as-csv|C
'
=>
'
prints output as Comma Separated Values (CSV)
'
],
[
'
output-as-raw|R
'
=>
'
print raw hash output
'
],
[
'
output-as-rsv|r
'
=>
'
prints output as Raw Strings Values (RSV)
'
],
[
'
output-as-asciidoc|a
'
=>
'
prints output as AsciiDoc [default]
'
],
],
}
],
[]
,
[
'
factor|f=f
'
=>
'
use the given sampling factor (based on AIP count, default: 0.001)
',
{
default
=>
0.001
}],
[]
,
);
return
(
@global_opts
,
[]
,
@local_opts
);
}
sub
validate_args
{
## no critic (CognitiveComplexity::ProhibitExcessCognitiveComplexity)
my
(
$self
,
$opt
,
$args
)
=
@_
;
SLUB::LZA::TA::
common_global_validate
(
$self
,
$opt
,
$args
);
# no args allowed but options!
$self
->
usage_error
("
No args allowed
")
if
@$args
;
my
(
$cyear
,
$cmonth
,
$cday
)
=
Today
();
my
(
$from_year
,
$from_month
,
$from_day
);
my
(
$to_year
,
$to_month
,
$to_day
);
unless
(
exists
$opt
->
{
datemode
})
{
$opt
->
{
datemode
}
=
"
complete
";
$opt
->
{
complete
}
=
1
;
}
my
%date_recipe
;
$date_recipe
{
daily
}
=
sub
{
(
$from_year
,
$from_month
,
$from_day
)
=
Add_Delta_YMD
(
$cyear
,
$cmonth
,
$cday
,
0
,
0
,
-
1
);
(
$to_year
,
$to_month
,
$to_day
)
=
(
$from_year
,
$from_month
,
$from_day
);
};
$date_recipe
{
weekly
}
=
sub
{
(
$from_year
,
$from_month
,
$from_day
)
=
Add_Delta_YMD
(
$cyear
,
$cmonth
,
$cday
,
0
,
0
,
-
Day_of_Week
(
$cyear
,
$cmonth
,
$cday
)
-
6
);
(
$to_year
,
$to_month
,
$to_day
)
=
Add_Delta_YMD
(
$from_year
,
$from_month
,
$from_day
,
0
,
0
,
6
);
};
$date_recipe
{
monthly
}
=
sub
{
(
$from_year
,
$from_month
,
$from_day
)
=
Add_Delta_YM
(
$cyear
,
$cmonth
,
1
,
0
,
-
1
);
(
$to_year
,
$to_month
,
$to_day
)
=
Add_Delta_YMD
(
$from_year
,
$from_month
,
$from_day
,
0
,
1
,
-
1
);
};
$date_recipe
{
yearly
}
=
sub
{
(
$from_year
,
$from_month
,
$from_day
)
=
Add_Delta_YM
(
$cyear
,
1
,
1
,
-
1
,
0
);
(
$to_year
,
$to_month
,
$to_day
)
=
Add_Delta_YMD
(
$from_year
,
$from_month
,
$from_day
,
1
,
0
,
-
1
);
};
$date_recipe
{
complete
}
=
sub
{
$from_year
=
2015
;
$from_month
=
1
;
$from_day
=
1
;
$to_year
=
$cyear
;
$to_month
=
$cmonth
;
$to_day
=
$cday
;
};
$date_recipe
{
ldpyearly
}
=
sub
{
(
$from_year
,
$from_month
,
$from_day
)
=
Add_Delta_YM
(
$cyear
,
1
,
1
,
-
1
,
-
2
);
(
$to_year
,
$to_month
,
$to_day
)
=
Add_Delta_YMD
(
$from_year
,
$from_month
,
$from_day
,
1
,
0
,
-
1
);
};
$date_recipe
{
date_from
}
=
sub
{
$self
->
usage_error
('
--date-from implies --date-to"
')
unless
exists
$opt
->
{
date_to
};
if
(
$opt
->
{
date_from
}
=~
m/^(\d{4})-(\d{2})-(\d{2})$/
)
{
(
$from_year
,
$from_month
,
$from_day
)
=
(
$
1
,
$
2
,
$
3
);
}
else
{
$self
->
usage_error
('
--date-from expects date in format "YYYY-MM-DD", got "
'
.
$opt
->
{
date_from
}
.
'
"
');
}
};
$date_recipe
{
date_to
}
=
sub
{
$self
->
usage_error
('
--date-to implies --date-from"
')
unless
exists
$opt
->
{
date_from
};
if
(
$opt
->
{
date_to
}
=~
m/^(\d{4})-(\d{2})-(\d{2})$/
)
{
(
$to_year
,
$to_month
,
$to_day
)
=
(
$
1
,
$
2
,
$
3
);
}
else
{
$self
->
usage_error
('
--date-to expects date in format "YYYY-MM-DD", got "
',
$opt
->
{
date_to
}
.
'
"
');
}
};
foreach
my
$key
(
keys
%
{
$opt
}
)
{
$date_recipe
{
$key
}
->
()
if
(
defined
$date_recipe
{
$key
}
and
ref
$date_recipe
{
$key
}
eq
'
CODE
');
}
$opt
->
{
output_format
}
=
'
output_as_asciidoc
'
unless
(
exists
$opt
->
{
output_format
});
my
$from_epoch
=
Date_to_Time
(
$from_year
,
$from_month
,
$from_day
,
0
,
0
,
0
);
my
$to_epoch
=
Date_to_Time
(
$to_year
,
$to_month
,
$to_day
,
0
,
0
,
0
);
$self
->
usage_error
('
--date-to should have a date newer than --date-from
')
if
(
$from_epoch
>
$to_epoch
);
printf
STDERR
"
reporting for period %04u-%02u-%02u … %04u-%02u-%02u
\n
",
$from_year
,
$from_month
,
$from_day
,
$to_year
,
$to_month
,
$to_day
;
$opt
->
{
creationdate_epochs
}
->
{
from
}
=
$from_epoch
;
$opt
->
{
creationdate_epochs
}
->
{
to
}
=
$to_epoch
;
$opt
->
{
creationdate_epochs
}
->
{
from_string
}
=
sprintf
("
%04u-%02u-%02u
",
$from_year
,
$from_month
,
$from_day
);
$opt
->
{
creationdate_epochs
}
->
{
to_string
}
=
sprintf
("
%04u-%02u-%02u
",
$to_year
,
$to_month
,
$to_day
);
$self
->
usage_error
('
--factor FACTOR expects a FACTOR between 0 and 1, got
',
$opt
->
{
factor
})
if
(
$opt
->
{
factor
}
>
1.0
or
$opt
->
{
factor
}
<
0.0
);
return
1
;
}
sub
_check_aip
($sample) {
my
$path
=
$sample
->
{
filePath
};
my
$is_valid
=
0
;
if
(
-
d
$path
)
{
my
$bag
=
Archive::BagIt::
Fast
->
new
(
$path
);
$bag
->
use_plugins
('
Archive::BagIt::Plugin::Algorithm::SHA512
');
$is_valid
=
$bag
->
verify_bag
(
{
report_all_errors
=>
1
}
);
if
(
!
$is_valid
)
{
$is_valid
=
0
;}
}
my
$line
;
$line
->
{
aipid
}
=
$sample
->
{
uuid
};
$line
->
{
filePath
}
=
$sample
->
{
filePath
};
$line
->
{
lzaid
}
=
$sample
->
{
transferMetadata
}
->
[
0
]
->
{'
bim:bag-info_dict
'}
->
{'
bim:SLUBArchiv-lzaId
'};
$line
->
{
isvalid
}
=
$is_valid
;
$line
->
{
checkdate
}
=
localtime
;
$line
->
{
creationdate
}
=
localtime
$sample
->
{
created
};
return
$line
;
}
sub
_execute
{
my
(
$self
,
$opt
,
$args
)
=
@_
;
my
$aips_query
;
my
$aips_response
;
use
Data::
Printer
;
# only index aips needed
$aips_query
=
SLUB::LZA::TA::Archivematica::Elasticsearch::PrepareQuery::
prepare_aip_query
(
$opt
);
# next lines extend query with reporting
$aips_query
->
{'
_source
'}
=
{'
includes
'
=>
['
uuid
',
'
filePath
',
'
transferMetadata.bim:bag-info_dict.bim:SLUBArchiv-lzaId
',
'
created
']};
#p($aips_query);
$aips_response
=
SLUB::LZA::TA::Archivematica::Elasticsearch::
query_elasticsearch
(
$
SLUB::LZA::TA::
config
{
elasticsearch_protocol
},
$
SLUB::LZA::TA::
config
{
elasticsearch_host
},
$
SLUB::LZA::TA::
config
{
elasticsearch_port
},
'
aips
',
# indexname
$aips_query
,
# query_hash ref
{
debug
=>
$opt
->
{
debug
},
}
);
my
$max_found_aips
=
$aips_response
->
{
hits
}
->
{
total
};
my
$sample_size
=
int
(
$max_found_aips
*
$opt
->
{
factor
}
+
0.5
);
if
(
$opt
->
{
debug
})
{
say
STDERR
"
found
$max_found_aips
AIPs, use only a
$opt
->{factor} subsample of
$sample_size
AIPs
";
}
# select first sample size aips
my
@sample_set
=
sort
{
$a
->
{
uuid
}
cmp
$b
->
{
uuid
}}
List::Util::
sample
$sample_size
,
map
{
$_
->
{'
_source
'}
}
@
{
$aips_response
->
{
hits
}
->
{
hits
}
};
my
@result
=
map
{
_check_aip
(
$_
)
}
@sample_set
;
return
\
@result
;
}
sub
execute
($self, $opt, $args) {
my
%results
;
$results
{
date
}
=
sprintf
("
%04u-%02u-%02u
",
Today
());
$results
{
package
}
=
__PACKAGE__
;
$results
{
from
}
=
$opt
->
{
creationdate_epochs
}
->
{
from_string
};
$results
{
to
}
=
$opt
->
{
creationdate_epochs
}
->
{
to_string
};
# we need only _source->filePath and AIPID (and LZA-ID?)
my
$res
=
_execute
(
$self
,
$opt
,
$args
);
$results
{
lines
}
=
$res
;
my
(
$headers
,
$table
)
=
prepare_for_table
(
$res
);
print_humanreadable_report
(
\
%results
)
if
(
$opt
->
{
output_format
}
eq
'
output_as_asciidoc
');
SLUB::LZA::TA::Output::RSV::
print_results
(
$table
)
if
(
$opt
->
{
output_format
}
eq
'
output_as_rsv
');
SLUB::LZA::TA::Output::CSV::
print_results
(
$table
)
if
(
$opt
->
{
output_format
}
eq
'
output_as_csv
');
SLUB::LZA::TA::Output::Raw::
print_results
(
\
%results
)
if
(
$opt
->
{
output_format
}
eq
'
output_as_raw
');
say
STDERR
"
report is already sent to STDOUT.
";
return
1
;
}
sub
prepare_for_table
($results) {
## no critic (CognitiveComplexity::ProhibitExcessCognitiveComplexity)
my
@table
=
@
{
$results
};
my
@headers
=
sort
keys
%
{
$table
[
0
]
};
return
\
@headers
,
\
@table
;
}
sub
_print_aip
($line) {
my
$aipid
=
$line
->
{
aipid
};
#my $isvalid=($line->{isvalid}?"valid (✅)":"*invalid* (❎)");
my
$isvalid
=
(
$line
->
{
isvalid
}?"
valid
":"
invalid
");
my
$lzaid
=
$line
->
{
lzaid
}?"
With lzaid '
$line
->{lzaid}'
":"
No lzaid defined
";
my
$path
=
$line
->
{
filePath
};
say
"
* AIP `
$aipid
`
";
say
"
** Is
$isvalid
";
say
"
**
$lzaid
";
say
"
** Path is `
$path
`
";
say
"
** processed at
$line
->{checkdate}, created at
$line
->{creationdate}
";
return
1
;
}
sub
print_humanreadable_report
($results) {
## no critic (CognitiveComplexity::ProhibitExcessCognitiveComplexity)
say
<<"RPTHEADER";
:lang: en
:doctype: article
:date: $results->{date}
:icons:
:text-align: left
:generator: $0 ($results->{package})
= Report from $results->{from} to $results->{to}
RPTHEADER
say
"
== Complete archive
\n
";
say
"
=== Invalid AIPs
";
foreach
my
$line
(
grep
{
!
(
$_
->
{
is_valid
})}
@
{
$results
->
{
lines
}
})
{
_print_aip
(
$line
)
}
say
"
=== Valid AIPs
";
foreach
my
$line
(
grep
{
!
(
$_
->
{
is_valid
})}
@
{
$results
->
{
lines
}
})
{
_print_aip
(
$line
)
}
return
1
;
}
1
;
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment