Skip to content
Snippets Groups Projects
Commit 2bdb2465 authored by Andreas Romeyke's avatar Andreas Romeyke
Browse files

- init

parents
No related branches found
No related tags found
No related merge requests found
use strict;
use warnings;
use SLUB::LZA::Rosetta::TA;
SLUB::LZA::Rosetta::TA->run;
=head1 NAME
ta-tool - the swiss knife for the technical analyst working with Rosetta Achival Information system
=head1 SYNOPSIS
ta-tool <cmd> [options]
COMMANDS:
search the search
count
OPTIONS:
-v --verbose Show more detailed option
-h --help Prints this help information
=head1 DESCRIPTION
TODO
=head1 COPYRIGHT AND LICENSE
Copyright by Andreas Romeyke, free licensed under Perl Artistic License.
=cut
1;
name = SLUB-LZA-Rosetta-TA
author = Andreas Romeyke <pause@andreas-romeyke.de>
license = Perl_5
copyright_holder = Andreas Romeyke
copyright_year = 2021
version = 0.001
[@Basic]
use strict;
use warnings;
package SLUB::LZA::Rosetta::TA;
use App::Cmd::Setup -app;
use Path::Tiny;
use YAML;
use LWP;
#use LWP::Debug qw(+);
use LWP::UserAgent;
use Carp;
use feature qw(say);
our %config;
my $home = path( $ENV{'HOME'} );
if ($home->is_dir() && !$home->is_rootdir) {
my $config_path = $home->child('.config')->child('ta-tool.rc');
our $config_file = $config_path;
if ($config_path->is_file) {
%config = YAML::LoadFile($config_path);
}
}
sub sru_search {
my $searchtype = shift;
my $query = shift;
my $startrecord = shift;
my $maxrecords = shift;
my $is_verbose = shift;
my %searchpaths = (
ie => 'permanent/ie',
file => 'permanent/file',
sip => 'operational'
);
if (!exists $searchpaths{$searchtype}){
croak ("Code error, wrong searchtype ($searchtype) used!");
}
my $protocol = 'https';
my $host = $config{host};
my $port = '8443';
my $searchpath = $searchpaths{$searchtype};
my $srubase="${protocol}://${host}:${port}/search/${searchpath}/sru";
my $sru = "${srubase}?version=1.2&operation=searchRetrieve&startRecord=$startrecord&maximumRecords=$maxrecords&recordSchema=dc&query=${query}";
my $ua = LWP::UserAgent->new(keep_alive => 1);
$ua->agent("MyApp/0.1 ");
$ua->timeout(3600);#1h
$ua->default_headers->push_header('Accept-Encoding' => 'br, lzma, bzip2, gzip, compressed, deflate');
$ua->ssl_opts(
verify_hostname=>1,
# SSL_ca_path => '/etc/ssl/',
);
if ($is_verbose) {
say "searchurl = $sru";
}
my $req = $ua->get($sru);
if ($req->is_success) {
my $xres = $req->decoded_content;
return $xres;
} else {
croak ("Error was: ".$req->status_line());
}
}
1;
package SLUB::LZA::Rosetta::TA::Command::count;
use strict;
use warnings;
use feature qw(say);
use SLUB::LZA::Rosetta::TA -command;
sub abstract {"count IEs in Rosetta based Archival Information System";}
my $description=<<"DESCR";
Searches a Rosetta-based AIS for descriptive oder source metadata on behalf of the Technical Analyst
and return counts of matches.
Examples:
* Is this dc identifier in Archiv?
$0 count -d SLUB:LZA:Kitodo:kitodo:422766
* How many IEs were modified in 2021-05-31?
$0 count -m 2021-05-31
DESCR
sub description {
"$description"
}
sub opt_spec {
return(
["verbose|v" => "enable verbose output"],
["datemode" => hidden => {one_of => [
["creationdate|c=s" => "search based on creationdate string"],
["modificationdate|m=s" => "search based on modificationdate string"]
] } ],
["descriptive|d" => "count based on string search in descriptive metadata"],
["source|s" => "count based on string search in source metadata"],
["ie|i=s" => "search a specific IE"],
);
}
sub validate_args {
my ($self, $opt, $args) = @_;
# no args allowed but options!
$self->usage_error("No args allowed") if @$args;
}
sub execute {
my ($self, $opt, $args) = @_;
my $maxrecords="0";
my $startrecord=1;
my @queries;
if (exists $opt->{source}) {
push @queries, "IE.sourceMD.content=$opt->{source}";
}
if (exists $opt->{ie}) {
push @queries, "IE.dc.identifier==$opt->{ie}";
}
if (exists $opt->{descriptive}) {
push @queries, "IE.dc.identifier==$opt->{descriptive}";
}
if (exists $opt->{creationdate}) {
push @queries, "IE.objectCharacteristics.creationDate==$opt->{creationdate}";
}
if (exists $opt->{modificationdate}) {
push @queries, "FILE.objectCharacteristics.modificationDate==$opt->{modificationdate}";
}
my $query = join(" and ", @queries);
my $response = SLUB::LZA::Rosetta::TA::sru_search('ie', $query, $startrecord, $maxrecords, $opt->{verbose});
$response=~s|.*?<numberOfRecords>(\d+)</numberOfRecords.*|$1|s;
say $response;
}
1;
package SLUB::LZA::Rosetta::TA::Command::init;
use strict;
use warnings;
use SLUB::LZA::Rosetta::TA -command;
use YAML;
use Path::Tiny;
use feature qw(say);
sub abstract {"Initialize $0";}
sub description {"Initialize $0, preparing config"}
sub opt_spec {
return(
["verbose|v" => "enable verbose output"],
["rosettahost|r=s" => "host", {required=>1}],
);
}
sub validate_args {
my ($self, $opt, $args) = @_;
# no args allowed but options!
$self->usage_error("No args allowed") if @$args;
}
sub execute {
my ($self, $opt, $args) = @_;
my %config;
$config{host} = $opt->{rosettahost};
if (defined $SLUB::LZA::Rosetta::TA::config_file) {
if (defined $opt->{verbose}) {
say "store config in $SLUB::LZA::Rosetta::TA::config_file";
}
YAML::DumpFile($SLUB::LZA::Rosetta::TA::config_file, %config);
}
}
1;
\ No newline at end of file
package SLUB::LZA::Rosetta::TA::Command::search;
use strict;
use warnings;
use feature qw(say);
use SLUB::LZA::Rosetta::TA -command;
sub abstract {"searches Rosetta based Archival Information System";}
my $description=<<"DESCR";
Searches a Rosetta-based AIS for descriptive oder source metadata on behalf of the Technical Analyst.
Returns SRU-response including dc-records. Only the first 10 matches will be presented by default.
Examples:
* Is this dc identifier in Archiv?
$0 count -d SLUB:LZA:Kitodo:kitodo:422766
* Which IE PID has this dc identifier?
$0 search -d SLUB:LZA:Kitodo:kitodo:422766
* Which IE PID are public domain?
$0 search -s publicdomain
* Which IE PID are copyrighted?
$0 search -s copyrighted
* Which IEs were modified in 2021-05-31?
$0 search -m 2021-05-31
DESCR
sub description {
"$description"
}
sub opt_spec {
return(
["verbose|v" => "enable verbose output"],
["datemode" => hidden => {one_of => [
["creationdate|c=s" => "search based on creationdate string"],
["modificationdate|m=s" => "search based on modificationdate string"]
] } ],
[ "descriptive|d=s", "search descriptive metadata (dc identifier)"],
[ "source|s=s", "search source metadata"],
["ie|i=s" => "search a specific IE"],
["maxrecords=i", "set maxrecords, default is 10"],
["startrecord=i", "set startrecord, default is 1"],
);
}
sub validate_args {
my ($self, $opt, $args) = @_;
# no args allowed but options!
$self->usage_error("No args allowed") if @$args;
}
sub execute {
my ($self, $opt, $args) = @_;
my $maxrecords="10";
if (exists $opt->{maxrecords}) {
$maxrecords = $opt->{maxrecords};
}
my $startrecord=1;
if (exists $opt->{startrecord}) {
$startrecord = $opt->{startrecord};
}
use Data::Printer; p($opt);
my @queries;
if (exists $opt->{source}) {
push @queries, "IE.sourceMD.content=$opt->{source}";
}
if (exists $opt->{ie}) {
push @queries, "IE.dc.identifier==$opt->{ie}";
}
if (exists $opt->{descriptive}) {
push @queries, "IE.dc.identifier==$opt->{descriptive}";
}
if (exists $opt->{creationdate}) {
push @queries, "IE.objectCharacteristics.creationDate==$opt->{creationdate}";
}
if (exists $opt->{modificationdate}) {
push @queries, "FILE.objectCharacteristics.modificationDate==$opt->{modificationdate}";
}
my $query = join(" and ", @queries);
my $response = SLUB::LZA::Rosetta::TA::sru_search('ie', $query, $startrecord, $maxrecords, $opt->{verbose});
say $response;
}
1;
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment