Skip to content
Snippets Groups Projects
Commit f03c5f04 authored by Andreas Romeyke's avatar Andreas Romeyke
Browse files

- added cache support to avoid superfluous reading

parent 4156d360
No related branches found
No related tags found
No related merge requests found
...@@ -14,6 +14,8 @@ use Text::CSV_PP; ...@@ -14,6 +14,8 @@ use Text::CSV_PP;
# ABSTRACT: main module for ta-tool # ABSTRACT: main module for ta-tool
our %config; our %config;
our %cache;
our $cache_path;
BEGIN{ BEGIN{
my $home = path($ENV{'HOME'}); my $home = path($ENV{'HOME'});
if ($home->is_dir() && !$home->is_rootdir) { if ($home->is_dir() && !$home->is_rootdir) {
...@@ -22,9 +24,17 @@ BEGIN{ ...@@ -22,9 +24,17 @@ BEGIN{
if ($config_path->is_file) { if ($config_path->is_file) {
%config = YAML::LoadFile($config_path); %config = YAML::LoadFile($config_path);
} }
$cache_path = $home->child('.cache')->child('ta-tool.cache');
if ($cache_path->is_file) {
%cache = YAML::LoadFile($cache_path);
}
} }
} }
# Persist the in-memory log-scan cache when the program exits.
#
# $cache_path is only assigned in the BEGIN block when the user's home
# directory is usable, so it may still be undefined here; in that case
# there is nowhere to write and the dump is skipped instead of dying at exit.
# The cache's parent directory is created on demand, because a fresh
# account may not have it yet.
END {
    if ( defined $cache_path ) {
        my $cache_dir = $cache_path->parent;
        $cache_dir->mkpath if !$cache_dir->is_dir;
        # The hash is dumped as a flat list on purpose: the loader reads it
        # back with `%cache = YAML::LoadFile($cache_path)`, so the on-disk
        # format must stay a key/value document sequence.
        YAML::DumpFile( $cache_path, %cache );
    }
}
sub sru_search { sub sru_search {
my $searchtype = shift; my $searchtype = shift;
my $query = shift; my $query = shift;
...@@ -160,7 +170,7 @@ sub helper_scan_log { ...@@ -160,7 +170,7 @@ sub helper_scan_log {
$fh = $file->filehandle; $fh = $file->filehandle;
} }
if (defined $fh) { if (defined $fh) {
$fh_processing->( $fh ); $fh_processing->( $fh, $file );
} }
undef $fh; undef $fh;
} }
...@@ -205,9 +215,23 @@ sub trace_log { ...@@ -205,9 +215,23 @@ sub trace_log {
$line_rx3 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Loaded \d+ files for: REP$searchid \(IE\d+\)}); $line_rx3 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Loaded \d+ files for: REP$searchid \(IE\d+\)});
$line_rx4 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Representation $searchid IE \d+ Copy ID: \d+}); $line_rx4 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Representation $searchid IE \d+ Copy ID: \d+});
} }
my $fh_processing_stage1 = sub { my $fh_processing_stage1 = sub {
my $fh = shift; my $fh = shift;
my $file = shift;
my $file_md5 = path($file)->digest();
return if (
exists( $cache{$file_md5} )
and (
(
exists( $cache{$file_md5}->{deposit_dir}->{$deposit_dir})
and exists( $cache{$file_md5}->{deposit_id}->{$deposit_id})
and exists( $cache{$file_md5}->{sip_id}->{$sip_id})
) or (
exists( $cache{$file_md5}->{ie_pid}->{$ie_pid})
and exists( $cache{$file_md5}->{rep_id}{$rep_id})
)
)
);
while(<$fh>) { while(<$fh>) {
if ( if (
(defined $sip_id and defined $deposit_id and defined $deposit_dir) (defined $sip_id and defined $deposit_id and defined $deposit_dir)
...@@ -221,19 +245,47 @@ sub trace_log { ...@@ -221,19 +245,47 @@ sub trace_log {
} }
chomp; chomp;
if ( m/$line_rx1/ ) { if ( m/$line_rx1/ ) {
if (!defined $sip_id and m/$sip_rx(\d{6}),/) { $sip_id = $2; } if (!defined $sip_id and m/$sip_rx(\d{6}),/) {
if (!defined $deposit_dir and m/originalDirName=([^,]*),/) { $deposit_dir = $1; } $sip_id = $2;
if (!defined $deposit_id and m/depositId=(\d+),/) { $deposit_id = $1;} $cache{$file_md5}->{sip_id}->{$sip_id}=1; # mark as match
}
if (!defined $deposit_dir and m/originalDirName=([^,]*),/) {
$deposit_dir = $1;
$cache{$file_md5}->{deposit_dir}->{$deposit_dir}=1; # mark as match
}
if (!defined $deposit_id and m/depositId=(\d+),/) {
$deposit_id = $1;
$cache{$file_md5}->{deposit_id}->{$deposit_id}=1; # mark as match
}
} elsif (m/$line_rx2/) { } elsif (m/$line_rx2/) {
if (!defined $sip_id and m/SIP (\d{6})/) { $sip_id = $1;} if (!defined $sip_id and m/SIP (\d{6})/) {
if (!defined $deposit_id and m/Deposit Activity ID=(\d+)/) { $deposit_id = $1;} $sip_id = $1;
$cache{$file_md5}->{sip_id}->{$sip_id}=1; # mark as match
}
if (!defined $deposit_id and m/Deposit Activity ID=(\d+)/) {
$deposit_id = $1;
$cache{$file_md5}->{deposit_id}->{$deposit_id}=1; # mark as match
}
} elsif (m/$line_rx3/) { } elsif (m/$line_rx3/) {
if (!defined $ie_pid and m/Loaded \d+ files for: REP\d+ \((IE\d+)/) { $ie_pid = $1;} if (!defined $ie_pid and m/Loaded \d+ files for: REP\d+ \((IE\d+)/) {
if (!defined $rep_id and m/Loaded \d+ files for: (REP\d+)/) { $rep_id = $1;} $ie_pid = $1;
$cache{$file_md5}->{ie_pid}->{$ie_pid}=1; # mark as match
}
if (!defined $rep_id and m/Loaded \d+ files for: (REP\d+)/) {
$rep_id = $1;
$cache{$file_md5}->{rep_id}->{$rep_id}=1; # mark as match
}
} elsif (m/$line_rx4/) { } elsif (m/$line_rx4/) {
my $rx = qr/Representation (\d+) IE (\d+)/; my $rx = qr/Representation (\d+) IE (\d+)/;
if (!defined $ie_pid and m/$rx/) { $ie_pid = $2;} if (!defined $ie_pid and m/$rx/) {
if (!defined $rep_id and m/$rx/) { $rep_id = $1;} $ie_pid = $2;
$cache{$file_md5}->{ie_pid}->{$ie_pid}=1; # mark as match
}
if (!defined $rep_id and m/$rx/) {
$rep_id = $1;
$cache{$file_md5}->{rep_id}->{$rep_id}=1; # mark as match
}
} }
} }
return 1; return 1;
...@@ -255,6 +307,19 @@ sub trace_log { ...@@ -255,6 +307,19 @@ sub trace_log {
use warnings; use warnings;
my $fh_processing_stage2 = sub { my $fh_processing_stage2 = sub {
my $fh = shift; my $fh = shift;
my $file = shift;
my $file_md5 = path($file)->digest();
return if ! (
exists( $cache{$file_md5} )
and
(
exists( $cache{$file_md5}->{deposit_dir}->{$deposit_dir})
or exists( $cache{$file_md5}->{deposit_id}->{$deposit_id})
or exists( $cache{$file_md5}->{sip_id}->{$sip_id})
or exists( $cache{$file_md5}->{ie_pid}->{$ie_pid})
or exists( $cache{$file_md5}->{rep_id}->{$rep_id})
)
);
while(<$fh>) { while(<$fh>) {
if (!m/^$date_rx/) { if (!m/^$date_rx/) {
next; next;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment