diff --git a/lib/SLUB/LZA/Rosetta/TA.pm b/lib/SLUB/LZA/Rosetta/TA.pm index 03e6f7e3de7469a4f80348c26971d07cc634fd2b..1d6645c0c1fe19f18cd3d7ac6d678b64d07e6e9c 100644 --- a/lib/SLUB/LZA/Rosetta/TA.pm +++ b/lib/SLUB/LZA/Rosetta/TA.pm @@ -14,6 +14,8 @@ use Text::CSV_PP; # ABSTRACT: main module for ta-tool our %config; +our %cache; +our $cache_path; BEGIN{ my $home = path($ENV{'HOME'}); if ($home->is_dir() && !$home->is_rootdir) { @@ -22,9 +24,17 @@ BEGIN{ if ($config_path->is_file) { %config = YAML::LoadFile($config_path); } + $cache_path = $home->child('.cache')->child('ta-tool.cache'); + if ($cache_path->is_file) { + %cache = YAML::LoadFile($cache_path); + } } } +END { + YAML::DumpFile($cache_path, %cache); +} + sub sru_search { my $searchtype = shift; my $query = shift; @@ -160,7 +170,7 @@ sub helper_scan_log { $fh = $file->filehandle; } if (defined $fh) { - $fh_processing->( $fh ); + $fh_processing->( $fh, $file ); } undef $fh; } @@ -205,9 +215,23 @@ sub trace_log { $line_rx3 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Loaded \d+ files for: REP$searchid \(IE\d+\)}); $line_rx4 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Representation $searchid IE \d+ Copy ID: \d+}); } - my $fh_processing_stage1 = sub { my $fh = shift; + my $file = shift; + my $file_md5 = path($file)->digest(); + return if ( + exists( $cache{$file_md5} ) + and ( + ( + exists( $cache{$file_md5}->{deposit_dir}->{$deposit_dir}) + and exists( $cache{$file_md5}->{deposit_id}->{$deposit_id}) + and exists( $cache{$file_md5}->{sip_id}->{$sip_id}) + ) or ( + exists( $cache{$file_md5}->{ie_pid}->{$ie_pid}) + and exists( $cache{$file_md5}->{rep_id}{$rep_id}) + ) + ) + ); while(<$fh>) { if ( (defined $sip_id and defined $deposit_id and defined $deposit_dir) @@ -221,19 +245,47 @@ sub trace_log { } chomp; if ( m/$line_rx1/ ) { - if (!defined $sip_id and m/$sip_rx(\d{6}),/) { $sip_id = $2; } - if (!defined $deposit_dir and m/originalDirName=([^,]*),/) { $deposit_dir = $1; } - if (!defined $deposit_id and m/depositId=(\d+),/) { $deposit_id = $1;} + if (!defined $sip_id and m/$sip_rx(\d{6}),/) { + $sip_id = $2; + $cache{$file_md5}->{sip_id}->{$sip_id}=1; # mark as match + } + if (!defined $deposit_dir and m/originalDirName=([^,]*),/) { + $deposit_dir = $1; + $cache{$file_md5}->{deposit_dir}->{$deposit_dir}=1; # mark as match + } + if (!defined $deposit_id and m/depositId=(\d+),/) { + $deposit_id = $1; + $cache{$file_md5}->{deposit_id}->{$deposit_id}=1; # mark as match + } + } elsif (m/$line_rx2/) { - if (!defined $sip_id and m/SIP (\d{6})/) { $sip_id = $1;} - if (!defined $deposit_id and m/Deposit Activity ID=(\d+)/) { $deposit_id = $1;} + if (!defined $sip_id and m/SIP (\d{6})/) { + $sip_id = $1; + $cache{$file_md5}->{sip_id}->{$sip_id}=1; # mark as match + } + if (!defined $deposit_id and m/Deposit Activity ID=(\d+)/) { + $deposit_id = $1; + $cache{$file_md5}->{deposit_id}->{$deposit_id}=1; # mark as match + } } elsif (m/$line_rx3/) { - if (!defined $ie_pid and m/Loaded \d+ files for: REP\d+ \((IE\d+)/) { $ie_pid = $1;} - if (!defined $rep_id and m/Loaded \d+ files for: (REP\d+)/) { $rep_id = $1;} + if (!defined $ie_pid and m/Loaded \d+ files for: REP\d+ \((IE\d+)/) { + $ie_pid = $1; + $cache{$file_md5}->{ie_pid}->{$ie_pid}=1; # mark as match + } + if (!defined $rep_id and m/Loaded \d+ files for: (REP\d+)/) { + $rep_id = $1; + $cache{$file_md5}->{rep_id}->{$rep_id}=1; # mark as match + } } elsif (m/$line_rx4/) { my $rx = qr/Representation (\d+) IE (\d+)/; - if (!defined $ie_pid and m/$rx/) { $ie_pid = $2;} - if (!defined $rep_id and m/$rx/) { $rep_id = $1;} + if (!defined $ie_pid and m/$rx/) { + $ie_pid = $2; + $cache{$file_md5}->{ie_pid}->{$ie_pid}=1; # mark as match + } + if (!defined $rep_id and m/$rx/) { + $rep_id = $1; + $cache{$file_md5}->{rep_id}->{$rep_id}=1; # mark as match + } } } return 1; @@ -255,6 +307,19 @@ sub trace_log { use warnings; my $fh_processing_stage2 = sub { my $fh = shift; + my $file = shift; + my $file_md5 = path($file)->digest(); + return if ! ( + exists( $cache{$file_md5} ) + and + ( + exists( $cache{$file_md5}->{deposit_dir}->{$deposit_dir}) + or exists( $cache{$file_md5}->{deposit_id}->{$deposit_id}) + or exists( $cache{$file_md5}->{sip_id}->{$sip_id}) + or exists( $cache{$file_md5}->{ie_pid}->{$ie_pid}) + or exists( $cache{$file_md5}->{rep_id}->{$rep_id}) + ) + ); while(<$fh>) { if (!m/^$date_rx/) { next;