From 8562549d78514276de73d6de4d5e2732beaebdf4 Mon Sep 17 00:00:00 2001
From: Andreas Romeyke <andreas.romeyke@slub-dresden.de>
Date: Wed, 15 Feb 2023 12:38:31 +0100
Subject: [PATCH] - refactoring, extracted common SRU query to module
 common_sru - refactoring, moved functions to separate sub modules for better
 maintainability - added simple url encoding

---
 lib/SLUB/LZA/Rosetta/TA.pm                | 401 ----------------------
 lib/SLUB/LZA/Rosetta/TA/Command/count.pm  |  53 +--
 lib/SLUB/LZA/Rosetta/TA/Command/log.pm    |  11 +-
 lib/SLUB/LZA/Rosetta/TA/Command/search.pm |  60 +---
 lib/SLUB/LZA/Rosetta/TA/Log.pm            | 329 ++++++++++++++++++
 lib/SLUB/LZA/Rosetta/TA/SOAP.pm           |  42 +++
 lib/SLUB/LZA/Rosetta/TA/SRU.pm            |  57 +++
 lib/SLUB/LZA/Rosetta/TA/common_sru.pm     |  56 +++
 8 files changed, 500 insertions(+), 509 deletions(-)
 create mode 100644 lib/SLUB/LZA/Rosetta/TA/Log.pm
 create mode 100644 lib/SLUB/LZA/Rosetta/TA/SOAP.pm
 create mode 100644 lib/SLUB/LZA/Rosetta/TA/SRU.pm
 create mode 100644 lib/SLUB/LZA/Rosetta/TA/common_sru.pm

diff --git a/lib/SLUB/LZA/Rosetta/TA.pm b/lib/SLUB/LZA/Rosetta/TA.pm
index b5b6ff2..45f8a64 100644
--- a/lib/SLUB/LZA/Rosetta/TA.pm
+++ b/lib/SLUB/LZA/Rosetta/TA.pm
@@ -36,405 +36,4 @@ END {
     YAML::DumpFile($cache_path, %cache);
 }
 
-sub sru_search {
-    my $searchtype = shift;
-    my $query = shift;
-    my $startrecord = shift;
-    my $maxrecords = shift;
-    my $is_verbose = shift;
-    my %searchpaths = (
-        ie   => 'permanent/ie',
-        file => 'permanent/file',
-        sip  => 'operational'
-    );
-
-    if (!exists $searchpaths{$searchtype}){
-        croak ("Code error, wrong searchtype ($searchtype) used!");
-    }
-    my $protocol = 'https';
-    my $host = $config{host};
-    my $port = '8443';
-    my $searchpath = $searchpaths{$searchtype};
-    my $srubase="${protocol}://${host}:${port}/search/${searchpath}/sru";
-    my $sru = "${srubase}?version=1.2&operation=searchRetrieve&startRecord=$startrecord&maximumRecords=$maxrecords&recordSchema=dc&query=${query}";
-    my $ua = LWP::UserAgent->new(keep_alive => 1);
-    $ua->agent("MyApp/0.1 ");
-    $ua->timeout(3600);#1h
-    $ua->default_headers->push_header('Accept-Encoding' => 'br, lzma, bzip2, gzip, compressed, deflate');
-    $ua->ssl_opts(
-        verify_hostname=>1,
-        # SSL_ca_path => '/etc/ssl/',
-    );
-    if ($is_verbose) {
-        say "searchurl = $sru";
-    }
-    my $req = $ua->get($sru);
-    if ($req->is_success) {
-        my  $xres = $req->decoded_content;
-        return $xres;
-    } else {
-        croak ("Error was: ".$req->status_line());
-    }
-}
-
-sub get_ie_pid_by_sip {
-    my $sip = shift;
-    my $protocol = "https";
-    my $host = $config{host};
-    my $port = 8443;
-    my $wsdl_url="${protocol}://${host}:${port}/dpsws/repository/SipWebServices?wsdl";
-    #print "DEBUG: $wsdl_url";
-    my $soap = SOAP::Lite->new;
-    $soap->proxy(
-                $wsdl_url,
-                timeout    => 3000,
-                keep_alive => 1,
-                ssl_opts   => {
-                    verify_hostname=>1,
-                    # SSL_ca_path => '/etc/ssl/',
-                }
-            );
-    $soap->ns('http://dps.exlibris.com/');
-    $soap->on_action(sub {return ''}); # remove SOAP action if used with Rosetta 6.xx or higher
-    my $som = $soap->call(
-        'getSipIEs',
-        SOAP::Data->name('arg0')->value($sip)->type('string')
-    );
-    if ($som->fault) {
-        confess ("ERROR: server '$host' says: ".$som->faultstring."\n");
-    }
-    #my $res = log_empty_result( $som, $logger, $host);
-    my $res = $som->result;
-    if ($res eq '') { # defined but empty result without SOAP fault
-        # HINT: 2 possibilities
-        # * SIP-ID doesn't exist
-        # * SIP-ID existed before but Rosetta clean up job removed all status infos
-        return; # let caller decide how to act on an empty result (no SOAP error)
-    }
-    my $ie = $res;
-    $ie=~s/^IE(\d*).*/$1/;
-    return $ie;
-}
-
-{
-    my $bred = "\e[1;31m";
-    my $red = "\e[31m";
-    my $green = "\e[32m";
-    my $blue = "\e[34m";
-    my $bblue = "\e[1;34m";
-    my $gray = "\e[90m]";
-    my $reversed = "\e[7m";
-    my $reset = "\e[0m";
-    my $back_yellow = "\e[103m";
-    my $back_cyan = "\e[45m";
-    my $back_green = "\e[43m";
-    my $datetime_rx=qr/\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d,\d\d\d/;
-    sub colorize {
-        my $line = shift;
-        my $opt = shift;
-        my $match_rx = shift;
-        # patterns in common interest:
-        $line =~ s/^($datetime_rx)/${blue}$1${reset}/;
-        if ($opt->{match} ne ".*") {
-            $line =~ s/( (DEBUG|INFO|WARN|ERROR)  .*?)($match_rx)/$1${reversed}$3${reset}/; # order important!
-        }
-        $line =~ s/ (DEBUG) / ${gray}$1${reset} /
-            || $line =~ s/ (INFO) / ${green}$1${reset} /
-            || $line =~ s/ (WARN) / ${red}$1${reset} /
-            || $line =~ s/ (ERROR) / ${bred}$1${reset} /;
-        $line =~ s/(SIP ?\d+)/${back_yellow}$1${reset}/g;
-        $line =~ s/(IE ?\d+)/${back_yellow}$1${reset}/g;
-        $line =~ s/(dc.identifier)/${back_cyan}$1${reset}/g;
-        return $line;
-    }
-    sub colorize_trace {
-        my $line = shift;
-        my $deposit_dir = shift;
-        my $deposit_id = shift;
-        my $sip_id = shift;
-        my $ie_pid = shift;
-        my $rep_id = shift;
-        # patterns in common interest:
-        $line =~ s/^($datetime_rx)/${blue}$1${reset}/;
-        $line =~ s/ (DEBUG) / ${gray}$1${reset} /
-            || $line =~ s/ (INFO) / ${green}$1${reset} /
-            || $line =~ s/ (WARN) / ${red}$1${reset} /
-            || $line =~ s/ (ERROR) / ${bred}$1${reset} /;
-        if (defined $deposit_dir) {$line =~ s/($deposit_dir)/${back_green}$1${reset}/g;}
-        if (defined $deposit_id ) {$line =~ s/(dep_|Deposit Activity ID=|depositId=)($deposit_id)/$1${back_green}$2${reset}/g;}
-        if (defined $sip_id     ) {$line =~ s/((sipId|SIP|PID|pid)[ =]?)($sip_id)/$1${back_green}$3${reset}/g;}
-        if (defined $ie_pid     ) {$line =~ s/(IE ?)($ie_pid)/$1${back_green}$2${reset}/g;}
-        if (defined $rep_id     ) {$line =~ s/(Representation )($rep_id)/$1${back_green}$2${reset}/g;}
-        return $line;
-    }
-}
-
-{
-    my $csv;
-    sub csv {
-        my $line = shift;
-        my $opt = shift;
-        my $match_rx = shift;
-        my $ret;
-        if (!defined $csv) {
-            $csv = Text::CSV_PP->new(
-                {
-                    sep_char => ";",
-
-                }
-            );
-            $ret=join(";", qw(date time level where msg))."\n";
-        }
-        my $date_rx=qr/\d\d\d\d-\d\d-\d\d/;
-        my $time_rx=qr/\d\d:\d\d:\d\d,\d\d\d/;
-        my $level_rx=qr/DEBUG|INFO|WARN|ERROR/;
-        my $where_rx=qr/\[.*?\]/;
-        my $msg_rx=qr/.*$/;
-        $line =~ m/^($date_rx) ($time_rx) ($level_rx)  ($where_rx) ($msg_rx)/;
-        $csv->combine($1, $2, $3, $4, $5);
-        $ret.= $csv->string;
-    }
-}
-
-
-sub helper_scan_log {
-    my $directory = shift;
-    my $fh_processing = shift;
-    for ($directory->children( qr/^server.log/ )) {
-        my $file = $_;
-        if (!$file->is_file) { next; }
-        my $fh;
-        if ($file =~ m/\.gz$/) {
-            $fh = IO::Zlib->new("$file", "rb");
-        } else {
-            $fh = $file->filehandle;
-        }
-        if (defined $fh) {
-            $fh_processing->( $fh, $file );
-        }
-        undef $fh;
-    }
-    return 1;
-}
-
-sub trace_log {
-    my $with_trace=shift;
-    my $with_color=shift;
-    my $date_rx=shift;
-    my $level_rx=shift;
-    my $match_rx=shift;
-    my $output_filter=shift;
-    my $directory = path($config{logdir});
-    my $deposit_id;
-    my $deposit_dir;
-    my $sip_id;
-    my $rep_id;
-    my $ie_pid;
-    my $searchid = $with_trace;
-    $searchid=~s/^(REP|SIP|IE)(\d+)$/$2/;
-    say "SEARCHID=$searchid";
-    # match to:
-    # 1. ... | processing {originalDirName=eb9c1924-4bab-11ec-baca-f69de10fbd49, depositId=422950, ... userName=Goobi_SMA, SIP 422438, producerType=TRUSTED, producerGroup=PG_Goobi, contentStructure=METS, materialFlowSR=0, producerId=264981, contentStructureId=264951, converter_class_name=com.exlibris.dps.deposit.converters.METSCSConverter, retentionPoliciesId=NO_RETENTION} from work queue SIP_LOADING_WORK_QUEUE finished
-    # 2. ...     enqueued {originalDirName=d2cb9509-4bad-11ec-baca-b925eea982a1, depositId=422958, ... userName=Goobi_SMA, sipId=422446, producerType=TRUSTED, producerGroup=PG_Goobi, contentStructure=METS, materialFlowSR=0, producerId=264981, contentStructureId=264951, converter_class_name=com.exlibris.dps.deposit.converters.METSCSConverter, retentionPoliciesId=NO_RETENTION} on work queue V2SL_shr00.SIP_LOADING_WORK_QUEUE
-    # 3. ... SIP 13156, Deposit Activity ID=17589Properties
-    my $sip_rx = qr/(SIP |sipId=)/;
-    my $datetime_rx = qr/$date_rx \d\d:\d\d:\d\d,\d\d\d/;
-    my $pre_rx = qr/$datetime_rx INFO  /;
-
-    my $line_rx1 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*(originalDirName=|depositId=|$sip_rx)$searchid});
-    my $line_rx2 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*(SIP |Deposit Activity ID=)$searchid});
-    my $line_rx3 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Loaded \d+ files for: .*$searchid});
-    my $line_rx4 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*((Representation $searchid IE \d+)|(Representation \d+ IE $searchid)) Copy ID: \d+});
-    if ($with_trace =~ m/^SIP/) { # search specific sip
-        $sip_id=$searchid;
-        $ie_pid=get_ie_pid_by_sip($sip_id);
-        $line_rx1 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*($sip_rx)$searchid});
-        $line_rx2 = $line_rx1;
-    } elsif ($with_trace =~ m/^IE/) { # search specific IE
-        $ie_pid=$searchid;
-        $line_rx3 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Loaded \d+ files for: REP\d+ \(IE$searchid\)});
-        $line_rx4 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Representation \d+ IE $searchid Copy ID: \d+});
-    } elsif ($with_trace =~ m/^REP/) {
-        $rep_id=$searchid;
-        $line_rx3 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Loaded \d+ files for: REP$searchid \(IE\d+\)});
-        $line_rx4 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Representation $searchid IE \d+ Copy ID: \d+});
-    }
-    my $fh_processing_stage1 = sub {
-        my $fh = shift;
-        my $file = shift;
-        my $file_md5 = path($file)->digest();
-        return if (
-            exists( $cache{$file_md5} )
-                and (
-                (       exists($cache{$file_md5}->{deposit_dir})
-                    and exists($cache{$file_md5}->{deposit_id})
-                    and exists($cache{$file_md5}->{sip_id})
-                    and exists($cache{$file_md5}->{deposit_dir}->{$searchid})
-                    and exists($cache{$file_md5}->{deposit_id}->{$searchid})
-                    and exists($cache{$file_md5}->{sip_id}->{$searchid})
-                ) or (
-                        exists($cache{$file_md5}->{rep_id})
-                    and exists($cache{$file_md5}->{ie_pid})
-                    and exists($cache{$file_md5}->{ie_pid}->{$searchid})
-                    and exists($cache{$file_md5}->{rep_id}->{$searchid})
-                )
-               )
-        );
-        while(<$fh>) {
-            if (
-                (defined $sip_id and defined $deposit_id and defined $deposit_dir)
-                    or (defined $ie_pid and defined $rep_id)
-            ) { last; }
-            if (!m/^$pre_rx/) {
-                next;
-            }
-            if (!m/$searchid/) {
-                next;
-            }
-            chomp;
-            if ( m/$line_rx1/ ) {
-                if (!defined $sip_id      and m/$sip_rx(\d{6}),/) {
-                    $sip_id = $2;
-                    if (!defined $ie_pid) {
-                        $ie_pid=get_ie_pid_by_sip($sip_id);
-                    }
-                    $cache{$file_md5}->{sip_id}->{$sip_id}=1; # mark as match
-                }
-                if (!defined $deposit_dir and m/originalDirName=([^,]*),/) {
-                    $deposit_dir = $1;
-                    $cache{$file_md5}->{deposit_dir}->{$deposit_dir}=1; # mark as match
-                }
-                if (!defined $deposit_id  and m/depositId=(\d+),/) {
-                    $deposit_id = $1;
-                    $cache{$file_md5}->{deposit_id}->{$deposit_id}=1; # mark as match
-                }
-
-            } elsif (m/$line_rx2/) {
-                if (!defined $sip_id and m/SIP (\d{6})/) {
-                    $sip_id = $1;
-                    if (!defined $ie_pid) {
-                        $ie_pid=get_ie_pid_by_sip($sip_id);
-                    }
-                    $cache{$file_md5}->{sip_id}->{$sip_id}=1; # mark as match
-                }
-                if (!defined $deposit_id and m/Deposit Activity ID=(\d+)/) {
-                    $deposit_id = $1;
-                    $cache{$file_md5}->{deposit_id}->{$deposit_id}=1; # mark as match
-                }
-            } elsif (m/$line_rx3/) {
-                if (!defined $ie_pid and m/Loaded \d+ files for: REP\d+ \((IE\d+)/) {
-                    $ie_pid = $1;
-                    $cache{$file_md5}->{ie_pid}->{$ie_pid}=1; # mark as match
-                }
-                if (!defined $rep_id and m/Loaded \d+ files for: (REP\d+)/) {
-                    $rep_id = $1;
-                    $cache{$file_md5}->{rep_id}->{$rep_id}=1; # mark as match
-                }
-            } elsif (m/$line_rx4/) {
-                my $rx = qr/Representation (\d+) IE (\d+)/;
-                if (!defined $ie_pid and m/$rx/) {
-                    $ie_pid = $2;
-                    $cache{$file_md5}->{ie_pid}->{$ie_pid}=1; # mark as match
-                }
-                if (!defined $rep_id and m/$rx/) {
-                    $rep_id = $1;
-                    $cache{$file_md5}->{rep_id}->{$rep_id}=1; # mark as match
-                }
-            }
-        }
-        return 1;
-    };
-    helper_scan_log($directory, $fh_processing_stage1);
-    no warnings;
-    my $match= sprintf("found: DIR=%s, DEPOSITID=%d, SIPID=%s, IEPID=%s, REPID=%s",
-        $deposit_dir ? $deposit_dir : "----",
-        $deposit_id ?  $deposit_id  : "----",
-        $sip_id ?      $sip_id      : "----",
-        $ie_pid ?      "IE".$ie_pid : "----",
-        $rep_id ?      "REP".$rep_id: "----"
-    );
-    use warnings;
-    say "$match";
-    say "-"x(length($match));
-    # now call scan_log and use own colorizer
-    if ($with_color) {
-        $output_filter = sub {colorize_trace($_[0], $deposit_dir, $deposit_id, $sip_id, $ie_pid, $rep_id)};
-    }
-    my $search_rxo = Regexp::Optimizer->new->optimize(qr/^$date_rx [^ ]* $level_rx  (.*?)$match_rx(.*?)$/);
-    no warnings;
-    my $nextline_rx1= Regexp::Optimizer->new->optimize(qr{originalDirName=$deposit_dir|(depositID|Deposit Activity ID)[= ]$deposit_id|(sipId|SIP[= ]?)$sip_id});
-    my $nextline_rx2=Regexp::Optimizer->new->optimize(qr{IE[ ]?$ie_pid|REP[ ]?$rep_id});
-    use warnings;
-    my $fh_processing_stage2 = sub {
-        my $fh = shift;
-        my $file = shift;
-        my $file_md5 = path($file)->digest();
-        return if ! (
-            exists( $cache{$file_md5} )
-            and
-            (
-                       (exists ($cache{$file_md5}->{deposit_dir})  and exists($cache{$file_md5}->{deposit_dir}->{$deposit_dir}))
-                    or (exists ($cache{$file_md5}->{deposit_id})   and exists($cache{$file_md5}->{deposit_id}->{$deposit_id}))
-                    or (exists ($cache{$file_md5}->{sip_id})       and exists($cache{$file_md5}->{sip_id}->{$sip_id}))
-                    or (exists ($cache{$file_md5}->{ie_pid}) and exists($cache{$file_md5}->{ie_pid}->{$ie_pid}))
-                    or (exists ($cache{$file_md5}->{rep_id}) and exists($cache{$file_md5}->{rep_id}->{$rep_id}))
-            )
-        );
-        while(<$fh>) {
-            if (!m/^$date_rx/) {
-                next;
-            }
-            if (! m/$search_rxo/) {
-                #print "no match for '$_'";
-                next;
-            }
-            if (
-                (
-                    defined $deposit_dir
-                and defined $deposit_id
-                and defined $sip_id
-                and !m/$nextline_rx1/
-                ) or (
-                    defined $ie_pid
-                and defined $rep_id
-                and !m/$nextline_rx2/
-                )
-            ) {
-                next;
-            }
-            chomp;
-            my $line = $output_filter->( $_ );
-            say $line;
-        }
-    };
-    helper_scan_log($directory, $fh_processing_stage2);
-}
-
-sub scan_log {
-    my $date_rx=shift;
-    my $level_rx=shift;
-    my $match_rx=shift;
-    my $output_filter=shift;
-    # open dir from config{$logdir}
-    # for all files matching server.log*; do
-    #   read lines
-    #   filter lines
-    #   return
-    my $directory = path($config{logdir});
-    my $search_rxo = Regexp::Optimizer->new->optimize(qr/^$date_rx [^ ]* $level_rx  (.*?)$match_rx(.*?)$/);
-    my $fh_processing = sub {
-        my $fh = shift;
-        while(<$fh>) {
-            if (! m/$search_rxo/) {
-                #print "no match for '$_'";
-                next;
-            }
-            chomp;
-            my $line = $output_filter->( $_ );
-            say $line;
-        }
-    };
-    helper_scan_log($directory, $fh_processing);
-}
-
 1;
diff --git a/lib/SLUB/LZA/Rosetta/TA/Command/count.pm b/lib/SLUB/LZA/Rosetta/TA/Command/count.pm
index 4728dac..171f243 100644
--- a/lib/SLUB/LZA/Rosetta/TA/Command/count.pm
+++ b/lib/SLUB/LZA/Rosetta/TA/Command/count.pm
@@ -3,6 +3,8 @@ use strict;
 use warnings;
 use feature qw(say);
 use SLUB::LZA::Rosetta::TA -command;
+use SLUB::LZA::Rosetta::TA::common_sru;
+use SLUB::LZA::Rosetta::TA::SRU;
 
 sub abstract {"count IEs in Rosetta based Archival Information System";}
 
@@ -63,55 +65,8 @@ sub execute {
     my ($self, $opt, $args) = @_;
     my $maxrecords="0";
     my $startrecord=1;
-    my @queries;
-    if (exists $opt->{source}) {
-        push @queries, "IE.sourceMD.content=$opt->{source}";
-    }
-    if (exists $opt->{ie}) {
-        push @queries, "IE.dc.identifier==$opt->{ie}";
-    }
-    if (exists $opt->{descriptive}) {
-        push @queries, "IE.dc.identifier==$opt->{descriptive}";
-    }
-    if (exists $opt->{creationdate}) {
-        push @queries, "IE.objectCharacteristics.creationDate==$opt->{creationdate}";
-    }
-    if (exists $opt->{modificationdate}) {
-        push @queries, "FILE.objectCharacteristics.modificationDate==$opt->{modificationdate}";
-    }
-    if (exists $opt->{'with_format'}) {
-        push @queries, "FILE.generalFileCharacteristics.formatLibraryId==$opt->{'with_format'}";
-    }
-    # unsupported by Rosetta <= 7.0
-    #if (exists $opt->{'without_format'}) {
-    #    push @queries, "FILE.generalFileCharacteristics.formatLibraryId!=$opt->{'without_format'}";
-    #}
-    if (exists $opt->{'with_valid_files'}) {
-        push @queries, "FILE.fileValidation.isValid=true";
-    }
-    if (exists $opt->{'with_invalid_files'}) {
-        push @queries, "FILE.fileValidation.isValid=false";
-    }
-    if (exists $opt->{'with_passed_viruschecks'}) {
-        push @queries, "FILE.fileVirusCheck.status=true";
-    }
-    if (exists $opt->{'with_failed_viruschecks'}) {
-        push @queries, "FILE.fileVirusCheck.status=false";
-    }
-    if (exists $opt->{'with_missed_viruschecks'}) {
-        push @queries, "FILE.fileVirusCheck.status==%22%22";
-    }
-
-
-    #my $query = join(" and ", @queries);
-    my $query = join("+", @queries);
-    if (exists $opt->{debug}) {
-        use Data::Printer;
-        p( $opt);
-        p(@queries);
-        p( $query);
-    }
-    my $response = SLUB::LZA::Rosetta::TA::sru_search('ie', $query, $startrecord, $maxrecords, $opt->{verbose});
+    my $query = SLUB::LZA::Rosetta::TA::common_sru::prepare_query($opt);
+    my $response = SLUB::LZA::Rosetta::TA::SRU::sru_search('ie', $query, $startrecord, $maxrecords, $opt->{verbose});
     $response=~s|.*?<numberOfRecords>(\d+)</numberOfRecords.*|$1|s;
     say $response;
 }
diff --git a/lib/SLUB/LZA/Rosetta/TA/Command/log.pm b/lib/SLUB/LZA/Rosetta/TA/Command/log.pm
index 46fb77c..6db540b 100644
--- a/lib/SLUB/LZA/Rosetta/TA/Command/log.pm
+++ b/lib/SLUB/LZA/Rosetta/TA/Command/log.pm
@@ -7,6 +7,7 @@ use DateTime;
 use DateTime::Format::DateParse;
 
 use SLUB::LZA::Rosetta::TA -command;
+use SLUB::LZA::Rosetta::TA::Log;
 
 sub abstract {"grep server log of Rosetta based Archival Information System";}
 
@@ -122,22 +123,22 @@ sub execute {
     # prepare output filter
     my $output_filter=sub { $_[0]; };
     if (defined $opt->colorize) {
-        $output_filter = sub { colorize($_[0], $opt, $match_rx); };
+        $output_filter = sub { SLUB::LZA::Rosetta::TA::Log::colorize($_[0], $opt, $match_rx); };
     } elsif (defined $opt->csv) {
-        $output_filter = sub { csv($_[0], $opt, $match_rx); };
+        $output_filter = sub { SLUB::LZA::Rosetta::TA::Log::csv($_[0], $opt, $match_rx); };
     }
     # prepare trace
     my $with_trace;
     if (defined $opt->trace) {
         $with_trace = $opt->trace;
         if (defined $opt->colorize) {
-            $output_filter = sub { colorize($_[0], $opt, $match_rx); };
+            $output_filter = sub { SLUB::LZA::Rosetta::TA::Log::colorize($_[0], $opt, $match_rx); };
         } else {
             $output_filter=sub { $_[0]; };
         }
-        SLUB::LZA::Rosetta::TA::trace_log($with_trace, $opt->colorize, $date_rx, $level_rx, $match_rx, $output_filter, );
+        SLUB::LZA::Rosetta::TA::Log::trace_log($with_trace, $opt->colorize, $date_rx, $level_rx, $match_rx, $output_filter, );
     } else {
-        SLUB::LZA::Rosetta::TA::scan_log($date_rx, $level_rx, $match_rx, $output_filter);
+        SLUB::LZA::Rosetta::TA::Log::scan_log($date_rx, $level_rx, $match_rx, $output_filter);
     }
 }
 
diff --git a/lib/SLUB/LZA/Rosetta/TA/Command/search.pm b/lib/SLUB/LZA/Rosetta/TA/Command/search.pm
index c7d730b..097ca49 100644
--- a/lib/SLUB/LZA/Rosetta/TA/Command/search.pm
+++ b/lib/SLUB/LZA/Rosetta/TA/Command/search.pm
@@ -3,6 +3,8 @@ use strict;
 use warnings;
 use feature qw(say);
 use SLUB::LZA::Rosetta::TA -command;
+use SLUB::LZA::Rosetta::TA::common_sru;
+use SLUB::LZA::Rosetta::TA::SRU;
 
 sub abstract {"searches Rosetta based Archival Information System";}
 my $description=<<"DESCR";
@@ -36,9 +38,8 @@ sub opt_spec {
     return(
         ["verbose|v" => "enable verbose output"],
         ["datemode" => hidden => {one_of => [
-        ["creationdate|c=s" => "search based on creationdate string"],
-        ["modificationdate|m=s" => "search based on modificationdate string"]
-
+            ["creationdate|c=s" => "search based on creationdate string"],
+            ["modificationdate|m=s" => "search based on modificationdate string"]
         ] } ],
         [ "descriptive|d=s", "search descriptive metadata (dc identifier)"],
         [ "source|s=s", "search source metadata"],
@@ -59,7 +60,6 @@ sub opt_spec {
             ["with-missed-viruschecks" => "with missed virus checks"],
             ["with-failed-viruschecks" => "with failed virus checks"],
         ]}],
-
     );
 }
 sub validate_args {
@@ -78,56 +78,8 @@ sub execute {
     if (exists $opt->{startrecord}) {
         $startrecord = $opt->{startrecord};
     }
-    my @queries;
-    if (exists $opt->{source}) {
-        push @queries, "IE.sourceMD.content==$opt->{source}";
-    }
-    if (exists $opt->{ie}) {
-        push @queries, "IE.dc.identifier==$opt->{ie}";
-    }
-    if (exists $opt->{descriptive}) {
-        push @queries, "IE.dc.identifier==$opt->{descriptive}";
-    }
-    if (exists $opt->{creationdate}) {
-        push @queries, "IE.objectCharacteristics.creationDate==$opt->{creationdate}";
-    }
-    if (exists $opt->{modificationdate}) {
-        push @queries, "FILE.objectCharacteristics.modificationDate==$opt->{modificationdate}";
-    }
-    if (exists $opt->{'with_format'}) {
-        push @queries, "FILE.generalFileCharacteristics.formatLibraryId==$opt->{'with_format'}";
-    }
-    # unsupported by Rosetta <= 7.0
-    #if (exists $opt->{'without_format'}) {
-    #    push @queries, "FILE.generalFileCharacteristics.formatLibraryId!=$opt->{'without_format'}";
-    #}
-    if (exists $opt->{'with_valid_files'}) {
-        push @queries, "FILE.fileValidation.isValid==true";
-    }
-    if (exists $opt->{'with_invalid_files'}) {
-        push @queries, "FILE.fileValidation.isValid==false";
-    }
-    if (exists $opt->{'with_passed_viruschecks'}) {
-        push @queries, "FILE.fileVirusCheck.status==true";
-    }
-    if (exists $opt->{'with_failed_viruschecks'}) {
-        push @queries, "FILE.fileVirusCheck.status==false";
-    }
-    if (exists $opt->{'with_missed_viruschecks'}) {
-        push @queries, "FILE.fileVirusCheck.status==%22%22";
-    }
-
-
-    #my $query = join(" and ", @queries);
-    my $query = join("+", @queries);
-    if (exists $opt->{debug}) {
-        use Data::Printer;
-        p( $opt);
-        p(@queries);
-        p( $query);
-    }
-
-    my $response = SLUB::LZA::Rosetta::TA::sru_search('ie', $query, $startrecord, $maxrecords, $opt->{verbose});
+    my $query = SLUB::LZA::Rosetta::TA::common_sru::prepare_query($opt);
+    my $response = SLUB::LZA::Rosetta::TA::SRU::sru_search('ie', $query, $startrecord, $maxrecords, $opt->{verbose});
     say $response;
 }
 
diff --git a/lib/SLUB/LZA/Rosetta/TA/Log.pm b/lib/SLUB/LZA/Rosetta/TA/Log.pm
new file mode 100644
index 0000000..8b0e1ef
--- /dev/null
+++ b/lib/SLUB/LZA/Rosetta/TA/Log.pm
@@ -0,0 +1,329 @@
+package SLUB::LZA::Rosetta::TA::Log;
+use strict;
+use warnings;
+use feature qw(say);
+use SLUB::LZA::Rosetta::TA::SOAP;
+use Path::Tiny;
+
+# Feed every "server.log*" file of a directory to a callback.
+#   $directory     - object whose children() yields file objects (path(...) at the call sites)
+#   $fh_processing - code ref, called as $fh_processing->($fh, $file)
+# Names ending in ".gz" are opened through IO::Zlib; anything else uses the
+# file object's own filehandle(). Always returns 1.
+sub helper_scan_log {
+    my ($directory, $fh_processing) = @_;
+    for ($directory->children( qr/^server.log/ )) {
+        # copy first: the callbacks in this file read with while(<$fh>), which assigns to $_
+        my $file = $_;
+        next if !$file->is_file;
+        my $fh = ($file =~ m/\.gz$/)
+            ? IO::Zlib->new("$file", "rb")
+            : $file->filehandle;
+        $fh_processing->( $fh, $file ) if defined $fh;
+        undef $fh; # release our reference to the handle before the next file
+    }
+    return 1;
+}
+
+# Trace one object (deposit dir, deposit id, SIP, IE or REP) through the server logs.
+#   $with_trace - id to follow; a REP/SIP/IE prefix in front of digits selects the id type
+#   $with_color - if true, $output_filter is replaced by a colorize_trace() wrapper
+#   $date_rx, $level_rx, $match_rx - regex fragments; $output_filter - code ref applied per printed line
+# Stage 1 scans the logs for the related ids, stage 2 prints the lines mentioning them.
+sub trace_log {
+    my ($with_trace, $with_color, $date_rx, $level_rx, $match_rx, $output_filter) = @_;
+    my $directory = path($SLUB::LZA::Rosetta::TA::config{logdir});
+    my $deposit_id;
+    my $deposit_dir;
+    my $sip_id;
+    my $rep_id;
+    my $ie_pid;
+    my $searchid = $with_trace;
+    $searchid=~s/^(REP|SIP|IE)(\d+)$/$2/;
+    say "SEARCHID=$searchid";
+    # match to:
+    # 1. ... | processing {originalDirName=eb9c1924-4bab-11ec-baca-f69de10fbd49, depositId=422950, ... userName=Goobi_SMA, SIP 422438, producerType=TRUSTED, producerGroup=PG_Goobi, contentStructure=METS, materialFlowSR=0, producerId=264981, contentStructureId=264951, converter_class_name=com.exlibris.dps.deposit.converters.METSCSConverter, retentionPoliciesId=NO_RETENTION} from work queue SIP_LOADING_WORK_QUEUE finished
+    # 2. ...     enqueued {originalDirName=d2cb9509-4bad-11ec-baca-b925eea982a1, depositId=422958, ... userName=Goobi_SMA, sipId=422446, producerType=TRUSTED, producerGroup=PG_Goobi, contentStructure=METS, materialFlowSR=0, producerId=264981, contentStructureId=264951, converter_class_name=com.exlibris.dps.deposit.converters.METSCSConverter, retentionPoliciesId=NO_RETENTION} on work queue V2SL_shr00.SIP_LOADING_WORK_QUEUE
+    # 3. ... SIP 13156, Deposit Activity ID=17589Properties
+    my $sip_rx = qr/(SIP |sipId=)/;
+    my $datetime_rx = qr/$date_rx \d\d:\d\d:\d\d,\d\d\d/;
+    my $pre_rx = qr/$datetime_rx INFO  /;
+
+    my $line_rx1 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*(originalDirName=|depositId=|$sip_rx)$searchid});
+    my $line_rx2 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*(SIP |Deposit Activity ID=)$searchid});
+    my $line_rx3 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Loaded \d+ files for: .*$searchid});
+    my $line_rx4 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*((Representation $searchid IE \d+)|(Representation \d+ IE $searchid)) Copy ID: \d+});
+    if ($with_trace =~ m/^SIP/) { # search specific sip
+        $sip_id=$searchid;
+        $ie_pid=SLUB::LZA::Rosetta::TA::SOAP::get_ie_pid_by_sip($sip_id); # not exported, needs the full name
+        $line_rx1 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*($sip_rx)$searchid});
+        $line_rx2 = $line_rx1;
+    } elsif ($with_trace =~ m/^IE/) { # search specific IE
+        $ie_pid=$searchid;
+        $line_rx3 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Loaded \d+ files for: REP\d+ \(IE$searchid\)});
+        $line_rx4 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Representation \d+ IE $searchid Copy ID: \d+});
+    } elsif ($with_trace =~ m/^REP/) {
+        $rep_id=$searchid;
+        $line_rx3 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Loaded \d+ files for: REP$searchid \(IE\d+\)});
+        $line_rx4 = Regexp::Optimizer->new->optimize(qr{^$pre_rx.*Representation $searchid IE \d+ Copy ID: \d+});
+    }
+    my $fh_processing_stage1 = sub {
+        my $fh = shift;
+        my $file = shift;
+        my $file_md5 = path($file)->digest();
+        return if (
+            exists( $SLUB::LZA::Rosetta::TA::cache{$file_md5} )
+                and (
+                (       exists($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{deposit_dir})
+                    and exists($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{deposit_id})
+                    and exists($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{sip_id})
+                    and exists($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{deposit_dir}->{$searchid})
+                    and exists($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{deposit_id}->{$searchid})
+                    and exists($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{sip_id}->{$searchid})
+                ) or (
+                    exists($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{rep_id})
+                        and exists($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{ie_pid})
+                        and exists($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{ie_pid}->{$searchid})
+                        and exists($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{rep_id}->{$searchid})
+                )
+            )
+        );
+        while(<$fh>) {
+            if (
+                (defined $sip_id and defined $deposit_id and defined $deposit_dir)
+                    or (defined $ie_pid and defined $rep_id)
+            ) { last; }
+            if (!m/^$pre_rx/) {
+                next;
+            }
+            if (!m/$searchid/) {
+                next;
+            }
+            chomp;
+            if ( m/$line_rx1/ ) {
+                if (!defined $sip_id      and m/$sip_rx(\d{6}),/) {
+                    $sip_id = $2;
+                    if (!defined $ie_pid) {
+                        $ie_pid=SLUB::LZA::Rosetta::TA::SOAP::get_ie_pid_by_sip($sip_id);
+                    }
+                    $SLUB::LZA::Rosetta::TA::cache{$file_md5}->{sip_id}->{$sip_id}=1; # mark as match
+                }
+                if (!defined $deposit_dir and m/originalDirName=([^,]*),/) {
+                    $deposit_dir = $1;
+                    $SLUB::LZA::Rosetta::TA::cache{$file_md5}->{deposit_dir}->{$deposit_dir}=1; # mark as match
+                }
+                if (!defined $deposit_id  and m/depositId=(\d+),/) {
+                    $deposit_id = $1;
+                    $SLUB::LZA::Rosetta::TA::cache{$file_md5}->{deposit_id}->{$deposit_id}=1; # mark as match
+                }
+
+            } elsif (m/$line_rx2/) {
+                if (!defined $sip_id and m/SIP (\d{6})/) {
+                    $sip_id = $1;
+                    if (!defined $ie_pid) {
+                        $ie_pid=SLUB::LZA::Rosetta::TA::SOAP::get_ie_pid_by_sip($sip_id);
+                    }
+                    $SLUB::LZA::Rosetta::TA::cache{$file_md5}->{sip_id}->{$sip_id}=1; # mark as match
+                }
+                if (!defined $deposit_id and m/Deposit Activity ID=(\d+)/) {
+                    $deposit_id = $1;
+                    $SLUB::LZA::Rosetta::TA::cache{$file_md5}->{deposit_id}->{$deposit_id}=1; # mark as match
+                }
+            } elsif (m/$line_rx3/) {
+                if (!defined $ie_pid and m/Loaded \d+ files for: REP\d+ \(IE(\d+)/) {
+                    $ie_pid = $1; # digits only, like every other place that sets $ie_pid
+                    $SLUB::LZA::Rosetta::TA::cache{$file_md5}->{ie_pid}->{$ie_pid}=1; # mark as match
+                }
+                if (!defined $rep_id and m/Loaded \d+ files for: REP(\d+)/) {
+                    $rep_id = $1; # digits only, like every other place that sets $rep_id
+                    $SLUB::LZA::Rosetta::TA::cache{$file_md5}->{rep_id}->{$rep_id}=1; # mark as match
+                }
+            } elsif (m/$line_rx4/) {
+                my $rx = qr/Representation (\d+) IE (\d+)/;
+                if (!defined $ie_pid and m/$rx/) {
+                    $ie_pid = $2;
+                    $SLUB::LZA::Rosetta::TA::cache{$file_md5}->{ie_pid}->{$ie_pid}=1; # mark as match
+                }
+                if (!defined $rep_id and m/$rx/) {
+                    $rep_id = $1;
+                    $SLUB::LZA::Rosetta::TA::cache{$file_md5}->{rep_id}->{$rep_id}=1; # mark as match
+                }
+            }
+        }
+        return 1;
+    };
+    helper_scan_log($directory, $fh_processing_stage1);
+    no warnings;
+    my $match= sprintf("found: DIR=%s, DEPOSITID=%s, SIPID=%s, IEPID=%s, REPID=%s", # %s: "----" under %d printed 0
+        $deposit_dir ? $deposit_dir : "----",
+        $deposit_id ?  $deposit_id  : "----",
+        $sip_id ?      $sip_id      : "----",
+        $ie_pid ?      "IE".$ie_pid : "----",
+        $rep_id ?      "REP".$rep_id: "----"
+    );
+    use warnings;
+    say "$match";
+    say "-"x(length($match));
+    # now call scan_log and use own colorizer
+    if ($with_color) {
+        $output_filter = sub {colorize_trace($_[0], $deposit_dir, $deposit_id, $sip_id, $ie_pid, $rep_id)};
+    }
+    my $search_rxo = Regexp::Optimizer->new->optimize(qr/^$date_rx [^ ]* $level_rx  (.*?)$match_rx(.*?)$/);
+    no warnings;
+    my $nextline_rx1= Regexp::Optimizer->new->optimize(qr{originalDirName=$deposit_dir|(depositI[Dd]|Deposit Activity ID)[= ]$deposit_id|(sipId|SIP[= ]?)$sip_id});
+    my $nextline_rx2=Regexp::Optimizer->new->optimize(qr{IE[ ]?$ie_pid|REP[ ]?$rep_id});
+    use warnings;
+    my $fh_processing_stage2 = sub {
+        my $fh = shift;
+        my $file = shift;
+        my $file_md5 = path($file)->digest();
+        return if ! (
+            exists( $SLUB::LZA::Rosetta::TA::cache{$file_md5} )
+                and
+                (
+                    (exists ($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{deposit_dir})  and exists($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{deposit_dir}->{$deposit_dir}))
+                        or (exists ($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{deposit_id})   and exists($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{deposit_id}->{$deposit_id}))
+                        or (exists ($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{sip_id})       and exists($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{sip_id}->{$sip_id}))
+                        or (exists ($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{ie_pid}) and exists($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{ie_pid}->{$ie_pid}))
+                        or (exists ($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{rep_id}) and exists($SLUB::LZA::Rosetta::TA::cache{$file_md5}->{rep_id}->{$rep_id}))
+                )
+        );
+        while(<$fh>) {
+            if (!m/^$date_rx/) {
+                next;
+            }
+            if (! m/$search_rxo/) {
+                #print "no match for '$_'";
+                next;
+            }
+            if (
+                (
+                    defined $deposit_dir
+                        and defined $deposit_id
+                        and defined $sip_id
+                        and !m/$nextline_rx1/
+                ) or (
+                    defined $ie_pid
+                        and defined $rep_id
+                        and !m/$nextline_rx2/
+                )
+            ) {
+                next;
+            }
+            chomp;
+            my $line = $output_filter->( $_ );
+            say $line;
+        }
+    };
+    helper_scan_log($directory, $fh_processing_stage2);
+}
+
+# Print every log line that matches a date/level/message pattern.
+#   $date_rx, $level_rx, $match_rx - regex fragments joined into one line pattern
+#   $output_filter - code ref; gets the chomped line, its scalar result is printed
+# Procedure:
+#   open dir from config{logdir}
+#   for all files matching server.log*; do
+#     read lines, skip those not matching the pattern
+#     print the filtered rest
+# Returns whatever helper_scan_log() returns.
+sub scan_log {
+    my ($date_rx, $level_rx, $match_rx, $output_filter) = @_;
+    my $logdir = path($SLUB::LZA::Rosetta::TA::config{logdir});
+    my $line_matcher = Regexp::Optimizer->new->optimize(qr/^$date_rx [^ ]* $level_rx  (.*?)$match_rx(.*?)$/);
+    my $print_matches = sub {
+        my ($fh) = @_;
+        LINE:
+        while (<$fh>) {
+            next LINE unless m/$line_matcher/;
+            # only matching lines get here; the rest is skipped without output
+            chomp;
+            my $filtered = $output_filter->( $_ );
+            say $filtered;
+        }
+    };
+    return helper_scan_log($logdir, $print_matches);
+}
+
+
+{
+    # ANSI SGR escape sequences shared by colorize() and colorize_trace()
+    my $bred = "\e[1;31m";
+    my $red = "\e[31m";
+    my $green = "\e[32m";
+    my $blue = "\e[34m";
+    my $bblue = "\e[1;34m";
+    my $gray = "\e[90m"; # no trailing "]": it would be printed literally in front of DEBUG
+    my $reversed = "\e[7m";
+    my $reset = "\e[0m";
+    my $back_yellow = "\e[103m";
+    my $back_cyan = "\e[45m"; # NOTE(review): SGR 45 is a magenta background - confirm intent
+    my $back_green = "\e[43m"; # NOTE(review): SGR 43 is a yellow background - confirm intent
+    my $datetime_rx=qr/\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d,\d\d\d/;
+    # Colorize one log line for plain searches.
+    #   $line - log line; $opt - hash ref, only $opt->{match} is read; $match_rx - pattern to highlight
+    # Returns the line with escape sequences inserted.
+    sub colorize {
+        my ($line, $opt, $match_rx) = @_;
+        # patterns in common interest:
+        $line =~ s/^($datetime_rx)/${blue}$1${reset}/;
+        if ($opt->{match} ne ".*") {
+            $line =~ s/( (DEBUG|INFO|WARN|ERROR)  .*?)($match_rx)/$1${reversed}$3${reset}/; # order important!
+        }
+        $line =~ s/ (DEBUG) / ${gray}$1${reset} /
+            || $line =~ s/ (INFO) / ${green}$1${reset} /
+            || $line =~ s/ (WARN) / ${red}$1${reset} /
+            || $line =~ s/ (ERROR) / ${bred}$1${reset} /;
+        $line =~ s/(SIP ?\d+)/${back_yellow}$1${reset}/g;
+        $line =~ s/(IE ?\d+)/${back_yellow}$1${reset}/g;
+        $line =~ s/(dc.identifier)/${back_cyan}$1${reset}/g;
+        return $line;
+    }
+    # Colorize one log line for traces: date and level as in colorize(), plus a
+    # highlight for each id that is defined. Ids are matched literally (\Q..\E),
+    # so regex metacharacters inside a deposit dir name cannot change the pattern.
+    sub colorize_trace {
+        my ($line, $deposit_dir, $deposit_id, $sip_id, $ie_pid, $rep_id) = @_;
+        # patterns in common interest:
+        $line =~ s/^($datetime_rx)/${blue}$1${reset}/;
+        $line =~ s/ (DEBUG) / ${gray}$1${reset} /
+            || $line =~ s/ (INFO) / ${green}$1${reset} /
+            || $line =~ s/ (WARN) / ${red}$1${reset} /
+            || $line =~ s/ (ERROR) / ${bred}$1${reset} /;
+        if (defined $deposit_dir) {$line =~ s/(\Q$deposit_dir\E)/${back_green}$1${reset}/g;}
+        if (defined $deposit_id ) {$line =~ s/(dep_|Deposit Activity ID=|depositId=)(\Q$deposit_id\E)/$1${back_green}$2${reset}/g;}
+        if (defined $sip_id     ) {$line =~ s/((sipId|SIP|PID|pid)[ =]?)(\Q$sip_id\E)/$1${back_green}$3${reset}/g;}
+        if (defined $ie_pid     ) {$line =~ s/(IE ?)(\Q$ie_pid\E)/$1${back_green}$2${reset}/g;}
+        if (defined $rep_id     ) {$line =~ s/(Representation )(\Q$rep_id\E)/$1${back_green}$2${reset}/g;}
+        return $line;
+    }
+}
+
+{
+    my $csv; # Text::CSV_PP instance, created on the first call and then reused
+    # Turn one log line into a ';'-separated record (date;time;level;where;msg).
+    # The very first call returns the header row plus newline in front of the record.
+    # $opt and $match_rx are accepted like in colorize() but are not used here.
+    sub csv {
+        my ($line, $opt, $match_rx) = @_;
+        my $ret;
+        if (!defined $csv) {
+            $csv = Text::CSV_PP->new( { sep_char => ";" } );
+            $ret=join(";", qw(date time level where msg))."\n";
+        }
+        my $date_rx=qr/\d\d\d\d-\d\d-\d\d/;
+        my $time_rx=qr/\d\d:\d\d:\d\d,\d\d\d/;
+        my $level_rx=qr/DEBUG|INFO|WARN|ERROR/;
+        my $where_rx=qr/\[.*?\]/;
+        my $msg_rx=qr/.*$/;
+        my @fields = $line =~ m/^($date_rx) ($time_rx) ($level_rx)  ($where_rx) ($msg_rx)/;
+        # a line of another shape used to emit the stale $1..$5 of an earlier match;
+        # keep it instead as a record whose only filled column is msg
+        if (!@fields) { @fields = ('', '', '', '', $line); }
+        $csv->combine(@fields);
+        $ret.= $csv->string;
+        return $ret;
+    }
+}
+
+1;
diff --git a/lib/SLUB/LZA/Rosetta/TA/SOAP.pm b/lib/SLUB/LZA/Rosetta/TA/SOAP.pm
new file mode 100644
index 0000000..4f78e71
--- /dev/null
+++ b/lib/SLUB/LZA/Rosetta/TA/SOAP.pm
@@ -0,0 +1,42 @@
+package SLUB::LZA::Rosetta::TA::SOAP;
+use strict;
+use warnings;
+# Ask Rosetta's SipWebServices (SOAP call 'getSipIEs') for the IE of a SIP id ($sip).
+# Returns the answer reduced by s/^IE(\d*).*/$1/ (the digits after a leading "IE"),
+# or nothing for a missing/empty answer; a SOAP fault is fatal (confess).
+sub get_ie_pid_by_sip {
+    my $sip = shift;
+    require Carp;       # confess() is not imported into this package
+    require SOAP::Lite; # do not rely on another module having loaded it
+    my ($protocol, $host, $port) = ("https", $SLUB::LZA::Rosetta::TA::config{host}, 8443);
+    my $wsdl_url="${protocol}://${host}:${port}/dpsws/repository/SipWebServices?wsdl";
+    my $soap = SOAP::Lite->new;
+    $soap->proxy(
+        $wsdl_url,
+        timeout    => 3000,
+        keep_alive => 1,
+        ssl_opts   => {
+            verify_hostname=>1,
+            # SSL_ca_path => '/etc/ssl/',
+        }
+    );
+    $soap->ns('http://dps.exlibris.com/');
+    $soap->on_action(sub {return ''}); # remove SOAP action if used with Rosetta 6.xx or higher
+    my $som = $soap->call(
+        'getSipIEs',
+        SOAP::Data->name('arg0')->value($sip)->type('string')
+    );
+    if ($som->fault) {
+        Carp::confess ("ERROR: server '$host' says: ".$som->faultstring."\n");
+    }
+    my $res = $som->result;
+    if (!defined $res or $res eq '') { # missing or empty result without SOAP fault
+        # HINT: 2 possibilities
+        # * SIP-ID doesn't exist
+        # * SIP-ID existed before but Rosetta clean up job removed all status infos
+        return; # let caller decide how to act on an empty result (no SOAP error)
+    }
+    (my $ie = $res) =~ s/^IE(\d*).*/$1/;
+    return $ie;
+}
+1;
diff --git a/lib/SLUB/LZA/Rosetta/TA/SRU.pm b/lib/SLUB/LZA/Rosetta/TA/SRU.pm
new file mode 100644
index 0000000..26a19eb
--- /dev/null
+++ b/lib/SLUB/LZA/Rosetta/TA/SRU.pm
@@ -0,0 +1,57 @@
+package SLUB::LZA::Rosetta::TA::SRU;
+use strict;
+use warnings;
+use feature qw(say);
+
+# Minimal percent-encoding for a query string: escapes % & # space " ' and :
+# ('+' stays untouched, prepare_query() joins its clauses with it).
+sub url_encode_simple {
+    my $string = shift;
+    # a literal & or # would cut off the query= parameter; % is escaped so the text arrives as typed
+    $string =~ s/([%&# "':])/sprintf('%%%02x', ord($1))/ge;
+    return $string;
+}
+
+# Send an SRU 1.2 searchRetrieve request to https://<config host>:8443 and return the decoded body.
+#   $searchtype - 'ie', 'file' or 'sip' (anything else croaks); $unencoded_query - run through url_encode_simple()
+#   $startrecord, $maxrecords - startRecord / maximumRecords; $is_verbose - if true, print the URL first
+# Croaks with the HTTP status line when the request is not successful.
+sub sru_search {
+    my ($searchtype, $unencoded_query, $startrecord, $maxrecords, $is_verbose) = @_;
+    require Carp;           # croak() is not imported into this package
+    require LWP::UserAgent; # do not rely on another module having loaded it
+    my $query = url_encode_simple($unencoded_query);
+    my %searchpaths = (
+        ie   => 'permanent/ie',
+        file => 'permanent/file',
+        sip  => 'operational'
+    );
+    if (!exists $searchpaths{$searchtype}){
+        Carp::croak ("Code error, wrong searchtype ($searchtype) used!");
+    }
+    my $protocol = 'https';
+    my $host = $SLUB::LZA::Rosetta::TA::config{host};
+    my $port = '8443';
+    my $searchpath = $searchpaths{$searchtype};
+    my $srubase="${protocol}://${host}:${port}/search/${searchpath}/sru";
+    my $sru = "${srubase}?version=1.2&operation=searchRetrieve&startRecord=$startrecord&maximumRecords=$maxrecords&recordSchema=dc&query=${query}";
+    my $ua = LWP::UserAgent->new(keep_alive => 1);
+    $ua->agent("MyApp/0.1 ");
+    $ua->timeout(3600);#1h
+    $ua->default_headers->push_header('Accept-Encoding' => 'br, lzma, bzip2, gzip, compressed, deflate');
+    $ua->ssl_opts(
+        verify_hostname=>1,
+        # SSL_ca_path => '/etc/ssl/',
+    );
+    if ($is_verbose) {
+        say "searchurl = $sru";
+    }
+    my $req = $ua->get($sru);
+    if ($req->is_success) {
+        my  $xres = $req->decoded_content;
+        return $xres;
+    }
+    Carp::croak ("Error was: ".$req->status_line());
+}
+
+1;
diff --git a/lib/SLUB/LZA/Rosetta/TA/common_sru.pm b/lib/SLUB/LZA/Rosetta/TA/common_sru.pm
new file mode 100644
index 0000000..b00b967
--- /dev/null
+++ b/lib/SLUB/LZA/Rosetta/TA/common_sru.pm
@@ -0,0 +1,56 @@
+package SLUB::LZA::Rosetta::TA::common_sru;
+use strict;
+use warnings;
+
+# Build the SRU query string from an options hash.
+#   $opt - hash ref; only keys that exist contribute a clause
+# Clauses keep a fixed order and are joined with '+'. If $opt->{debug}
+# exists, $opt, the clause list and the final query are dumped through
+# Data::Printer's p().
+# Returns the query string (the empty string when no clause applies).
+sub prepare_query {
+    my ($opt) = @_;
+    # option key, clause text in front of the value, wrap the value in double quotes?
+    my @value_clauses = (
+        [ 'source',           'IE.sourceMD.content=',                              1 ],
+        [ 'ie',               'IE.dc.identifier==',                                1 ],
+        [ 'descriptive',      'IE.dc.identifier=',                                 1 ],
+        [ 'creationdate',     'IE.objectCharacteristics.creationDate==',           0 ],
+        [ 'modificationdate', 'FILE.objectCharacteristics.modificationDate==',     0 ],
+        [ 'with_format',      'FILE.generalFileCharacteristics.formatLibraryId==', 0 ],
+    );
+    # unsupported by Rosetta <= 7.0
+    #if (exists $opt->{'without_format'}) {
+    #    push @queries, "FILE.generalFileCharacteristics.formatLibraryId!=$opt->{'without_format'}";
+    #}
+    # option key, complete clause (the option's value itself is not looked at)
+    my @flag_clauses = (
+        [ 'with_valid_files',        'FILE.fileValidation.isValid==true' ],
+        [ 'with_invalid_files',      'FILE.fileValidation.isValid==false' ],
+        [ 'with_passed_viruschecks', 'FILE.fileVirusCheck.status==true' ],
+        [ 'with_failed_viruschecks', 'FILE.fileVirusCheck.status==false' ],
+        [ 'with_missed_viruschecks', 'FILE.fileVirusCheck.status==""' ],
+    );
+    my @queries;
+    for my $rule (@value_clauses) {
+        my ($key, $prefix, $quoted) = @{$rule};
+        next if !exists $opt->{$key};
+        my $value = $quoted ? "\"$opt->{$key}\"" : "$opt->{$key}";
+        push @queries, $prefix . $value;
+    }
+    for my $rule (@flag_clauses) {
+        my ($key, $clause) = @{$rule};
+        push @queries, $clause if exists $opt->{$key};
+    }
+    #my $query = join(" and ", @queries);
+    my $query = join("+", @queries);
+    if (exists $opt->{debug}) {
+        use Data::Printer;
+        p( $opt);
+        p(@queries);
+        p( $query);
+    }
+    return $query;
+}
+
+1;
-- 
GitLab