Skip to content
Snippets Groups Projects
Commit db0ac963 authored by Andreas Romeyke
Browse files

- simplified parsing of the file size (and hopefully improved performance)

parent 18b3e799
No related branches found
No related tags found
No related merge requests found
...@@ -394,10 +394,13 @@ sub check_if_db_conform ($string, $filename) { ...@@ -394,10 +394,13 @@ sub check_if_db_conform ($string, $filename) {
# #
############################################################################### ###############################################################################
my $compiled_xpath_dmdSec = XML::LibXML::XPathExpression->new('/mets:mets/mets:dmdSec'); my $compiled_xpath_dmdSec = XML::LibXML::XPathExpression->new('/mets:mets/mets:dmdSec');
my $compiled_xpath_amdSec = XML::LibXML::XPathExpression->new('/mets:mets/mets:amdSec[starts-with(@ID, \'REP\')]'); my $compiled_xpath_amdSec_rep = XML::LibXML::XPathExpression->new('/mets:mets/mets:amdSec[starts-with(@ID, \'REP\')]');
my $compiled_xpath_amdSec_fl = XML::LibXML::XPathExpression->new('/mets:mets/mets:amdSec[starts-with(@ID, \'FL\')]');
my $compiled_xpath_fileSec = XML::LibXML::XPathExpression->new('/mets:mets/mets:fileSec'); my $compiled_xpath_fileSec = XML::LibXML::XPathExpression->new('/mets:mets/mets:fileSec');
my $compiled_xpath_titles = XML::LibXML::XPathExpression->new('mets:mdWrap[1]/mets:xmlData[1]/dc:record/dc:title[1]'); my $compiled_xpath_titles = XML::LibXML::XPathExpression->new('mets:mdWrap[1]/mets:xmlData[1]/dc:record/dc:title[1]');
my $compiled_xpath_fileGrp = XML::LibXML::XPathExpression->new('/mets:mets/mets:fileSec/mets:fileGrp');
my $compiled_xpath_files = XML::LibXML::XPathExpression->new("mets:file/mets:FLocat/\@xlin:href"); my $compiled_xpath_files = XML::LibXML::XPathExpression->new("mets:file/mets:FLocat/\@xlin:href");
my $compiled_xpath_filepids = XML::LibXML::XPathExpression->new('mets:file/@ID');
my $compiled_xpath_dcrecords = XML::LibXML::XPathExpression->new('mets:mdWrap/mets:xmlData/dc:record/*'); my $compiled_xpath_dcrecords = XML::LibXML::XPathExpression->new('mets:mdWrap/mets:xmlData/dc:record/*');
my $compiled_xpath_repid = XML::LibXML::XPathExpression->new('/mets:mets/mets:amdSec[starts-with(@ID, \'REP\')]/@ID'); my $compiled_xpath_repid = XML::LibXML::XPathExpression->new('/mets:mets/mets:amdSec[starts-with(@ID, \'REP\')]/@ID');
my $str_local_record = "mets:techMD/mets:mdWrap/mets:xmlData/*[local-name()=\'dnx\']/*[local-name()=\'section\']/*[local-name()=\'record\']"; my $str_local_record = "mets:techMD/mets:mdWrap/mets:xmlData/*[local-name()=\'dnx\']/*[local-name()=\'section\']/*[local-name()=\'record\']";
...@@ -447,31 +450,33 @@ sub check_if_db_conform ($string, $filename) { ...@@ -447,31 +450,33 @@ sub check_if_db_conform ($string, $filename) {
# if $found == 0 do nothing # if $found == 0 do nothing
return $repid; return $repid;
} }
# Collect the IDs of all mets:file elements that belong to one representation.
#
# $xp    - XPath context used for both lookups (the "mets" prefix is
#          presumably registered on it by the caller - TODO confirm)
# $fsp   - node the fileGrp lookup is evaluated against
# $repid - value the fileGrp's ADMID attribute must equal
#
# Returns a reference to an array holding the value of every mets:file/@ID
# below the first matching mets:fileGrp, or a reference to an empty array
# when no fileGrp matches.
sub get_filepids_ref ($xp, $fsp, $repid) {
    my $filegrp = $xp->findnodes("mets:fileGrp[\@ADMID='$repid']", $fsp)->[0];
    # With an undefined node, findnodes() would evaluate against whatever
    # context node $xp currently has; return early so the result never
    # depends on that unrelated state.
    return [] if !defined $filegrp;
    my @filepids = map { $_->nodeValue } $xp->findnodes($compiled_xpath_filepids, $filegrp);
    return \@filepids;
}
# Returns a reference to an array with the xlink:href value of every
# mets:file/mets:FLocat below the first mets:fileGrp (searched relative to
# $fsp) whose ADMID attribute equals $repid.
# NOTE(review): each line below appears to carry both columns of a
# side-by-side diff; the right-hand column only drops the temporary variable
# inside the map block.
sub get_files_ref ($xp, $fsp, $repid){ sub get_files_ref ($xp, $fsp, $repid){
my $filegrp = $xp->findnodes("mets:fileGrp[\@ADMID='$repid']", $fsp)->[0]; my $filegrp = $xp->findnodes("mets:fileGrp[\@ADMID='$repid']", $fsp)->[0];
my @files_nodes = $xp->findnodes($compiled_xpath_files, $filegrp); my @files_nodes = $xp->findnodes($compiled_xpath_files, $filegrp);
my @files = map { my $tmp= $_->nodeValue; $tmp } @files_nodes; my @files = map { $_->nodeValue; } @files_nodes;
return \@files; return \@files;
} }
# NOTE(review): the lines of this block appear to carry both columns of a
# side-by-side commit diff - the text before the commit followed by the text
# after it.
#
# Left-hand column: get_filesize($xp, $filepath) builds a single XPath that
# first resolves the mets:file ID from the xlink:href value $filepath and then
# selects the mets:techMD whose ID starts with that ID.
# Right-hand column:
#   get_file_path($xp, $fsp, $filepid) returns the xlink:href of the mets:file
#   whose ID equals $filepid, looked up below mets:fileGrp relative to $fsp.
#   get_filesize($xp, $filepid) takes the file ID directly and selects the
#   mets:techMD with ID "$filepid-amd-tech" inside the mets:amdSec whose ID
#   starts with $filepid.
# Both get_filesize variants return, via findvalue(), the text of the dnx key
# whose id is 'fileSizeBytes'.
sub get_filesize ($xp, $filepath) { sub get_file_path($xp, $fsp, $filepid) {
my $xpath_id =<<"XPATH"; return $xp->findvalue("mets:fileGrp/mets:file[\@ID=\"$filepid\"]/mets:FLocat/\@xlin:href", $fsp);
/mets:mets/mets:fileSec/mets:fileGrp/mets:file[mets:FLocat/\@*[ }
namespace-uri()='http://www.w3.org/1999/xlink' and local-name()='href']='$filepath']/\@ID
XPATH sub get_filesize ($xp, $filepid) {
my $xpath =<<"XPATH2"; my $xpath =<<"XPATH";
/mets:mets/mets:amdSec/mets:techMD[ /mets:mets/mets:amdSec[starts-with(\@ID, \'$filepid\')]/mets:techMD[\@ID=\"$filepid-amd-tech\"]
$xpath_id /mets:mdWrap/mets:xmlData/*[namespace-uri()='http://www.exlibrisgroup.com/dps/dnx' and local-name()='dnx']
and
starts-with(\@ID,$xpath_id)
]/mets:mdWrap/mets:xmlData/*[namespace-uri()='http://www.exlibrisgroup.com/dps/dnx' and local-name()='dnx']
/*[namespace-uri()='http://www.exlibrisgroup.com/dps/dnx' and local-name()='section'] /*[namespace-uri()='http://www.exlibrisgroup.com/dps/dnx' and local-name()='section']
/*[namespace-uri()='http://www.exlibrisgroup.com/dps/dnx' and local-name()='record'] /*[namespace-uri()='http://www.exlibrisgroup.com/dps/dnx' and local-name()='record']
/*[namespace-uri()='http://www.exlibrisgroup.com/dps/dnx' and local-name()='key' and \@id='fileSizeBytes']/text() /*[namespace-uri()='http://www.exlibrisgroup.com/dps/dnx' and local-name()='key' and \@id='fileSizeBytes']/text()
XPATH2 XPATH
my $size = $xp->findvalue($xpath); return $xp->findvalue($xpath);
return $size;
} }
sub get_purged_states($xp, $amd) { sub get_purged_states($xp, $amd) {
...@@ -523,7 +528,7 @@ XPATH2 ...@@ -523,7 +528,7 @@ XPATH2
my $xp = get_xpath_context(); my $xp = get_xpath_context();
$xp->setContextNode($dom); $xp->setContextNode($dom);
my $dmdsec = $xp->findnodes($compiled_xpath_dmdSec)->[0]; my $dmdsec = $xp->findnodes($compiled_xpath_dmdSec)->[0];
my $amdsec = $xp->findnodes($compiled_xpath_amdSec)->[0]; my $amdsec = $xp->findnodes($compiled_xpath_amdSec_rep)->[0];
my $filesec= $xp->findnodes($compiled_xpath_fileSec)->[0]; my $filesec= $xp->findnodes($compiled_xpath_fileSec)->[0];
my $ret; my $ret;
$ret->{"filename"} = $filename; $ret->{"filename"} = $filename;
...@@ -559,11 +564,17 @@ XPATH2 ...@@ -559,11 +564,17 @@ XPATH2
$ret->{"repid"} = $repid; $ret->{"repid"} = $repid;
############################################ ############################################
# get all files of LOCAL representation # get all files of LOCAL representation
$ret->{"filepids"} = get_filepids_ref($xp, $filesec, $repid);
$ret->{"files"} = get_files_ref($xp, $filesec, $repid); $ret->{"files"} = get_files_ref($xp, $filesec, $repid);
foreach my $location (@{$ret->{"files"}}) { foreach my $fpid (@{$ret->{"filepids"}}) {
$ret->{"sizes"}->{$location} = get_filesize($xp, $location); #say "pid=$fpid";
} my $location = get_file_path($xp, $filesec, $fpid);
#say "location=$location";
my $size = get_filesize($xp, $fpid);
#say "size=$size";
$ret->{"sizes"}->{$location} = $size ;
}
return $ret; return $ret;
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment