Skip to content
Snippets Groups Projects
Commit a79401b9 authored by Andreas Romeyke's avatar Andreas Romeyke
Browse files

- init

parent eba79e3c
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/perl -w
use strict;
use warnings;
###############################################################################
# Author: Andreas Romeyke
# SLUB Dresden, Department Longterm Preservation
#
# scans a given repository and reports problematic IE XML files
###############################################################################
use feature "say";
use Carp;
use File::Find;
use XML::LibXML;
use Time::Progress;
use Path::Tiny;
use IO::Handle;
# Unbuffered STDOUT, so the "\r"-terminated progress lines printed below are
# shown immediately instead of waiting for a newline.
STDOUT->autoflush(1);

# The directory to scan is the first command line argument. Stop early with a
# usage hint instead of handing an undefined value to File::Find.
my $dir = shift @ARGV;
if ( !defined $dir ) {
    die "usage: $0 <repository directory>\n";
}

# Temporary working directory; CLEANUP => 1 asks Path::Tiny to remove it again.
my $tmp_ies_dir = Path::Tiny->tempdir(
    TEMPLATE => "exitstrategy_XXXXXXXXXXX",
    CLEANUP  => 1
);

# File collecting the paths of all IE XML files found, one path per line.
my $tmp_ies_unsorted_file = $tmp_ies_dir->child("unsorted_ies");
my $maxcount   = 0;                        # number of IE XML files found
my $allcount   = 0;                        # number of entries visited by find()
my $sp         = Time::Progress->new();    # progress reporter for the search phase
my @errorneous = ();                       # files that could not be parsed as XML
# Tries to parse the given file as XML and records it as erroneous on failure.
#
# Parameters:
#   $filename - path of the XML file to parse
# Returns: nothing. A file whose parsing throws an exception is appended to
#   the file-level list @errorneous.
sub check_xml {
    my ($filename) = @_;

    # The eval block ends in a true value, so a false result means that the
    # parser died. This does not depend on $@ still being set after the eval
    # (older perls could reset it while unwinding).
    my $parsed_ok = eval {
        XML::LibXML->load_xml(
            location  => $filename,
            no_blanks => 1,
            compact   => 1
        );
        1;
    };
    push @errorneous, $filename if !$parsed_ok;
    return;
} ## end sub check_xml
# File::Find callback: counts every visited entry and collects the paths that
# look like IE XML files.
#
# Reads $File::Find::name (path of the current entry). Every call increments
# $allcount. A path ending in "V<digits>-IE<digits>.xml" is appended as one
# line to $tmp_ies_unsorted_file, increments $maxcount and refreshes the
# progress line on STDOUT.
#
# Declared without a prototype: the sub is only called through a reference by
# find(), where a prototype has no effect and would merely mislead readers.
sub process_sip {
    my $file = $File::Find::name;
    $allcount++;

    # Nothing to record unless the name matches the IE XML naming scheme.
    return if $file !~ m/V\d+-IE\d+\.xml$/;

    $tmp_ies_unsorted_file->append( $file . "\n" );
    $maxcount++;

    # "\r" keeps the cursor on the same line, so the next report overwrites it.
    print "find IE files $maxcount of $allcount files, "
        . $sp->report("%4l s \r");
    return;
} ## end sub process_sip
# Phase 1: walk the directory tree and collect the paths of all IE XML files.
say "searching IE files";

# Make sure the list file exists even when the search finds no IE file at all;
# otherwise opening it for reading below would fail.
$tmp_ies_unsorted_file->touch;
find( \&process_sip, $dir );
print "\r";

# Phase 2: try to parse every collected file, showing a progress bar scaled
# to the number of files found in phase 1.
say "checking IE files";
my $fh = $tmp_ies_unsorted_file->openr();
my $p  = Time::Progress->new(
    min => 0,
    max => $maxcount
);
my $i = 0;

# A lexical loop variable is used instead of $_, so that code called from
# check_xml() cannot clobber the current filename.
while ( my $ie_file = <$fh> ) {
    print $p->report( "check IE files: %40b ETA: %E \r", $i++ );
    chomp $ie_file;
    check_xml($ie_file);
} ## end while ( my $ie_file = <$fh> )
close $fh;
print "\r";
say "done.";

# Phase 3: list every file that could not be parsed.
foreach my $file (@errorneous) {
    say " $file";
}
1;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment