From 55834acf2b7455eaa90250dbc47e7df5b83c2ac7 Mon Sep 17 00:00:00 2001
From: Andreas Romeyke <art1@andreas-romeyke.de>
Date: Wed, 23 Feb 2022 16:52:57 +0100
Subject: [PATCH] - refactoring - only call checkit_tiff twice if binaries and
 profiles differ - add getProfile() support - enhanced main for tests

---
 ...calMetadataExtractorCheckItTiffPlugin.java | 189 +++++++++++++-----
 1 file changed, 141 insertions(+), 48 deletions(-)

diff --git a/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorCheckItTiffPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorCheckItTiffPlugin.java
index 622afc3..4075ccb 100644
--- a/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorCheckItTiffPlugin.java
+++ b/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorCheckItTiffPlugin.java
@@ -1,5 +1,5 @@
 /*
-2014-2018 by Andreas Romeyke (SLUB Dresden)
+2014-2022 by Andreas Romeyke (SLUB Dresden)
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
-Hint: works only with checkit_tiff version 0.3.1 or higher
+Hint: works only with checkit_tiff version 1.0.0 or higher
 */
 
 package org.slub.rosetta.dps.repository.plugin;
@@ -25,13 +25,17 @@ import com.exlibris.dps.sdk.techmd.MDExtractorPlugin;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
+import java.math.BigInteger;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.security.MessageDigest;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-import java.nio.file.*;
 
 
 /**
@@ -43,15 +47,24 @@ import java.nio.file.*;
 /*public class SLUBTechnicalMetadataExtractorCheckItTiffPlugin implements MDExtractorPlugin { */
 public class SLUBTechnicalMetadataExtractorCheckItTiffPlugin implements MDExtractorPlugin {
     private static final ExLogger log = ExLogger.getExLogger(SLUBTechnicalMetadataExtractorCheckItTiffPlugin.class);
-    private Map<Checkit_tiff_versions, String> checkit_tiff_binary_path = new HashMap<Checkit_tiff_versions, String>();
-    private Map<Checkit_tiff_versions, String> checkit_tiff_config_path = new HashMap<Checkit_tiff_versions, String>();
-    private Map<Checkit_tiff_versions, Boolean> is_checkit_tiff_valid = new HashMap<Checkit_tiff_versions, Boolean>();
+    private enum Checkit_tiff_versions {
+        current, upcoming
+    }
+    private final Map<Checkit_tiff_versions, String> checkit_tiff_binary_path = new HashMap<Checkit_tiff_versions, String>();
+    private final Map<Checkit_tiff_versions, String> checkit_tiff_config_path = new HashMap<Checkit_tiff_versions, String>();
     private String exiftool_binary_path;
     private List<String> extractionErrors = new ArrayList<String>();
-    private List<String> validationLog = new ArrayList<String>();
+    private final List<String> validationLog = new ArrayList<String>();
     private Boolean isvalid = false;
     private Boolean iswellformed = false;
-    private Map<String,String> attributes = new HashMap<String, String>();
+    private final Map<String,String> attributes = new HashMap<String, String>();
+    private final Map<Checkit_tiff_versions, String> md5Profile = new HashMap<>();
+    private final Map<Checkit_tiff_versions, String> md5CheckitTiff = new HashMap<>();
+    private boolean isDifferentProfile = true;
+    private boolean isDifferentCheckItTiff = true;
+    private Map<Checkit_tiff_versions, Boolean> is_checkit_tiff_valid = new HashMap<Checkit_tiff_versions, Boolean>();
+
+
     /** constructor */
     public SLUBTechnicalMetadataExtractorCheckItTiffPlugin() {
         log.info("SLUBTechnicalMetadataExtractorCheckItTiffPlugin instantiated");
@@ -60,33 +73,6 @@ public class SLUBTechnicalMetadataExtractorCheckItTiffPlugin implements MDExtrac
         }
     }
 
-    /** stand-alone check, main file to call local installed clamd
-     * @param args list of files which should be scanned
-     */
-    public static void main(String[] args) {
-        SLUBTechnicalMetadataExtractorCheckItTiffPlugin plugin = new SLUBTechnicalMetadataExtractorCheckItTiffPlugin();
-        Map<String, String> initp = new HashMap<String, String>();
-        // initp.put( "checkit_tiff", "/usr/bin/checkit_tiff");
-        // initp.put( "config_file", "/etc/checkit_tiff/slub.cfg");
-        initp.put( "current_checkit_tiff", "/home/romeyke/git/checkit_tiff/build/checkit_tiff");
-        initp.put( "current_config_file", "/home/romeyke/git/checkit_tiff/example_configs/cit_tiff6_baseline_SLUBrelaxed.cfg");
-        initp.put( "upcoming_checkit_tiff", "/home/romeyke/git/checkit_tiff/build/checkit_tiff");
-        initp.put( "upcoming_config_file", "/home/romeyke/git/checkit_tiff/example_configs/cit_tiff6_baseline_SLUBrelaxed.cfg");
-        initp.put( "exiftool", "/usr/bin/exiftool");
-        plugin.initParams( initp );
-        System.out.println("Agent: '" + plugin.getAgent() + "'");
-        System.out.println();
-        for (String file : args) {
-            try {
-                plugin.extract(file);
-            } catch (Exception e) {
-                e.printStackTrace();
-            }
-            System.out.println("RESULT: " + plugin.isValid());
-            System.out.println("ERRORMESSAGE: " + plugin.getExtractionErrors());
-        }
-    }
-
     /** init params to configure the plugin via xml forms
      * @param initp parameter map
      */
@@ -95,8 +81,22 @@ public class SLUBTechnicalMetadataExtractorCheckItTiffPlugin implements MDExtrac
         this.checkit_tiff_binary_path.put(Checkit_tiff_versions.upcoming, initp.get("upcoming_checkit_tiff").trim());
         this.checkit_tiff_config_path.put(Checkit_tiff_versions.current, initp.get("current_config_file").trim());
         this.checkit_tiff_config_path.put(Checkit_tiff_versions.upcoming, initp.get("upcoming_config_file").trim());
-        
+
         this.exiftool_binary_path = initp.get("exiftool").trim();
+        try {
+            check_path(exiftool_binary_path, "path for exiftool_binary", true);
+            for (Checkit_tiff_versions v: Checkit_tiff_versions.values()) {
+                check_path(checkit_tiff_binary_path.get(v), "path for checkit_tiff binary " + v, true);
+                check_path(checkit_tiff_config_path.get(v), "path for checkit_tiff config " + v, false);
+                md5CheckitTiff.put(v, md5SumOfFile(checkit_tiff_binary_path.get(v)));
+                md5Profile.put(v,md5SumOfFile(checkit_tiff_config_path.get(v)));
+            }
+            /* both flags keep their default (true) if anything above throws, so extract() then still runs both checks */
+            this.isDifferentProfile = ! this.md5Profile.get(Checkit_tiff_versions.current).equals(this.md5Profile.get(Checkit_tiff_versions.upcoming));
+            this.isDifferentCheckItTiff = ! this.md5CheckitTiff.get(Checkit_tiff_versions.current).equals(this.md5CheckitTiff.get(Checkit_tiff_versions.upcoming));
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
         log.info("SLUBTechnicalMetadataExtractorCheckItTiffPlugin instantiated with "
                 + "(current: "
                 + " checkit_tiff_binary_path=" + checkit_tiff_binary_path.get(Checkit_tiff_versions.current)
@@ -105,7 +105,8 @@ public class SLUBTechnicalMetadataExtractorCheckItTiffPlugin implements MDExtrac
                 + " checkit_tiff_binary_path=" + checkit_tiff_binary_path.get(Checkit_tiff_versions.upcoming)
                 + " cfg=" + checkit_tiff_config_path.get(Checkit_tiff_versions.upcoming)
                 + ")"
-                + " and exiftool_binary_path=" + exiftool_binary_path);
+                + " and exiftool_binary_path=" + exiftool_binary_path
+        );
     }
 
     private void parse_exiftool_output( String exiftoolxml ) {
@@ -136,16 +137,9 @@ public class SLUBTechnicalMetadataExtractorCheckItTiffPlugin implements MDExtrac
 
     @Override
     public void extract(String filePath) throws Exception {
-        for (Checkit_tiff_versions v: Checkit_tiff_versions.values()) {
-            check_path( checkit_tiff_binary_path.get(v), "path for (" + v.name() + ") checkit_tiff_binary", true);
-            check_path( checkit_tiff_config_path.get(v), "path for (" + v.name() + ") checkit_tiff_config", false);
-        }
-
-        check_path(exiftool_binary_path, "path for exiftool_binary", true);
         validate_tiff_by_upcoming_checkit_tiff(filePath);
-
         /* only check against current checkit_tiff if upcoming fails */
-        if (is_checkit_tiff_valid.get(Checkit_tiff_versions.upcoming) == false) {
+        if ((this.isDifferentCheckItTiff || isDifferentProfile) && is_checkit_tiff_valid.get(Checkit_tiff_versions.upcoming) == false) {
             validate_tiff_by_current_checkit_tiff(filePath);
         }
 
@@ -1129,9 +1123,108 @@ public class SLUBTechnicalMetadataExtractorCheckItTiffPlugin implements MDExtrac
         return "image/tiff";
     }
 
-    private enum Checkit_tiff_versions {
-        current, upcoming
+    /**
+     * Describes the checkit_tiff profiles in use. There is no documentation
+     * in ExL API, therefore we use it to document the profile versions in
+     * a light way.
+     * @return per version the profile path, its md5sum and modification date
+     */
+    @Override
+    public String getProfile () {
+        String returnvalue = "";
+        for (Checkit_tiff_versions v: Checkit_tiff_versions.values()) {
+            String modificationdate = modificationDateOfFile(this.checkit_tiff_config_path.get(v));
+            /* md5Profile holds the checksum of the profile, md5CheckitTiff the one of the binary */
+            returnvalue = returnvalue + v + "profile:\n"
+                    + " path=" + this.checkit_tiff_config_path.get(v) + "\n"
+                    + " md5sum=" + this.md5Profile.get(v) + "\n"
+                    + " modification date=" + modificationdate + "\n";
+        }
+        return returnvalue;
+    }
+
+    /**
+     * Calculates the MD5 sum of a file.
+     * @param filename path of the file
+     * @return md5sum as 32 lowercase hex digits, empty string if the file could not be read
+     */
+    private String md5SumOfFile(String filename ) {
+        try {
+            MessageDigest md = MessageDigest.getInstance("MD5");
+            byte[] b = Files.readAllBytes(Paths.get(filename));
+            byte[] digest = md.digest(b);
+            /* pad to 32 digits, BigInteger.toString(16) would drop leading zeros */
+            return String.format("%032x", new BigInteger(1, digest));
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return "";
     }
-}
 
 
+    /**
+     * Determines the modification date of a file.
+     * @param filename path of the file
+     * @return last modified time as string, empty string if it could not be read
+     */
+    private String modificationDateOfFile(String filename ) {
+        try {
+            return Files.getLastModifiedTime(Paths.get(filename)).toString();
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+        return "";
+    }
+
+    /** stand-alone check, main file to call local installed checkit_tiff
+     * @param args list of files which should be checked
+     */
+    public static void main(String[] args) {
+        SLUBTechnicalMetadataExtractorCheckItTiffPlugin plugin = new SLUBTechnicalMetadataExtractorCheckItTiffPlugin();
+        Map<String, String> initp = new HashMap<String, String>();
+        initp.put( "current_checkit_tiff", "/operational_shared/software/checkit_tiff_current");
+        initp.put( "current_config_file", "/operational_shared/software/cit_tiff6_baseline_SLUB_current.cfg");
+        initp.put( "upcoming_checkit_tiff", "/operational_shared/software/checkit_tiff_upcoming");
+        initp.put( "upcoming_config_file", "/operational_shared/software/cit_tiff6_baseline_SLUB_current.cfg");
+        initp.put( "exiftool", "/usr/bin/exiftool");
+        plugin.initParams( initp );
+        System.out.println("Agent: '" + plugin.getAgent() + "'");
+        System.out.println();
+        for (String file : args) {
+            try {
+                plugin.extract(file);
+            } catch (Exception e) {
+                e.printStackTrace();
+            }
+            System.out.println("RESULT: " + plugin.isValid());
+            System.out.println("ERRORMESSAGE: " + plugin.getExtractionErrors());
+        }
+        System.out.println("----------------------------------");
+        System.out.println("getAgentName:");
+        System.out.println( plugin.getAgentName());
+        System.out.println("----------------------------------");
+        System.out.println("getAgent:");
+        System.out.println( plugin.getAgent());
+        System.out.println("----------------------------------");
+        System.out.println("getSupportedAttributeNames:");
+        System.out.println( plugin.getSupportedAttributeNames());
+        System.out.println("----------------------------------");
+        System.out.println("getFormatName:");
+        System.out.println( plugin.getFormatName());
+        System.out.println("----------------------------------");
+        System.out.println("getFormatVersion:");
+        System.out.println( plugin.getFormatVersion());
+        System.out.println("----------------------------------");
+        System.out.println("getMimeType:");
+        System.out.println( plugin.getMimeType());
+        System.out.println("----------------------------------");
+        System.out.println("getProfile:");
+        System.out.println( plugin.getProfile());
+        System.out.println("----------------------------------");
+        System.out.println("getAttributeByName (summarized):");
+        for (Map.Entry<String, String> m : plugin.attributes.entrySet()) {
+            String s = m.getKey() + " -> " + m.getValue();
+            System.out.println(s);
+        }
+    }
+}
-- 
GitLab