From 286656564ff64201ec6529189cccb048e4b31775 Mon Sep 17 00:00:00 2001 From: Andreas Romeyke <andreas.romeyke@slub-dresden.de> Date: Wed, 23 Feb 2022 11:09:09 +0100 Subject: [PATCH] - refactoring, split profile_path into current and upcoming profile paths - refactoring, xsltproc_binary_path now customizable - refactoring, changed getProfile() to use only hints about the current and upcoming profiles instead of providing the profile xml itself. This results in slightly smaller XML in AIP and is more descriptive. - refactoring, renamed methods to camelCase --- ...icalMetadataExtractorMediaConchPlugin.java | 116 ++++++++++++------ 1 file changed, 77 insertions(+), 39 deletions(-) diff --git a/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java index e10dbfc..71b8cc0 100644 --- a/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java +++ b/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java @@ -21,7 +21,6 @@ import com.exlibris.dps.sdk.techmd.MDExtractorPlugin; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; - import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.xpath.XPath; @@ -35,57 +34,65 @@ import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.math.BigInteger; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.nio.file.StandardCopyOption; +import java.security.MessageDigest; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; + /** * SLUBTechnicalMetadataExtractorMediaConchPlugin * * @author andreas.romeyke@slub-dresden.de (Andreas Romeyke) * @see 
com.exlibris.dps.sdk.techmd.MDExtractorPlugin */ -/*public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtractorPlugin { */ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtractorPlugin { - private String mediaconch_binary_path; - private String mediaconch_profile_path; + private String mediaconch_current_profile_path; + private String mediaconch_upcoming_profile_path; private String mediainfo_binary_path; - private final static String xsltproc_binary_path = "/bin/xsltproc"; /* path in Exl */ + private String xsltproc_binary_path; private final static String TRANSFORMER_XSL = "/resources/transformer.xsl"; - private List<String> extractionErrors = new ArrayList<>(); private final List<String> validationLog = new ArrayList<>(); private boolean isvalid = false; private boolean iswellformed = false; private final Map<String,String> attributes = new HashMap<>(); - //static final ExLogger log = ExLogger.getExLogger(SLUBTechnicalMetadataExtractorMediaConchPlugin.class, ExLogger.VALIDATIONSTACK); + /** constructor */ public SLUBTechnicalMetadataExtractorMediaConchPlugin() { //log.info("SLUBVirusCheckPlugin instantiated with host=" + host + " port=" + port + " timeout=" + timeout); System.out.println("SLUBTechnicalMetadataExtractorMediaConchPlugin instantiated"); } + /** init params to configure the plugin via xml forms * @param initp parameter map */ public void initParams(Map<String, String> initp) { this.mediaconch_binary_path = initp.get("mediaconch_binary_path").trim(); - this.mediaconch_profile_path = initp.get("mediaconch_profile_path").trim(); + this.mediaconch_current_profile_path = initp.get("mediaconch_current_profile_path").trim(); + this.mediaconch_upcoming_profile_path = initp.get("mediaconch_upcoming_profile_path").trim(); this.mediainfo_binary_path = initp.get("mediainfo_binary_path").trim(); + this.xsltproc_binary_path = initp.get("xsltproc_binary_path").trim(); try { 
checkFileExists(this.mediainfo_binary_path); checkFileExists(this.mediaconch_binary_path); - checkFileExists(this.mediaconch_profile_path); + checkFileExists(this.xsltproc_binary_path); + checkFileExists(this.mediaconch_current_profile_path); + checkFileExists(this.mediaconch_upcoming_profile_path); System.out.println("SLUBTechnicalMetadataExtractorMediaConchPlugin instantiated with " + " mediaconch_binary_path=" + mediaconch_binary_path - + " mediaconch_profile_path=" + mediaconch_profile_path + + " mediaconch_current_profile_path=" + mediaconch_current_profile_path + + " mediaconch_upcoming_profile_path=" + mediaconch_upcoming_profile_path + " mediainfo_binary_path=" + mediainfo_binary_path - + " (xsltproc_binary_path=" + xsltproc_binary_path + ", hardcoded)" + + " xsltproc_binary_path=" + xsltproc_binary_path ); } catch (Exception e) { e.printStackTrace(); @@ -93,15 +100,23 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract } @Override + // TODO: path string, checksum, modification date for both profiles public String getProfile () { - String xmlstring = ""; - try { - xmlstring = Files.readString(Path.of(this.mediaconch_profile_path)); - } catch (IOException e) { - //log.error("exception creation socket, clamd not available at host=" + host + "port=" + port, e); - System.out.println("ERROR: mediaconch profile not available, path=" + this.mediaconch_profile_path + ", " + e.getMessage()); - } - return xmlstring; + String md5_current = md5SumOfFile( this.mediaconch_current_profile_path); + String modified_current = modificationDateOfFile( this.mediaconch_current_profile_path); + String md5_upcoming = md5SumOfFile( this.mediaconch_upcoming_profile_path); + String modified_upcoming = modificationDateOfFile( this.mediaconch_upcoming_profile_path); + /* there is no documentation in ExL API, therefore we use it to document the profile versions in a light way */ + return ( + "current profile:\n" + + " path=" + 
this.mediaconch_current_profile_path + "\n" + + " md5sum=" + md5_current + "\n" + + " modification date=" + modified_current + "\n" + + "upcoming profile:\n" + + " path=" + this.mediaconch_upcoming_profile_path + "\n" + + " md5sum=" + md5_upcoming + "\n" + + " modification date=" +modified_upcoming + "\n" + ); } @Override @@ -109,23 +124,23 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract if (StringUtils.isEmptyString(this.mediaconch_binary_path)) { throw new Exception("mediaconch_binary_path not found"); } - if (StringUtils.isEmptyString(this.mediaconch_profile_path)) { + if (StringUtils.isEmptyString(this.mediaconch_current_profile_path)) { throw new Exception("mediaconch_profile_path not found"); } if (StringUtils.isEmptyString(this.mediainfo_binary_path)) { throw new Exception("mediainfo_binary_path not found"); } checkFileExists(this.mediaconch_binary_path); - checkFileExists(this.mediaconch_profile_path); + checkFileExists(this.mediaconch_current_profile_path); checkFileExists(this.mediainfo_binary_path); //checkFileExists(xsltproc_binary_path); // mediaconch validation - call_mediaconch(filePath); + callMediaconch(filePath); // mediainfo metadata extraction - call_mediainfo_and_result_processing(filePath); + callMediainfoAndResultProcessing(filePath); } - private void call_mediainfo(String file_to_extract_from, String temp_mediainfo_outputfile ) throws Exception { + private void callMediainfo(String file_to_extract_from, String temp_mediainfo_outputfile ) throws Exception { String exec_mediainfo_string = this.mediainfo_binary_path + " -f --Output=XML " + file_to_extract_from; System.out.println("executing: " + exec_mediainfo_string); checkFileExists(this.mediainfo_binary_path); @@ -169,18 +184,18 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract temp_media_streamwriter.close(); } - private void call_mediainfo_and_result_processing(String filePath) throws Exception { + private void 
callMediainfoAndResultProcessing(String filePath) throws Exception { File temp_media_outputfile = File.createTempFile("mediainfo_outp", ".xml"); temp_media_outputfile.deleteOnExit(); - call_mediainfo(filePath, String.valueOf(temp_media_outputfile)); + callMediainfo(filePath, String.valueOf(temp_media_outputfile)); if (temp_media_outputfile.length() > 0) { File temp_media_transformed_outputfile = File.createTempFile("mediainfo_transf_", ".xml"); temp_media_transformed_outputfile.deleteOnExit(); //OutputStream temp_media_outputstream = new FileOutputStream(temp_media_outputfile); - transform_mediainfo_output(temp_media_outputfile, temp_media_transformed_outputfile); + transformMediainfoOutput(temp_media_outputfile, temp_media_transformed_outputfile); if (temp_media_transformed_outputfile.length() > 0) { /* TODO: read transformed outputfile and return attributes */ - extract_attributes_of_transformed_result(temp_media_transformed_outputfile); + extractAttributesOfTransformedResult(temp_media_transformed_outputfile); } else { throw new Exception( "size of intermediate transformed file '" + temp_media_transformed_outputfile + "' is zero, something broken"); } @@ -190,8 +205,8 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract temp_media_outputfile.delete(); } - private void transform_mediainfo_output(File temp_media_outputfile, File temp_media_transformed_outputfile) throws Exception { - File temp_xsdfile = create_temp_xsd_file(); + private void transformMediainfoOutput(File temp_media_outputfile, File temp_media_transformed_outputfile) throws Exception { + File temp_xsdfile = createTempXsdFile(); /* xslt transform */ String exec_xsltproc_string = xsltproc_binary_path + " -o " + temp_media_transformed_outputfile + " " + temp_xsdfile + " " + temp_media_outputfile; System.out.println("executing: " + exec_xsltproc_string); @@ -209,7 +224,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract } } - private File 
create_temp_xsd_file() throws Exception { + private File createTempXsdFile() throws Exception { /* write xsd */ File temp_xsdfile = File.createTempFile("mediainfo_style", ".xsd"); InputStream stylestream = getClass().getResourceAsStream(TRANSFORMER_XSL); @@ -221,7 +236,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract return temp_xsdfile; } - private void extract_attributes_of_transformed_result(File temp_media_transformed_outputfile) throws Exception { + private void extractAttributesOfTransformedResult(File temp_media_transformed_outputfile) throws Exception { checkFileExists(String.valueOf(temp_media_transformed_outputfile)); XPathFactory xPathfactory = XPathFactory.newInstance(); XPath xpath = xPathfactory.newXPath(); @@ -239,8 +254,8 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract } } - private void call_mediaconch(String filePath) throws Exception { - String execstring = this.mediaconch_binary_path + " " + filePath + " -p " + this.mediaconch_profile_path; + private void callMediaconch(String filePath) throws Exception { + String execstring = this.mediaconch_binary_path + " " + filePath + " -p " + this.mediaconch_current_profile_path; System.out.println("executing: " + execstring); InputStreamReader process_out; try { @@ -279,9 +294,9 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract return "mediaconch"; } - /** get clamd agent version and signature version calling clamd-command VERSION + /** get agent version and signature version calling command VERSION * - * @return string with clamd version and signature version + * @return string with version and signature version */ public String getAgent() { StringBuilder response = new StringBuilder(); @@ -417,14 +432,39 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract throw new Exception("ERROR: path=" + filename + " not available"); } } + + private String md5SumOfFile(String 
filename ) { /* returns the 32-digit lowercase hex MD5 of the file's bytes, or "" if reading or hashing fails */ + try { + MessageDigest md = MessageDigest.getInstance("MD5"); + byte[] b = Files.readAllBytes(Paths.get(filename)); + byte[] digest = md.digest(b); + String hexdigest = String.format("%032x", new BigInteger(1, digest)); /* zero-padded: BigInteger.toString(16) would drop leading zeros */ + return hexdigest; + } catch (Exception e) { + e.printStackTrace(); + } + return ""; + } + + private String modificationDateOfFile(String filename ) { /* returns the file's last-modified time via FileTime.toString(), or "" on IOException */ + try { + return Files.getLastModifiedTime(Paths.get(filename)).toString(); + } catch (IOException e) { + e.printStackTrace(); + } + return ""; + } + /** stand-alone check, main file to call local installed clamd * @param args list of files which should be scanned */ public static void main(String[] args) { SLUBTechnicalMetadataExtractorMediaConchPlugin plugin = new SLUBTechnicalMetadataExtractorMediaConchPlugin(); Map<String, String> initp = new HashMap<>(); + initp.put( "xsltproc_binary_path", "/usr/bin/xsltproc"); initp.put( "mediaconch_binary_path", "/usr/bin/mediaconch"); - initp.put( "mediaconch_profile_path", "/etc/mediaconch/profile.xml"); + initp.put( "mediaconch_current_profile_path", "/etc/mediaconch/profile.xml"); + initp.put( "mediaconch_upcoming_profile_path", "/etc/mediaconch/profile.xml"); initp.put( "mediainfo_binary_path", "/usr/bin/mediainfo"); plugin.initParams( initp ); System.out.println("----------------------------------"); @@ -471,5 +511,3 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract } } } - - -- GitLab