diff --git a/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java index e10dbfc0d4f601e2c55135b72da56a55c63723b4..71b8cc09fc5275608352bd9b8de370b61da1fe38 100644 --- a/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java +++ b/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java @@ -21,7 +21,6 @@ import com.exlibris.dps.sdk.techmd.MDExtractorPlugin; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; - import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.xpath.XPath; @@ -35,57 +34,65 @@ import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.math.BigInteger; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.nio.file.StandardCopyOption; +import java.security.MessageDigest; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; + /** * SLUBTechnicalMetadataExtractorMediaConchPlugin * * @author andreas.romeyke@slub-dresden.de (Andreas Romeyke) * @see com.exlibris.dps.sdk.techmd.MDExtractorPlugin */ -/*public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtractorPlugin { */ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtractorPlugin { - private String mediaconch_binary_path; - private String mediaconch_profile_path; + private String mediaconch_current_profile_path; + private String mediaconch_upcoming_profile_path; private String mediainfo_binary_path; - private final static String xsltproc_binary_path = "/bin/xsltproc"; /* path in Exl */ + private String xsltproc_binary_path; private final static String TRANSFORMER_XSL = 
"/resources/transformer.xsl"; - private List<String> extractionErrors = new ArrayList<>(); private final List<String> validationLog = new ArrayList<>(); private boolean isvalid = false; private boolean iswellformed = false; private final Map<String,String> attributes = new HashMap<>(); - //static final ExLogger log = ExLogger.getExLogger(SLUBTechnicalMetadataExtractorMediaConchPlugin.class, ExLogger.VALIDATIONSTACK); + /** constructor */ public SLUBTechnicalMetadataExtractorMediaConchPlugin() { //log.info("SLUBVirusCheckPlugin instantiated with host=" + host + " port=" + port + " timeout=" + timeout); System.out.println("SLUBTechnicalMetadataExtractorMediaConchPlugin instantiated"); } + /** init params to configure the plugin via xml forms * @param initp parameter map */ public void initParams(Map<String, String> initp) { this.mediaconch_binary_path = initp.get("mediaconch_binary_path").trim(); - this.mediaconch_profile_path = initp.get("mediaconch_profile_path").trim(); + this.mediaconch_current_profile_path = initp.get("mediaconch_current_profile_path").trim(); + this.mediaconch_upcoming_profile_path = initp.get("mediaconch_upcoming_profile_path").trim(); this.mediainfo_binary_path = initp.get("mediainfo_binary_path").trim(); + this.xsltproc_binary_path = initp.get("xsltproc_binary_path").trim(); try { checkFileExists(this.mediainfo_binary_path); checkFileExists(this.mediaconch_binary_path); - checkFileExists(this.mediaconch_profile_path); + checkFileExists(this.xsltproc_binary_path); + checkFileExists(this.mediaconch_current_profile_path); + checkFileExists(this.mediaconch_upcoming_profile_path); System.out.println("SLUBTechnicalMetadataExtractorMediaConchPlugin instantiated with " + " mediaconch_binary_path=" + mediaconch_binary_path - + " mediaconch_profile_path=" + mediaconch_profile_path + + " mediaconch_current_profile_path=" + mediaconch_current_profile_path + + " mediaconch_upcoming_profile_path=" + mediaconch_upcoming_profile_path + " 
mediainfo_binary_path=" + mediainfo_binary_path - + " (xsltproc_binary_path=" + xsltproc_binary_path + ", hardcoded)" + + " xsltproc_binary_path=" + xsltproc_binary_path ); } catch (Exception e) { e.printStackTrace(); @@ -93,15 +100,23 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract } @Override + // TODO: path string, checksum, modification date for both profiles public String getProfile () { - String xmlstring = ""; - try { - xmlstring = Files.readString(Path.of(this.mediaconch_profile_path)); - } catch (IOException e) { - //log.error("exception creation socket, clamd not available at host=" + host + "port=" + port, e); - System.out.println("ERROR: mediaconch profile not available, path=" + this.mediaconch_profile_path + ", " + e.getMessage()); - } - return xmlstring; + String md5_current = md5SumOfFile( this.mediaconch_current_profile_path); + String modified_current = modificationDateOfFile( this.mediaconch_current_profile_path); + String md5_upcoming = md5SumOfFile( this.mediaconch_upcoming_profile_path); + String modified_upcoming = modificationDateOfFile( this.mediaconch_upcoming_profile_path); + /* there is no documentation in ExL API, therefore we use it to document the profile versions in a light way */ + return ( + "current profile:\n" + + " path=" + this.mediaconch_current_profile_path + "\n" + + " md5sum=" + md5_current + "\n" + + " modification date=" + modified_current + "\n" + + "upcoming profile:\n" + + " path=" + this.mediaconch_upcoming_profile_path + "\n" + + " md5sum=" + md5_upcoming + "\n" + + " modification date=" +modified_upcoming + "\n" + ); } @Override @@ -109,23 +124,23 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract if (StringUtils.isEmptyString(this.mediaconch_binary_path)) { throw new Exception("mediaconch_binary_path not found"); } - if (StringUtils.isEmptyString(this.mediaconch_profile_path)) { + if
(StringUtils.isEmptyString(this.mediaconch_current_profile_path)) { throw new Exception("mediaconch_profile_path not found"); } if (StringUtils.isEmptyString(this.mediainfo_binary_path)) { throw new Exception("mediainfo_binary_path not found"); } checkFileExists(this.mediaconch_binary_path); - checkFileExists(this.mediaconch_profile_path); + checkFileExists(this.mediaconch_current_profile_path); checkFileExists(this.mediainfo_binary_path); //checkFileExists(xsltproc_binary_path); // mediaconch validation - call_mediaconch(filePath); + callMediaconch(filePath); // mediainfo metadata extraction - call_mediainfo_and_result_processing(filePath); + callMediainfoAndResultProcessing(filePath); } - private void call_mediainfo(String file_to_extract_from, String temp_mediainfo_outputfile ) throws Exception { + private void callMediainfo(String file_to_extract_from, String temp_mediainfo_outputfile ) throws Exception { String exec_mediainfo_string = this.mediainfo_binary_path + " -f --Output=XML " + file_to_extract_from; System.out.println("executing: " + exec_mediainfo_string); checkFileExists(this.mediainfo_binary_path); @@ -169,18 +184,18 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract temp_media_streamwriter.close(); } - private void call_mediainfo_and_result_processing(String filePath) throws Exception { + private void callMediainfoAndResultProcessing(String filePath) throws Exception { File temp_media_outputfile = File.createTempFile("mediainfo_outp", ".xml"); temp_media_outputfile.deleteOnExit(); - call_mediainfo(filePath, String.valueOf(temp_media_outputfile)); + callMediainfo(filePath, String.valueOf(temp_media_outputfile)); if (temp_media_outputfile.length() > 0) { File temp_media_transformed_outputfile = File.createTempFile("mediainfo_transf_", ".xml"); temp_media_transformed_outputfile.deleteOnExit(); //OutputStream temp_media_outputstream = new FileOutputStream(temp_media_outputfile); - 
transform_mediainfo_output(temp_media_outputfile, temp_media_transformed_outputfile); + transformMediainfoOutput(temp_media_outputfile, temp_media_transformed_outputfile); if (temp_media_transformed_outputfile.length() > 0) { /* TODO: read transformed outputfile and return attributes */ - extract_attributes_of_transformed_result(temp_media_transformed_outputfile); + extractAttributesOfTransformedResult(temp_media_transformed_outputfile); } else { throw new Exception( "size of intermediate transformed file '" + temp_media_transformed_outputfile + "' is zero, something broken"); } @@ -190,8 +205,8 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract temp_media_outputfile.delete(); } - private void transform_mediainfo_output(File temp_media_outputfile, File temp_media_transformed_outputfile) throws Exception { - File temp_xsdfile = create_temp_xsd_file(); + private void transformMediainfoOutput(File temp_media_outputfile, File temp_media_transformed_outputfile) throws Exception { + File temp_xsdfile = createTempXsdFile(); /* xslt transform */ String exec_xsltproc_string = xsltproc_binary_path + " -o " + temp_media_transformed_outputfile + " " + temp_xsdfile + " " + temp_media_outputfile; System.out.println("executing: " + exec_xsltproc_string); @@ -209,7 +224,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract } } - private File create_temp_xsd_file() throws Exception { + private File createTempXsdFile() throws Exception { /* write xsd */ File temp_xsdfile = File.createTempFile("mediainfo_style", ".xsd"); InputStream stylestream = getClass().getResourceAsStream(TRANSFORMER_XSL); @@ -221,7 +236,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract return temp_xsdfile; } - private void extract_attributes_of_transformed_result(File temp_media_transformed_outputfile) throws Exception { + private void extractAttributesOfTransformedResult(File temp_media_transformed_outputfile) 
throws Exception { checkFileExists(String.valueOf(temp_media_transformed_outputfile)); XPathFactory xPathfactory = XPathFactory.newInstance(); XPath xpath = xPathfactory.newXPath(); @@ -239,8 +254,8 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract } } - private void call_mediaconch(String filePath) throws Exception { - String execstring = this.mediaconch_binary_path + " " + filePath + " -p " + this.mediaconch_profile_path; + private void callMediaconch(String filePath) throws Exception { + String execstring = this.mediaconch_binary_path + " " + filePath + " -p " + this.mediaconch_current_profile_path; System.out.println("executing: " + execstring); InputStreamReader process_out; try { @@ -279,9 +294,9 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract return "mediaconch"; } - /** get clamd agent version and signature version calling clamd-command VERSION + /** get agent version and signature version calling command VERSION * - * @return string with clamd version and signature version + * @return string with version and signature version */ public String getAgent() { StringBuilder response = new StringBuilder(); @@ -417,14 +432,39 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract throw new Exception("ERROR: path=" + filename + " not available"); } } + + private String md5SumOfFile(String filename ) { /* returns the MD5 of the file content as 32 lowercase hex digits, or "" if reading or hashing fails */ + try { + MessageDigest md = MessageDigest.getInstance("MD5"); + byte[] b = Files.readAllBytes(Paths.get(filename)); + byte[] digest = md.digest(b); + String hexdigest = String.format("%032x", new BigInteger(1, digest)); /* zero-padded: BigInteger.toString(16) would drop leading zeros and yield a digest shorter than 32 digits */ + return hexdigest; + } catch (Exception e) { + e.printStackTrace(); + } + return ""; + } + + private String modificationDateOfFile(String filename ) { /* returns the last-modified time of the file as a string, or "" if it cannot be read */ + try { + return Files.getLastModifiedTime(Paths.get(filename)).toString(); + } catch (IOException e) { + e.printStackTrace(); + } + return ""; + } + /** stand-alone check, main file to call local installed clamd *
@param args list of files which should be scanned */ public static void main(String[] args) { SLUBTechnicalMetadataExtractorMediaConchPlugin plugin = new SLUBTechnicalMetadataExtractorMediaConchPlugin(); Map<String, String> initp = new HashMap<>(); + initp.put( "xsltproc_binary_path", "/usr/bin/xsltproc"); initp.put( "mediaconch_binary_path", "/usr/bin/mediaconch"); - initp.put( "mediaconch_profile_path", "/etc/mediaconch/profile.xml"); + initp.put( "mediaconch_current_profile_path", "/etc/mediaconch/profile.xml"); + initp.put( "mediaconch_upcoming_profile_path", "/etc/mediaconch/profile.xml"); initp.put( "mediainfo_binary_path", "/usr/bin/mediainfo"); plugin.initParams( initp ); System.out.println("----------------------------------"); @@ -471,5 +511,3 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract } } } - -