diff --git a/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java index 78a9f2cd93bb09bb0f60cca49f1ad19400d50ccc..9d43746d6010364540583a5a2a02bdee854754c0 100644 --- a/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java +++ b/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java @@ -16,7 +16,6 @@ limitations under the License. package org.slub.rosetta.dps.repository.plugin; - import com.exlibris.core.sdk.strings.StringUtils; import com.exlibris.dps.sdk.techmd.MDExtractorPlugin; import org.w3c.dom.Document; @@ -25,33 +24,25 @@ import org.w3c.dom.NodeList; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerConfigurationException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.TransformerFactoryConfigurationError; -import javax.xml.transform.stream.StreamResult; -import javax.xml.transform.stream.StreamSource; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathFactory; import java.io.BufferedReader; +import java.io.BufferedWriter; import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; +import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.OutputStreamWriter; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.StandardCopyOption; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; - /** * SLUBTechnicalMetadataExtractorMediaConchPlugin * @@ -64,7 +55,8 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract private String mediaconch_binary_path; private String mediaconch_profile_path; private String mediainfo_binary_path; - private final static String MEDIAINFO_XSD = "/transformer.xsl"; + private final static String xsltproc_binary_path = "/bin/xsltproc"; /* path in Exl */ + private final static String TRANSFORMER_XSL = "/resources/transformer.xsl"; private List<String> extractionErrors = new ArrayList<>(); private final List<String> validationLog = new ArrayList<>(); @@ -93,6 +85,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract + " mediaconch_binary_path=" + mediaconch_binary_path + " mediaconch_profile_path=" + mediaconch_profile_path + " mediainfo_binary_path=" + mediainfo_binary_path + + " (xsltproc_binary_path=" + xsltproc_binary_path + ", hardcoded)" ); } catch (Exception e) { e.printStackTrace(); @@ -125,19 +118,20 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract checkFileExists(this.mediaconch_binary_path); checkFileExists(this.mediaconch_profile_path); checkFileExists(this.mediainfo_binary_path); + //checkFileExists(xsltproc_binary_path); // mediaconch validation call_mediaconch(filePath); // mediainfo metadata extraction call_mediainfo_and_result_processing(filePath); } - private void call_mediainfo_and_result_processing(String filePath) throws Exception { - String execstring = this.mediainfo_binary_path + " -f --Output=XML " + filePath; - System.out.println("executing: " + execstring); + private void call_mediainfo(String file_to_extract_from, String temp_mediainfo_outputfile ) throws Exception { + String exec_mediainfo_string = this.mediainfo_binary_path + " -f --Output=XML " + file_to_extract_from; + System.out.println("executing: " + exec_mediainfo_string); checkFileExists(this.mediainfo_binary_path); InputStreamReader process_out; try { - Process p = Runtime.getRuntime().exec(execstring); + Process p = Runtime.getRuntime().exec(exec_mediainfo_string); p.waitFor(); process_out = new InputStreamReader(p.getInputStream()); } catch (IOException e) { @@ -155,68 +149,77 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract String regex = "xsi:schemaLocation.*?>"; String line_patched = line.replaceAll(regex, ""); mediainfo_output.append(line_patched); - //mediainfo_output.append(line); + System.out.println("LINE (patched): " + line_patched); line = reader.readLine(); } reader.close(); - File temp_media_outputfile = File.createTempFile("mediainfo_outp", ".xml"); - temp_media_outputfile.deleteOnExit(); - File temp_media_transformed_outputfile = File.createTempFile("mediainfo_transf_", ".xml"); - temp_media_transformed_outputfile.deleteOnExit(); - OutputStream temp_media_transformed_outputstream = new FileOutputStream(temp_media_transformed_outputfile); - OutputStream temp_media_outputstream = new FileOutputStream(temp_media_outputfile); - OutputStreamWriter temp_media_streamwriter = new OutputStreamWriter(temp_media_outputstream); + FileWriter temp_media_outputstream = new FileWriter(temp_mediainfo_outputfile); + BufferedWriter temp_media_streamwriter = new BufferedWriter(temp_media_outputstream); temp_media_streamwriter.append(mediainfo_output); + temp_media_streamwriter.flush(); + temp_media_outputstream.flush(); + temp_media_outputstream.close(); temp_media_streamwriter.close(); - /* xslt transform */ - InputStream stylestream = getClass().getResourceAsStream(MEDIAINFO_XSD); - StreamSource stylesource = new StreamSource(stylestream); - stylesource.setSystemId("./resources" + MEDIAINFO_XSD); - System.out.println("stylesource, tempfile_raw=" + temp_media_outputfile.getAbsolutePath()); - System.out.println("stylesource, tempfile_transformed=" + temp_media_transformed_outputfile.getAbsolutePath()); - System.out.println("stylesource, systemID=" + stylesource.getSystemId()); - System.out.println("stylesource, publicID=" + stylesource.getPublicId()); - /* media info xml */ - InputStream mediastream = new FileInputStream(temp_media_outputfile); - StreamSource mediainfo_source = new StreamSource(mediastream); - mediainfo_source.setSystemId(temp_media_outputfile); - System.out.println("mediainfo_source, systemID=" + mediainfo_source.getSystemId()); - System.out.println("mediainfo_source, publicID=" + mediainfo_source.getPublicId()); - // Use a Transformer for output - TransformerFactory tFactory; - try { + } - tFactory = TransformerFactory.newInstance("com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl", ClassLoader.getPlatformClassLoader()); - //System.out.println("Factory module name:" + tFactory.getClass().getModule().getName()); - //tFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); - //tFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, ""); - //tFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, ""); - //tFactory.setFeature(XMLConstants.USE_CATALOG, false); - System.out.println("transformerFactory:" + System.getProperty("javax.xml.transform.TransformerFactory")); - try { - System.out.println("stylesource=" + stylesource); - assert (!stylesource.isEmpty()); - Transformer transformer = tFactory.newTransformer(stylesource); - assert (transformer != null); - System.out.println("transformer=" + transformer); - /* ok, mediainfo is loaded correctly, and xslt loaded too */ - /* debug output: */ - StreamResult result = new StreamResult(temp_media_transformed_outputstream); - System.out.println("result=" + result.getClass()); - transformer.transform(mediainfo_source, result); - } catch (TransformerConfigurationException e) { - System.err.println("TransformerConfigurationException" + e); - e.printStackTrace(); + private void call_mediainfo_and_result_processing(String filePath) throws Exception { + File temp_media_outputfile = File.createTempFile("mediainfo_outp", ".xml"); + temp_media_outputfile.deleteOnExit(); + call_mediainfo(filePath, String.valueOf(temp_media_outputfile)); + if (temp_media_outputfile.length() > 0) { + File temp_media_transformed_outputfile = File.createTempFile("mediainfo_transf_", ".xml"); + temp_media_transformed_outputfile.deleteOnExit(); + //OutputStream temp_media_outputstream = new FileOutputStream(temp_media_outputfile); + transform_mediainfo_output(temp_media_outputfile, temp_media_transformed_outputfile); + if (temp_media_transformed_outputfile.length() > 0) { + /* TODO: read transformed outputfile and return attributes */ + extract_attributes_of_transformed_result(temp_media_transformed_outputfile); + } else { + throw new Exception( "size of intermediate transformed file '" + temp_media_transformed_outputfile + "' is zero, something broken"); } - }catch (TransformerFactoryConfigurationError e) { - System.err.println("TransformerConfigurationError" + e); - e.printStackTrace(); + } else { + throw new Exception( "size of intermediate mediainfo file '" + temp_media_outputfile + "' is zero, something broken"); + } + temp_media_outputfile.delete(); + } + + private void transform_mediainfo_output(File temp_media_outputfile, File temp_media_transformed_outputfile) throws Exception { + /* write xsd */ + File temp_xsdfile = File.createTempFile("mediainfo_style", ".xsd"); + InputStream stylestream = getClass().getResourceAsStream(TRANSFORMER_XSL); + if (stylestream == null) { + throw new Exception("stylestream not found!"); } + /* + OutputStream out = new FileOutputStream(temp_xsdfile); + int read = 0; + byte[] buffer = new byte[8192]; + assert(stylestream != null); + stylestream.read( buffer ); + while((read =stylestream.read( buffer)) != -1) { + out.write(buffer, 0, read); + } + out.flush(); + out.close(); + */ - temp_media_transformed_outputstream.close(); - mediastream.close(); - /* TODO: read transformed outputfile and return attributes */ - extract_attributes_of_transformed_result(temp_media_transformed_outputfile); + Files.copy(stylestream, Path.of(String.valueOf(temp_xsdfile)), StandardCopyOption.REPLACE_EXISTING); + stylestream.close(); + /* xslt transform */ + String exec_xsltproc_string = xsltproc_binary_path + " -o " + temp_media_transformed_outputfile + " " + temp_xsdfile + " " + temp_media_outputfile; + System.out.println("executing: " + exec_xsltproc_string); + InputStreamReader process_out; + try { + Process p = Runtime.getRuntime().exec(exec_xsltproc_string); + p.waitFor(); + process_out = new InputStreamReader(p.getInputStream()); + } catch (IOException e) { + //log.error("exception creation socket, clamd not available at host=" + host + "port=" + port, e); + System.out.println("ERROR: (actual) xsltproc not available, path=" + xsltproc_binary_path + ", " + e.getMessage()); + throw new Exception("ERROR: (actual) xsltproc not available, path=" + xsltproc_binary_path + ", " + e.getMessage()); + } finally { + temp_xsdfile.delete(); + } } private void extract_attributes_of_transformed_result(File temp_media_transformed_outputfile) throws Exception { @@ -238,7 +241,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract } private void call_mediaconch(String filePath) throws Exception { - String execstring = this.mediaconch_binary_path + " " + filePath + " " + this.mediaconch_profile_path; + String execstring = this.mediaconch_binary_path + " " + filePath + " -p " + this.mediaconch_profile_path; System.out.println("executing: " + execstring); InputStreamReader process_out; try { @@ -260,9 +263,13 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract BufferedReader reader = new BufferedReader(process_out); String line = reader.readLine(); while (line != null) { - System.out.println(line); + if (line.contains("pass!")) { + break; + } + System.out.println("MEDIACONCH line: " + line); validationLog.add(line); line = reader.readLine(); + } reader.close(); extractionErrors = validationLog; @@ -508,6 +515,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract } catch (Exception e) { System.out.println("CALLERROR:"); e.printStackTrace(); + return; } System.out.println("RESULT: " + plugin.isValid()); System.out.println("ERRORMESSAGE: " + plugin.getExtractionErrors());