From b0668ded975d9e0231dd94772b9d9066050f6b03 Mon Sep 17 00:00:00 2001
From: Andreas Romeyke <andreas.romeyke@slub-dresden.de>
Date: Fri, 4 Feb 2022 17:45:29 +0100
Subject: [PATCH] - because of class-loading issues in Rosetta, the working
 code was rewritten to call xsltproc as an external program to transform the
 output. The path to xsltproc is hard-coded, because it is part of the Ex
 Libris Rosetta rollout. - HINT: the plugin can be tested as dps-user by calling:
 java -cp /exlibris/dps/d4_1/system.dir/dps-sdk-7.1.0/\  
 lib/dps-sdk-7.1.0.jar:/exlibris/dps/d4_1/system.dir/dps-sdk-7.1.0\  
 /dps-sdk-projects/dps-sdk-deposit/lib/log4j-1.2.14.jar:/exlibris/dps\  
 /d4_1/system.dir/dps-sdk-7.1.0//dps-sdk-projects/dps-sdk-deposit/lib\  
 /commons-codec-1.10.jar:/exlibris/dps/d4_1/system.dir/dps-sdk-7.1.0/\  
 dps-sdk-projects/dps-sdk-deposit/lib/xmlbeans-2.3.0.jar:/usr/share/\  
 java/commons-lang.jar:/operational_shared/plugins/custom/\  
 SLUBTechnicalMetadataExtractorMediaConchPlugin.jar \  
 org.slub.rosetta.dps.repository.plugin.SLUBTechnicalMetadataExtractorMediaConchPlugin
 \   /permanent_storage/normal/2021/06/14/IE19503/V1-FL19505.mkv   (for
 example on dev)

---
 ...icalMetadataExtractorMediaConchPlugin.java | 152 +++++++++---------
 1 file changed, 80 insertions(+), 72 deletions(-)

diff --git a/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java
index 78a9f2c..9d43746 100644
--- a/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java
+++ b/java/org/slub/rosetta/dps/repository/plugin/SLUBTechnicalMetadataExtractorMediaConchPlugin.java
@@ -16,7 +16,6 @@ limitations under the License.
 
 package org.slub.rosetta.dps.repository.plugin;
 
-
 import com.exlibris.core.sdk.strings.StringUtils;
 import com.exlibris.dps.sdk.techmd.MDExtractorPlugin;
 import org.w3c.dom.Document;
@@ -25,33 +24,25 @@ import org.w3c.dom.NodeList;
 
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerConfigurationException;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.TransformerFactoryConfigurationError;
-import javax.xml.transform.stream.StreamResult;
-import javax.xml.transform.stream.StreamSource;
 import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpression;
 import javax.xml.xpath.XPathFactory;
 import java.io.BufferedReader;
+import java.io.BufferedWriter;
 import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-
 /**
  * SLUBTechnicalMetadataExtractorMediaConchPlugin
  *
@@ -64,7 +55,8 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract
     private String mediaconch_binary_path;
     private String mediaconch_profile_path;
     private String mediainfo_binary_path;
-    private final static String MEDIAINFO_XSD = "/transformer.xsl";
+    private final static String xsltproc_binary_path = "/bin/xsltproc"; /* path in Exl */
+    private final static String TRANSFORMER_XSL = "/resources/transformer.xsl";
 
     private List<String> extractionErrors = new ArrayList<>();
     private final List<String> validationLog = new ArrayList<>();
@@ -93,6 +85,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract
                     + " mediaconch_binary_path=" + mediaconch_binary_path
                     + " mediaconch_profile_path=" + mediaconch_profile_path
                     + " mediainfo_binary_path=" + mediainfo_binary_path
+                    + " (xsltproc_binary_path=" + xsltproc_binary_path + ", hardcoded)"
             );
         } catch (Exception e) {
             e.printStackTrace();
@@ -125,19 +118,20 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract
         checkFileExists(this.mediaconch_binary_path);
         checkFileExists(this.mediaconch_profile_path);
         checkFileExists(this.mediainfo_binary_path);
+        //checkFileExists(xsltproc_binary_path);
         // mediaconch validation
         call_mediaconch(filePath);
         // mediainfo metadata extraction
         call_mediainfo_and_result_processing(filePath);
     }
 
-    private void call_mediainfo_and_result_processing(String filePath) throws Exception {
-        String execstring = this.mediainfo_binary_path + " -f --Output=XML " + filePath;
-        System.out.println("executing: " + execstring);
+    private void call_mediainfo(String file_to_extract_from, String temp_mediainfo_outputfile ) throws Exception {
+        String exec_mediainfo_string = this.mediainfo_binary_path + " -f --Output=XML " + file_to_extract_from;
+        System.out.println("executing: " + exec_mediainfo_string);
         checkFileExists(this.mediainfo_binary_path);
         InputStreamReader process_out;
         try {
-            Process p = Runtime.getRuntime().exec(execstring);
+            Process p = Runtime.getRuntime().exec(exec_mediainfo_string);
             p.waitFor();
             process_out = new InputStreamReader(p.getInputStream());
         } catch (IOException e) {
@@ -155,68 +149,77 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract
             String regex = "xsi:schemaLocation.*?>";
             String line_patched = line.replaceAll(regex, "");
             mediainfo_output.append(line_patched);
-            //mediainfo_output.append(line);
+            System.out.println("LINE (patched): " + line_patched);
             line = reader.readLine();
         }
         reader.close();
-        File temp_media_outputfile = File.createTempFile("mediainfo_outp", ".xml");
-        temp_media_outputfile.deleteOnExit();
-        File temp_media_transformed_outputfile = File.createTempFile("mediainfo_transf_", ".xml");
-        temp_media_transformed_outputfile.deleteOnExit();
-        OutputStream temp_media_transformed_outputstream = new FileOutputStream(temp_media_transformed_outputfile);
-        OutputStream temp_media_outputstream = new FileOutputStream(temp_media_outputfile);
-        OutputStreamWriter temp_media_streamwriter = new OutputStreamWriter(temp_media_outputstream);
+        FileWriter temp_media_outputstream = new FileWriter(temp_mediainfo_outputfile);
+        BufferedWriter temp_media_streamwriter = new BufferedWriter(temp_media_outputstream);
         temp_media_streamwriter.append(mediainfo_output);
+        temp_media_streamwriter.flush();
+        temp_media_outputstream.flush();
+        temp_media_outputstream.close();
         temp_media_streamwriter.close();
-        /* xslt transform */
-        InputStream stylestream = getClass().getResourceAsStream(MEDIAINFO_XSD);
-        StreamSource stylesource = new StreamSource(stylestream);
-        stylesource.setSystemId("./resources" + MEDIAINFO_XSD);
-        System.out.println("stylesource, tempfile_raw=" + temp_media_outputfile.getAbsolutePath());
-        System.out.println("stylesource, tempfile_transformed=" + temp_media_transformed_outputfile.getAbsolutePath());
-        System.out.println("stylesource, systemID=" + stylesource.getSystemId());
-        System.out.println("stylesource, publicID=" + stylesource.getPublicId());
-        /* media info xml */
-        InputStream mediastream = new FileInputStream(temp_media_outputfile);
-        StreamSource mediainfo_source = new StreamSource(mediastream);
-        mediainfo_source.setSystemId(temp_media_outputfile);
-        System.out.println("mediainfo_source, systemID=" + mediainfo_source.getSystemId());
-        System.out.println("mediainfo_source, publicID=" + mediainfo_source.getPublicId());
-        // Use a Transformer for output
-        TransformerFactory tFactory;
-        try {
+    }
 
-            tFactory = TransformerFactory.newInstance("com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl", ClassLoader.getPlatformClassLoader());
-            //System.out.println("Factory module name:" + tFactory.getClass().getModule().getName());
-            //tFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
-            //tFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
-            //tFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
-            //tFactory.setFeature(XMLConstants.USE_CATALOG, false);
-            System.out.println("transformerFactory:" + System.getProperty("javax.xml.transform.TransformerFactory"));
-            try {
-                System.out.println("stylesource=" + stylesource);
-                assert (!stylesource.isEmpty());
-                Transformer transformer = tFactory.newTransformer(stylesource);
-                assert (transformer != null);
-                System.out.println("transformer=" + transformer);
-                /* ok, mediainfo is loaded correctly, and xslt loaded too */
-                /* debug output: */
-                StreamResult result = new StreamResult(temp_media_transformed_outputstream);
-                System.out.println("result=" + result.getClass());
-                transformer.transform(mediainfo_source, result);
-            } catch (TransformerConfigurationException e) {
-                System.err.println("TransformerConfigurationException" + e);
-                e.printStackTrace();
+    private void call_mediainfo_and_result_processing(String filePath) throws Exception {
+        File temp_media_outputfile = File.createTempFile("mediainfo_outp", ".xml");
+        temp_media_outputfile.deleteOnExit();
+        call_mediainfo(filePath, String.valueOf(temp_media_outputfile));
+        if (temp_media_outputfile.length() > 0) {
+            File temp_media_transformed_outputfile = File.createTempFile("mediainfo_transf_", ".xml");
+            temp_media_transformed_outputfile.deleteOnExit();
+            //OutputStream temp_media_outputstream = new FileOutputStream(temp_media_outputfile);
+            transform_mediainfo_output(temp_media_outputfile, temp_media_transformed_outputfile);
+            if (temp_media_transformed_outputfile.length() > 0) {
+                /* TODO: read transformed outputfile and return attributes */
+                extract_attributes_of_transformed_result(temp_media_transformed_outputfile);
+            } else {
+                throw new Exception( "size of intermediate transformed file '" + temp_media_transformed_outputfile + "' is zero, something broken");
             }
-        }catch (TransformerFactoryConfigurationError e) {
-            System.err.println("TransformerConfigurationError" + e);
-            e.printStackTrace();
+        } else {
+            throw new Exception( "size of intermediate mediainfo file '" + temp_media_outputfile + "' is zero, something broken");
+        }
+        temp_media_outputfile.delete();
+    }
+
+    private void transform_mediainfo_output(File temp_media_outputfile, File temp_media_transformed_outputfile) throws Exception {
+                /* write xsd */
+        File temp_xsdfile = File.createTempFile("mediainfo_style", ".xsd");
+        InputStream stylestream = getClass().getResourceAsStream(TRANSFORMER_XSL);
+        if (stylestream == null) {
+            throw new Exception("stylestream not found!");
         }
+        /*
+        OutputStream out = new FileOutputStream(temp_xsdfile);
+        int read = 0;
+        byte[] buffer = new byte[8192];
+        assert(stylestream != null);
+        stylestream.read( buffer );
+        while((read =stylestream.read( buffer)) != -1) {
+            out.write(buffer, 0, read);
+        }
+        out.flush();
+        out.close();
+        */
 
-        temp_media_transformed_outputstream.close();
-        mediastream.close();
-        /* TODO: read transformed outputfile and return attributes */
-        extract_attributes_of_transformed_result(temp_media_transformed_outputfile);
+        Files.copy(stylestream, Path.of(String.valueOf(temp_xsdfile)), StandardCopyOption.REPLACE_EXISTING);
+        stylestream.close();
+        /* xslt transform */
+        String exec_xsltproc_string = xsltproc_binary_path + " -o " + temp_media_transformed_outputfile + " " + temp_xsdfile + " " + temp_media_outputfile;
+        System.out.println("executing: " + exec_xsltproc_string);
+        InputStreamReader process_out;
+        try {
+            Process p = Runtime.getRuntime().exec(exec_xsltproc_string);
+            p.waitFor();
+            process_out = new InputStreamReader(p.getInputStream());
+        } catch (IOException e) {
+            //log.error("exception creation socket, clamd not available at host=" + host + "port=" + port, e);
+            System.out.println("ERROR: (actual) xsltproc not available, path=" + xsltproc_binary_path + ", " + e.getMessage());
+            throw new Exception("ERROR: (actual) xsltproc not available, path=" + xsltproc_binary_path + ", " + e.getMessage());
+        } finally {
+            temp_xsdfile.delete();
+        }
     }
 
     private void extract_attributes_of_transformed_result(File temp_media_transformed_outputfile) throws Exception {
@@ -238,7 +241,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract
     }
 
     private void call_mediaconch(String filePath) throws Exception {
-        String execstring = this.mediaconch_binary_path + " " + filePath + " " + this.mediaconch_profile_path;
+        String execstring = this.mediaconch_binary_path + " " + filePath + " -p " + this.mediaconch_profile_path;
         System.out.println("executing: " + execstring);
         InputStreamReader process_out;
         try {
@@ -260,9 +263,13 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract
         BufferedReader reader = new BufferedReader(process_out);
         String line = reader.readLine();
         while (line != null) {
-            System.out.println(line);
+            if (line.contains("pass!")) {
+                break;
+            }
+            System.out.println("MEDIACONCH line: " + line);
             validationLog.add(line);
             line = reader.readLine();
+
         }
         reader.close();
         extractionErrors = validationLog;
@@ -508,6 +515,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract
             } catch (Exception e) {
                 System.out.println("CALLERROR:");
                 e.printStackTrace();
+                return;
             }
             System.out.println("RESULT: " + plugin.isValid());
             System.out.println("ERRORMESSAGE: " + plugin.getExtractionErrors());
-- 
GitLab