Skip to content
Snippets Groups Projects
Commit b0668ded authored by Andreas Romeyke's avatar Andreas Romeyke
Browse files

- Because of class-loading issues in Rosetta, the working code was

  rewritten to call xsltproc as an external program to transform the
  output. The path to xsltproc is hard-coded because xsltproc is part of
  the ExL Rosetta rollout.
- HINT: the plugin can be tested as dps-user by calling:
  java -cp /exlibris/dps/d4_1/system.dir/dps-sdk-7.1.0/\
  lib/dps-sdk-7.1.0.jar:/exlibris/dps/d4_1/system.dir/dps-sdk-7.1.0\
  /dps-sdk-projects/dps-sdk-deposit/lib/log4j-1.2.14.jar:/exlibris/dps\
  /d4_1/system.dir/dps-sdk-7.1.0//dps-sdk-projects/dps-sdk-deposit/lib\
  /commons-codec-1.10.jar:/exlibris/dps/d4_1/system.dir/dps-sdk-7.1.0/\
  dps-sdk-projects/dps-sdk-deposit/lib/xmlbeans-2.3.0.jar:/usr/share/\
  java/commons-lang.jar:/operational_shared/plugins/custom/\
  SLUBTechnicalMetadataExtractorMediaConchPlugin.jar \
  org.slub.rosetta.dps.repository.plugin.SLUBTechnicalMetadataExtractorMediaConchPlugin \
  /permanent_storage/normal/2021/06/14/IE19503/V1-FL19505.mkv
  (for example on dev)
parent 575808b4
No related branches found
No related tags found
No related merge requests found
...@@ -16,7 +16,6 @@ limitations under the License. ...@@ -16,7 +16,6 @@ limitations under the License.
package org.slub.rosetta.dps.repository.plugin; package org.slub.rosetta.dps.repository.plugin;
import com.exlibris.core.sdk.strings.StringUtils; import com.exlibris.core.sdk.strings.StringUtils;
import com.exlibris.dps.sdk.techmd.MDExtractorPlugin; import com.exlibris.dps.sdk.techmd.MDExtractorPlugin;
import org.w3c.dom.Document; import org.w3c.dom.Document;
...@@ -25,33 +24,25 @@ import org.w3c.dom.NodeList; ...@@ -25,33 +24,25 @@ import org.w3c.dom.NodeList;
import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.xpath.XPath; import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory; import javax.xml.xpath.XPathFactory;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileWriter;
import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
* SLUBTechnicalMetadataExtractorMediaConchPlugin * SLUBTechnicalMetadataExtractorMediaConchPlugin
* *
...@@ -64,7 +55,8 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract ...@@ -64,7 +55,8 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract
private String mediaconch_binary_path; private String mediaconch_binary_path;
private String mediaconch_profile_path; private String mediaconch_profile_path;
private String mediainfo_binary_path; private String mediainfo_binary_path;
private final static String MEDIAINFO_XSD = "/transformer.xsl"; private final static String xsltproc_binary_path = "/bin/xsltproc"; /* path in Exl */
private final static String TRANSFORMER_XSL = "/resources/transformer.xsl";
private List<String> extractionErrors = new ArrayList<>(); private List<String> extractionErrors = new ArrayList<>();
private final List<String> validationLog = new ArrayList<>(); private final List<String> validationLog = new ArrayList<>();
...@@ -93,6 +85,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract ...@@ -93,6 +85,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract
+ " mediaconch_binary_path=" + mediaconch_binary_path + " mediaconch_binary_path=" + mediaconch_binary_path
+ " mediaconch_profile_path=" + mediaconch_profile_path + " mediaconch_profile_path=" + mediaconch_profile_path
+ " mediainfo_binary_path=" + mediainfo_binary_path + " mediainfo_binary_path=" + mediainfo_binary_path
+ " (xsltproc_binary_path=" + xsltproc_binary_path + ", hardcoded)"
); );
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
...@@ -125,19 +118,20 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract ...@@ -125,19 +118,20 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract
checkFileExists(this.mediaconch_binary_path); checkFileExists(this.mediaconch_binary_path);
checkFileExists(this.mediaconch_profile_path); checkFileExists(this.mediaconch_profile_path);
checkFileExists(this.mediainfo_binary_path); checkFileExists(this.mediainfo_binary_path);
//checkFileExists(xsltproc_binary_path);
// mediaconch validation // mediaconch validation
call_mediaconch(filePath); call_mediaconch(filePath);
// mediainfo metadata extraction // mediainfo metadata extraction
call_mediainfo_and_result_processing(filePath); call_mediainfo_and_result_processing(filePath);
} }
private void call_mediainfo_and_result_processing(String filePath) throws Exception { private void call_mediainfo(String file_to_extract_from, String temp_mediainfo_outputfile ) throws Exception {
String execstring = this.mediainfo_binary_path + " -f --Output=XML " + filePath; String exec_mediainfo_string = this.mediainfo_binary_path + " -f --Output=XML " + file_to_extract_from;
System.out.println("executing: " + execstring); System.out.println("executing: " + exec_mediainfo_string);
checkFileExists(this.mediainfo_binary_path); checkFileExists(this.mediainfo_binary_path);
InputStreamReader process_out; InputStreamReader process_out;
try { try {
Process p = Runtime.getRuntime().exec(execstring); Process p = Runtime.getRuntime().exec(exec_mediainfo_string);
p.waitFor(); p.waitFor();
process_out = new InputStreamReader(p.getInputStream()); process_out = new InputStreamReader(p.getInputStream());
} catch (IOException e) { } catch (IOException e) {
...@@ -155,68 +149,77 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract ...@@ -155,68 +149,77 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract
String regex = "xsi:schemaLocation.*?>"; String regex = "xsi:schemaLocation.*?>";
String line_patched = line.replaceAll(regex, ""); String line_patched = line.replaceAll(regex, "");
mediainfo_output.append(line_patched); mediainfo_output.append(line_patched);
//mediainfo_output.append(line); System.out.println("LINE (patched): " + line_patched);
line = reader.readLine(); line = reader.readLine();
} }
reader.close(); reader.close();
FileWriter temp_media_outputstream = new FileWriter(temp_mediainfo_outputfile);
BufferedWriter temp_media_streamwriter = new BufferedWriter(temp_media_outputstream);
temp_media_streamwriter.append(mediainfo_output);
temp_media_streamwriter.flush();
temp_media_outputstream.flush();
temp_media_outputstream.close();
temp_media_streamwriter.close();
}
private void call_mediainfo_and_result_processing(String filePath) throws Exception {
File temp_media_outputfile = File.createTempFile("mediainfo_outp", ".xml"); File temp_media_outputfile = File.createTempFile("mediainfo_outp", ".xml");
temp_media_outputfile.deleteOnExit(); temp_media_outputfile.deleteOnExit();
call_mediainfo(filePath, String.valueOf(temp_media_outputfile));
if (temp_media_outputfile.length() > 0) {
File temp_media_transformed_outputfile = File.createTempFile("mediainfo_transf_", ".xml"); File temp_media_transformed_outputfile = File.createTempFile("mediainfo_transf_", ".xml");
temp_media_transformed_outputfile.deleteOnExit(); temp_media_transformed_outputfile.deleteOnExit();
OutputStream temp_media_transformed_outputstream = new FileOutputStream(temp_media_transformed_outputfile); //OutputStream temp_media_outputstream = new FileOutputStream(temp_media_outputfile);
OutputStream temp_media_outputstream = new FileOutputStream(temp_media_outputfile); transform_mediainfo_output(temp_media_outputfile, temp_media_transformed_outputfile);
OutputStreamWriter temp_media_streamwriter = new OutputStreamWriter(temp_media_outputstream); if (temp_media_transformed_outputfile.length() > 0) {
temp_media_streamwriter.append(mediainfo_output); /* TODO: read transformed outputfile and return attributes */
temp_media_streamwriter.close(); extract_attributes_of_transformed_result(temp_media_transformed_outputfile);
/* xslt transform */ } else {
InputStream stylestream = getClass().getResourceAsStream(MEDIAINFO_XSD); throw new Exception( "size of intermediate transformed file '" + temp_media_transformed_outputfile + "' is zero, something broken");
StreamSource stylesource = new StreamSource(stylestream);
stylesource.setSystemId("./resources" + MEDIAINFO_XSD);
System.out.println("stylesource, tempfile_raw=" + temp_media_outputfile.getAbsolutePath());
System.out.println("stylesource, tempfile_transformed=" + temp_media_transformed_outputfile.getAbsolutePath());
System.out.println("stylesource, systemID=" + stylesource.getSystemId());
System.out.println("stylesource, publicID=" + stylesource.getPublicId());
/* media info xml */
InputStream mediastream = new FileInputStream(temp_media_outputfile);
StreamSource mediainfo_source = new StreamSource(mediastream);
mediainfo_source.setSystemId(temp_media_outputfile);
System.out.println("mediainfo_source, systemID=" + mediainfo_source.getSystemId());
System.out.println("mediainfo_source, publicID=" + mediainfo_source.getPublicId());
// Use a Transformer for output
TransformerFactory tFactory;
try {
tFactory = TransformerFactory.newInstance("com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl", ClassLoader.getPlatformClassLoader());
//System.out.println("Factory module name:" + tFactory.getClass().getModule().getName());
//tFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
//tFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
//tFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
//tFactory.setFeature(XMLConstants.USE_CATALOG, false);
System.out.println("transformerFactory:" + System.getProperty("javax.xml.transform.TransformerFactory"));
try {
System.out.println("stylesource=" + stylesource);
assert (!stylesource.isEmpty());
Transformer transformer = tFactory.newTransformer(stylesource);
assert (transformer != null);
System.out.println("transformer=" + transformer);
/* ok, mediainfo is loaded correctly, and xslt loaded too */
/* debug output: */
StreamResult result = new StreamResult(temp_media_transformed_outputstream);
System.out.println("result=" + result.getClass());
transformer.transform(mediainfo_source, result);
} catch (TransformerConfigurationException e) {
System.err.println("TransformerConfigurationException" + e);
e.printStackTrace();
} }
}catch (TransformerFactoryConfigurationError e) { } else {
System.err.println("TransformerConfigurationError" + e); throw new Exception( "size of intermediate mediainfo file '" + temp_media_outputfile + "' is zero, something broken");
e.printStackTrace(); }
temp_media_outputfile.delete();
} }
temp_media_transformed_outputstream.close(); private void transform_mediainfo_output(File temp_media_outputfile, File temp_media_transformed_outputfile) throws Exception {
mediastream.close(); /* write xsd */
/* TODO: read transformed outputfile and return attributes */ File temp_xsdfile = File.createTempFile("mediainfo_style", ".xsd");
extract_attributes_of_transformed_result(temp_media_transformed_outputfile); InputStream stylestream = getClass().getResourceAsStream(TRANSFORMER_XSL);
if (stylestream == null) {
throw new Exception("stylestream not found!");
}
/*
OutputStream out = new FileOutputStream(temp_xsdfile);
int read = 0;
byte[] buffer = new byte[8192];
assert(stylestream != null);
stylestream.read( buffer );
while((read =stylestream.read( buffer)) != -1) {
out.write(buffer, 0, read);
}
out.flush();
out.close();
*/
Files.copy(stylestream, Path.of(String.valueOf(temp_xsdfile)), StandardCopyOption.REPLACE_EXISTING);
stylestream.close();
/* xslt transform */
String exec_xsltproc_string = xsltproc_binary_path + " -o " + temp_media_transformed_outputfile + " " + temp_xsdfile + " " + temp_media_outputfile;
System.out.println("executing: " + exec_xsltproc_string);
InputStreamReader process_out;
try {
Process p = Runtime.getRuntime().exec(exec_xsltproc_string);
p.waitFor();
process_out = new InputStreamReader(p.getInputStream());
} catch (IOException e) {
//log.error("exception creation socket, clamd not available at host=" + host + "port=" + port, e);
System.out.println("ERROR: (actual) xsltproc not available, path=" + xsltproc_binary_path + ", " + e.getMessage());
throw new Exception("ERROR: (actual) xsltproc not available, path=" + xsltproc_binary_path + ", " + e.getMessage());
} finally {
temp_xsdfile.delete();
}
} }
private void extract_attributes_of_transformed_result(File temp_media_transformed_outputfile) throws Exception { private void extract_attributes_of_transformed_result(File temp_media_transformed_outputfile) throws Exception {
...@@ -238,7 +241,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract ...@@ -238,7 +241,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract
} }
private void call_mediaconch(String filePath) throws Exception { private void call_mediaconch(String filePath) throws Exception {
String execstring = this.mediaconch_binary_path + " " + filePath + " " + this.mediaconch_profile_path; String execstring = this.mediaconch_binary_path + " " + filePath + " -p " + this.mediaconch_profile_path;
System.out.println("executing: " + execstring); System.out.println("executing: " + execstring);
InputStreamReader process_out; InputStreamReader process_out;
try { try {
...@@ -260,9 +263,13 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract ...@@ -260,9 +263,13 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract
BufferedReader reader = new BufferedReader(process_out); BufferedReader reader = new BufferedReader(process_out);
String line = reader.readLine(); String line = reader.readLine();
while (line != null) { while (line != null) {
System.out.println(line); if (line.contains("pass!")) {
break;
}
System.out.println("MEDIACONCH line: " + line);
validationLog.add(line); validationLog.add(line);
line = reader.readLine(); line = reader.readLine();
} }
reader.close(); reader.close();
extractionErrors = validationLog; extractionErrors = validationLog;
...@@ -508,6 +515,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract ...@@ -508,6 +515,7 @@ public class SLUBTechnicalMetadataExtractorMediaConchPlugin implements MDExtract
} catch (Exception e) { } catch (Exception e) {
System.out.println("CALLERROR:"); System.out.println("CALLERROR:");
e.printStackTrace(); e.printStackTrace();
return;
} }
System.out.println("RESULT: " + plugin.isValid()); System.out.println("RESULT: " + plugin.isValid());
System.out.println("ERRORMESSAGE: " + plugin.getExtractionErrors()); System.out.println("ERRORMESSAGE: " + plugin.getExtractionErrors());
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment