diff --git a/META-INF/MANIFEST.MF b/META-INF/MANIFEST.MF
new file mode 100644
index 0000000000000000000000000000000000000000..9483aa915b50f13ea24059b086ca8190d31b60a6
--- /dev/null
+++ b/META-INF/MANIFEST.MF
@@ -0,0 +1,3 @@
+Main-Class: org.slub.rosetta.dps.repository.plugin.TechnicalMetadataExtractorMediaInfoPlugin
+
+
diff --git a/META-INF/services/javax.xml.transform.TransformerFactory b/META-INF/services/javax.xml.transform.TransformerFactory
new file mode 100644
index 0000000000000000000000000000000000000000..8e877dca93fce2677a1ffb23bc542eb2cf928a84
--- /dev/null
+++ b/META-INF/services/javax.xml.transform.TransformerFactory
@@ -0,0 +1 @@
+org.apache.xalan.processor.TransformerFactoryImpl
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..4292e0af15dc3e0a577827abdc9a1cf7c8182aa8
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,102 @@
+# Andreas Romeyke, SLUB Dresden
+
+# Path to the JDK binaries (Java 17); the glob is expanded by the shell inside the recipes
+JAVAPATH=/usr/lib/jvm/java-1.17.0-openjdk-*/bin/
+JAVARELEASE=17
+# Rosetta version in use
+ROSETTAVERSION=7.3.0
+
+# Path to the Rosetta SDK
+ROSETTASDK=/exlibris/dps/d4_1/system.dir/dps-sdk-${ROSETTAVERSION}/lib/
+# Path to the Rosetta SDK, deposit module
+ROSETTASDKDEPOSIT=${ROSETTASDK}/../dps-sdk-projects/dps-sdk-deposit/lib
+ROSETTASDKPLUGINS=${ROSETTASDK}/../../bundled_plugins/
+
+
+#XALAN_JAR=$(shell find /usr/share/java/ -name "xalan2.jar" )
+#XERCES_JAR=$(shell find /usr/share/java/ -name "xercesImpl.jar")
+#XML_JAR=$(shell find /usr/share/java/ -name "xml-apis.jar")
+
+# classpath
+JUNITCLASSPATH=/usr/share/java/junit4.jar
+#SOURCESCLASSPATH=org/slub/rosetta/dps/repository/plugin/storage/nfs
+CLASSPATH=${ROSETTASDKDEPOSIT}/dps-sdk-${ROSETTAVERSION}.jar
+#BUILDPATH=$(CLASSPATH)
+
+# sources
+SOURCES=java/org/slub/rosetta/dps/repository/plugin/TechnicalMetadataExtractorMediaInfoPlugin.java
+OBJS=$(SOURCES:.java=.class)
+JAR=TechnicalMetadataExtractorMediaInfoPlugin.jar
+BUILD=build/
+
+all: $(JAR)
+
+# alias: the help text advertises the build under the name 'compile'
+compile: all
+
+help:
+	@echo "erzeugt Plugin für Rosetta von Exlibris"
+	@echo ""
+	@echo "Das Argument 'clean' löscht temporäre Dateien, 'help' gibt diese Hilfe aus und"
+	@echo "'compile' erzeugt ein JAR-File und ein Bash-Script welches das Java-Programm"
+	@echo "aufruft."
+
+jarclean:
+	@rm -Rf $(BUILD)
+
+# NOTE(review): no test class is passed to JUnitCore, so this appears to run zero tests
+test: $(OBJS)
+	java -cp ${CLASSPATH}:$(JUNITCLASSPATH) org.junit.runner.JUnitCore
+
+clean: jarclean
+	@rm -Rf doc/
+	find ./java/org/ -name "*.class" -exec rm -f \{\} \;
+	@rm -Rf $(JAR)
+
+distclean: clean
+	find ./ -name "*~" -exec rm -f \{\} \;
+	@rm -Rf null
+
+$(BUILD):
+	@mkdir -p $(BUILD)/lib
+
+# $(BUILD) is order-only: it has to exist, but its timestamp must not trigger a rebuild
+$(JAR): $(OBJS) | $(BUILD)
+	@cp -r PLUGIN-INF/ $(BUILD)
+	@cp -r META-INF/ $(BUILD)
+	@cd java && find ./ -name "*.class" -print -exec cp --parents -r \{\} $(CURDIR)/$(BUILD) \;
+#	@cd resources; find ./ -type f -print -exec cp --parents -r \{\} $(PWD)/build \; ; cd ..
+	cp -a resources/ $(CURDIR)/$(BUILD)
+
+ifdef XALAN_JAR
+	echo "XALAN_JAR: unzip '$(XALAN_JAR)'"
+#	unzip $(XALAN_JAR) '*.class' -d $(BUILD);
+#	unzip $(XALAN_JAR) 'META-INF/services/*' -d $(BUILD)
+	cp --dereference $(XALAN_JAR) $(CURDIR)/$(BUILD)/lib/
+endif
+ifdef XERCES_JAR
+	echo "XERCES_JAR: unzip '$(XERCES_JAR)'"
+#	unzip $(XERCES_JAR) '*.class' -d $(BUILD);
+	cp --dereference $(XERCES_JAR) $(CURDIR)/$(BUILD)/lib/
+endif
+ifdef XML_JAR
+	echo "XML_JAR: unzip '$(XML_JAR)'"
+#	unzip $(XML_JAR) '*.class' -d $(BUILD);
+	cp --dereference $(XML_JAR) $(CURDIR)/$(BUILD)/lib/
+endif
+	@cd $(BUILD) && ${JAVAPATH}/jar cfvM ../$@ ./*
+
+%.class: %.java
+	${JAVAPATH}/javac --release ${JAVARELEASE} -g -classpath ${CLASSPATH}:${JUNITCLASSPATH} -Xlint:all $<
+
+doc: $(SOURCES)
+	javadoc -d doc/ $^
+
+check_prerequisites:
+	@echo -n "### Checking java path: $(JAVAPATH) ...."
+	@if [ -e $(JAVAPATH) ]; then echo "fine :)"; else echo " not found! :("; fi
+	@echo -n "### Checking Exlibris Rosetta SDK path: $(ROSETTASDK) ...."
+	@if [ -e $(ROSETTASDK) ]; then echo "fine :)"; else echo " not found! :("; fi
+
+.PHONY: all compile help clean distclean jarclean test doc check_prerequisites
+
diff --git a/PLUGIN-INF/metadata_SLUBTechnicalMetadataExtractorMediaConchPlugin.xml b/PLUGIN-INF/metadata_SLUBTechnicalMetadataExtractorMediaConchPlugin.xml
new file mode 100644
index 0000000000000000000000000000000000000000..f86f80ef36d582ddba5bb212abc548f69be2b90d
--- /dev/null
+++ b/PLUGIN-INF/metadata_SLUBTechnicalMetadataExtractorMediaConchPlugin.xml
@@ -0,0 +1,62 @@
+<pl:metadata-config xmlns:pl="http://www.exlibrisgroup.com/Plugins/1.0/"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <pl:pluginTypeName>MDExtractorPlugin</pl:pluginTypeName>
+    <pl:deployName>TechnicalMetadataExtractorMediaInfoPlugin</pl:deployName>
+    <pl:className>org.slub.rosetta.dps.repository.plugin.TechnicalMetadataExtractorMediaInfoPlugin</pl:className>
+    <pl:initParameters>
+        <fr:x_form xmlns:fr="http://com/exlibris/digitool/common/forms/xmlbeans">
+            <form_name>TechnicalMetadataExtractorMediaInfoPlugin_Params</form_name>
+            <description>TechnicalMetadataExtractorMediaInfoPlugin_parameters_form</description>
+            <version>0.1</version>
+            <grid_x>1</grid_x>
+            <md_format>1</md_format>
+            <x_fields>
+                <x_field>
+                    <field_name>xsltproc_binary_path</field_name>
+                    <label>Path to XSLTProc</label>
+                    <ui_tool_tip>add full path to xsltproc</ui_tool_tip>
+                    <mandatory>true</mandatory>
+                    <x_logic_type>String</x_logic_type>
+                    <x_ui_type>TextField</x_ui_type>
+                    <default_value>/usr/bin/xsltproc</default_value>
+                    <css_class>width40</css_class>
+                    <x_options/>
+                </x_field>
+                <x_field>
+                    <field_name>mediainfo_binary_path</field_name>
+                    <label>Path to mediainfo</label>
+                    <ui_tool_tip>add full path to mediainfo</ui_tool_tip>
+                    <mandatory>true</mandatory>
+                    <x_logic_type>String</x_logic_type>
+                    <x_ui_type>TextField</x_ui_type>
+                    <default_value>/usr/bin/mediainfo</default_value>
+                    <css_class>width40</css_class>
+                    <x_options/>
+                </x_field>
+            </x_fields>
+        </fr:x_form>
+    </pl:initParameters>
+    <pl:description>SLUB Technical Metadata Extractor Plugin, using MediaInfo
+        FFV1/Matroska-files</pl:description>
+    <pl:version>1.0</pl:version>
+    <pl:materialType>DIGITAL</pl:materialType>
+    <pl:module>Preservation</pl:module>
+    <pl:generalType>TASK</pl:generalType>
+    <pl:publicApi>N</pl:publicApi>
+    <pl:implType>java</pl:implType>
+    <pl:contactPerson>
+        <contact_info xmlns="http://www.exlibrisgroup.com/Plugins/1.0/">
+            <contactType>admin</contactType>
+            <firstName/>
+            <lastName>SLUB Dresden</lastName>
+            <telephone1/>
+            <telephone2/>
+            <email>langzeitarchiv@slub-dresden.de</email>
+            <address1>Zellescher Weg 18</address1>
+            <address2/>
+            <city>Dresden</city>
+            <zipCode>01069</zipCode>
+            <country>Germany</country>
+        </contact_info>
+    </pl:contactPerson>
+</pl:metadata-config>
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..3cd2bc83580029e435f473418cdf78128b944e2c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,23 @@
+Plugin using MediaInfo to extract metadata from A/V files
+=========================================================
+
+You can test it using 'test.sh'.
+
+## compile
+
+* make clean
+* make
+
+## install
+* copy the jar-file to /operational_shared/plugins/custom/
+
+## configuration
+
+* add Mapping under "Preservation:Extractors", switch from "Global" to "Local", use
+  "Custom"-Tab
+* fill in the fields
+
+## copyright hints
+
+MediaInfo is released under the GNU General Public License 3.0 (or higher);
+it cannot be integrated and delivered as a binary-only plugin.
diff --git a/java/org/slub/rosetta/dps/repository/plugin/TechnicalMetadataExtractorMediaInfoPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/TechnicalMetadataExtractorMediaInfoPlugin.java
new file mode 100644
index 0000000000000000000000000000000000000000..0191c6e4f128b2af717be5b176672c3e0e18bde3
--- /dev/null
+++ b/java/org/slub/rosetta/dps/repository/plugin/TechnicalMetadataExtractorMediaInfoPlugin.java
@@ -0,0 +1,450 @@
+/*
+2017-2022 by Andreas Romeyke (SLUB Dresden)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package org.slub.rosetta.dps.repository.plugin;
+
+import com.exlibris.core.sdk.strings.StringUtils;
+import com.exlibris.dps.sdk.techmd.TechnicalMDExtractorPlugin;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+import javax.xml.xpath.XPathExpression;
+import javax.xml.xpath.XPathFactory;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Rosetta technical-metadata extractor: runs the external {@code mediainfo} tool on a file,
+ * transforms its XML report with {@code xsltproc} and the bundled stylesheet, and exposes the
+ * resulting key/value pairs as attributes.
+ *
+ * @author andreas.romeyke@slub-dresden.de (Andreas Romeyke)
+ * @see com.exlibris.dps.sdk.techmd.TechnicalMDExtractorPlugin
+ */
+public class TechnicalMetadataExtractorMediaInfoPlugin implements TechnicalMDExtractorPlugin {
+    private String mediainfo_binary_path;
+    private String xsltproc_binary_path;
+    private final static String TRANSFORMER_XSL = "/resources/transformer.xsl";
+    private final List<String> extractionErrors = new ArrayList<>();
+    private final Map<String,String> attributes = new HashMap<>();
+    //static final ExLogger log = ExLogger.getExLogger(SLUBTechnicalMetadataExtractorMediaConchPlugin.class, ExLogger.VALIDATIONSTACK);
+    /**
+     * Attributes for which {@link #getAttributeByName(String)} answers "0" instead of "not found".
+     * HINT: based on FL 7.1100 mappings for mediainfo (https://github.com/rosetta-format-library/RosettaFormatLibrary/releases/tag/7.1100)
+     */
+    private final static List<String> attributesMappedToDnxPropertiesOfTypeNumber = Collections.unmodifiableList(new ArrayList<>(List.of(
+            "mediainfo.track.Audio.Duration", // => audio.duration
+            "mediainfo.track.Audio.SamplingRate", // => audio.sampling_rate
+            "mediainfo.track.Audio.StreamSize", // => audio.stream_size
+            "mediainfo.track.General.AudioCount", // => general.audio_count
+            "mediainfo.track.General.MenuCount", // => general.menu_count
+            "mediainfo.track.General.OtherCount", // => general.other_count
+            "mediainfo.track.General.StreamSize", // => general.stream_size
+            "mediainfo.track.General.TextCount", // => general.text_count
+            "mediainfo.track.General.VideoCount", // => general.video_count
+            "mediainfo.track.Video.BitRate", // => video.bit_rate
+            "mediainfo.track.Video.DisplayAspectRatio", // => video.display_aspect_ratio
+            "mediainfo.track.Video.Duration", // => video.duration
+            "mediainfo.track.Video.extra.MaxSlicesCount", // => video.extra.max_slices_count
+            "mediainfo.track.Video.FrameCount" // => video.frame_count
+    )));
+
+    /** constructor */
+    public TechnicalMetadataExtractorMediaInfoPlugin() {
+        //log.info("SLUBVirusCheckPlugin instantiated with host=" + host + " port=" + port + " timeout=" + timeout);
+        System.out.println("SLUBTechnicalMetadataExtractorMediaConchPlugin instantiated");
+    }
+
+    /**
+     * Reads the plugin configuration supplied via the XML form. A missing or unusable path is
+     * only reported here; {@link #extract(String)} checks the paths again and fails.
+     *
+     * @param initp parameter map with the keys {@code mediainfo_binary_path} and {@code xsltproc_binary_path}
+     */
+    public void initParams(Map<String, String> initp) {
+        this.mediainfo_binary_path = trimOrNull(initp.get("mediainfo_binary_path"));
+        this.xsltproc_binary_path = trimOrNull(initp.get("xsltproc_binary_path"));
+        try {
+            checkFileExists(this.mediainfo_binary_path);
+            checkFileExists(this.xsltproc_binary_path);
+            System.out.println("SLUBTechnicalMetadataExtractorMediaConchPlugin instantiated with "
+                    + " mediainfo_binary_path=" + mediainfo_binary_path
+                    + " xsltproc_binary_path=" + xsltproc_binary_path
+            );
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    /** @return the trimmed value, or {@code null} if the parameter is missing */
+    private static String trimOrNull(String value) {
+        return (value == null) ? null : value.trim();
+    }
+
+    /**
+     * Extracts the technical metadata of one file; the values are available through
+     * {@link #getAttributeByName(String)} afterwards.
+     *
+     * @param filePath file to analyse
+     * @throws Exception if a tool is missing or an intermediate result is empty
+     */
+    @Override
+    public void extract(String filePath) throws Exception {
+        // drop the results of a previous run so they cannot leak into this one
+        attributes.clear();
+        extractionErrors.clear();
+        if (StringUtils.isEmptyString(this.mediainfo_binary_path)) {
+            throw new Exception("mediainfo_binary_path not found");
+        }
+        checkFileExists(this.mediainfo_binary_path);
+        checkFileExists(this.xsltproc_binary_path);
+        // mediainfo metadata extraction
+        callMediainfoAndResultProcessing(filePath);
+    }
+
+    /**
+     * Runs an external command and returns the lines it wrote to stdout, decoded as UTF-8.
+     * stdout is drained before waiting for the process, so a large report cannot fill the pipe
+     * and block the child; stderr is passed through to the stderr of this JVM. A non-zero exit
+     * code is only logged because the callers validate the files that were produced.
+     *
+     * @param command executable followed by its arguments (no shell-style word splitting)
+     * @return stdout of the command, one entry per line
+     * @throws IOException if the command cannot be started or its output cannot be read
+     * @throws InterruptedException if the thread is interrupted while waiting for the command
+     */
+    private static List<String> runProcess(List<String> command) throws IOException, InterruptedException {
+        ProcessBuilder builder = new ProcessBuilder(command);
+        builder.redirectError(ProcessBuilder.Redirect.INHERIT);
+        Process process = builder.start();
+        List<String> lines = new ArrayList<>();
+        try (BufferedReader reader = new BufferedReader(
+                new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                lines.add(line);
+            }
+            int exit_code = process.waitFor();
+            if (exit_code != 0) {
+                System.out.println("WARNING: '" + command.get(0) + "' exited with code " + exit_code);
+            }
+        } finally {
+            process.destroy();
+        }
+        return lines;
+    }
+
+    /**
+     * Runs mediainfo on a file and stores its XML report without the schema location.
+     *
+     * @param file_to_extract_from file to analyse
+     * @param temp_mediainfo_outputfile path of the file that receives the report
+     * @throws Exception if mediainfo is missing or cannot be started
+     */
+    private void callMediainfo(String file_to_extract_from, String temp_mediainfo_outputfile ) throws Exception {
+        checkFileExists(this.mediainfo_binary_path);
+        List<String> command = List.of(this.mediainfo_binary_path, "-f", "--Output=XML", file_to_extract_from);
+        System.out.println("executing: " + String.join(" ", command));
+        List<String> report_lines;
+        try {
+            report_lines = runProcess(command);
+        } catch (IOException e) {
+            System.out.println("ERROR: (actual) mediainfo not available, path=" + this.mediainfo_binary_path + ", " + e.getMessage());
+            throw new Exception("ERROR: (actual) mediainfo not available, path=" + this.mediainfo_binary_path + ", " + e.getMessage(), e);
+        }
+        /* patch out mediainfo schema location because downloading of xsd is not allowed */
+        /* example input:
+           <MediaInfo
+               xmlns="https://mediaarea.net/mediainfo"
+               xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+               xsi:schemaLocation="https://mediaarea.net/mediainfo https://mediaarea.net/mediainfo/mediainfo_2_0.xsd"
+               version="2.0">
+         */
+        String regex = "xsi:schemaLocation=\"[^\"]*\"";
+        try (BufferedWriter writer = Files.newBufferedWriter(Path.of(temp_mediainfo_outputfile), StandardCharsets.UTF_8)) {
+            for (String line : report_lines) {
+                String line_patched = line.replaceAll(regex, "");
+                if (!line.equals(line_patched)) {
+                    System.out.println("monkey patched line:\n- " + line + " to new line:\n+ " + line_patched);
+                }
+                writer.write(line_patched);
+                // keep the line structure of the report instead of gluing all lines together
+                writer.newLine();
+            }
+        }
+    }
+
+    /**
+     * Runs mediainfo and xsltproc via temporary files and collects the resulting attributes.
+     * Both temporary files are removed again, whether or not the extraction succeeded.
+     *
+     * @param filePath file to analyse
+     * @throws Exception if a tool cannot be run or one of the intermediate files is empty
+     */
+    private void callMediainfoAndResultProcessing(String filePath) throws Exception {
+        File temp_media_outputfile = File.createTempFile("mediainfo_outp", ".xml");
+        File temp_media_transformed_outputfile = null;
+        try {
+            callMediainfo(filePath, String.valueOf(temp_media_outputfile));
+            if (temp_media_outputfile.length() == 0) {
+                throw new Exception( "size of intermediate mediainfo file '" + temp_media_outputfile + "' is zero, something broken");
+            }
+            temp_media_transformed_outputfile = File.createTempFile("mediainfo_transf_", ".xml");
+            transformMediainfoOutput(temp_media_outputfile, temp_media_transformed_outputfile);
+            if (temp_media_transformed_outputfile.length() == 0) {
+                throw new Exception( "size of intermediate transformed file '" + temp_media_transformed_outputfile + "' is zero, something broken");
+            }
+            extractAttributesOfTransformedResult(temp_media_transformed_outputfile);
+        } finally {
+            deleteTempFile(temp_media_outputfile);
+            deleteTempFile(temp_media_transformed_outputfile);
+        }
+    }
+
+    /** Removes a temporary file; a failure is only reported so that it cannot mask the real outcome. */
+    private static void deleteTempFile(File file) {
+        if (file != null && file.exists() && !file.delete()) {
+            System.out.println("WARNING: could not delete temporary file " + file);
+        }
+    }
+
+    /**
+     * Transforms the mediainfo report with xsltproc and the bundled stylesheet.
+     *
+     * @param temp_media_outputfile mediainfo report (input)
+     * @param temp_media_transformed_outputfile file that receives the transformed report
+     * @throws Exception if the stylesheet is missing or xsltproc cannot be started
+     */
+    private void transformMediainfoOutput(File temp_media_outputfile, File temp_media_transformed_outputfile) throws Exception {
+        File temp_xsdfile = createTempXsdFile();
+        try {
+            /* xslt transform */
+            List<String> command = List.of(xsltproc_binary_path, "-o", String.valueOf(temp_media_transformed_outputfile),
+                    String.valueOf(temp_xsdfile), String.valueOf(temp_media_outputfile));
+            System.out.println("executing: " + String.join(" ", command));
+            runProcess(command);
+        } catch (IOException e) {
+            System.out.println("ERROR: (actual) xsltproc not available, path=" + xsltproc_binary_path + ", " + e.getMessage());
+            throw new Exception("ERROR: (actual) xsltproc not available, path=" + xsltproc_binary_path + ", " + e.getMessage(), e);
+        } finally {
+            deleteTempFile(temp_xsdfile);
+        }
+    }
+
+    /**
+     * Copies the bundled stylesheet to a temporary file so that xsltproc can read it.
+     * The caller has to delete the returned file.
+     *
+     * @return temporary copy of the stylesheet
+     * @throws Exception if the stylesheet resource is missing or cannot be copied
+     */
+    private File createTempXsdFile() throws Exception {
+        try (InputStream stylestream = getClass().getResourceAsStream(TRANSFORMER_XSL)) {
+            if (stylestream == null) {
+                throw new Exception("stylestream not found!");
+            }
+            File temp_xsdfile = File.createTempFile("mediainfo_style", ".xsd");
+            try {
+                Files.copy(stylestream, temp_xsdfile.toPath(), StandardCopyOption.REPLACE_EXISTING);
+            } catch (IOException e) {
+                deleteTempFile(temp_xsdfile);
+                throw e;
+            }
+            return temp_xsdfile;
+        }
+    }
+
+    /**
+     * Reads every {@code /mdExtractor/attributes/key} element of the transformed report into the
+     * attribute map, using its {@code id} attribute as name and its text content as value.
+     *
+     * @param temp_media_transformed_outputfile transformed report
+     * @throws Exception if the file is missing or cannot be parsed
+     */
+    private void extractAttributesOfTransformedResult(File temp_media_transformed_outputfile) throws Exception {
+        checkFileExists(String.valueOf(temp_media_transformed_outputfile));
+        XPathFactory xPathfactory = XPathFactory.newInstance();
+        XPath xpath = xPathfactory.newXPath();
+        XPathExpression expr = xpath.compile("/mdExtractor/attributes/key");
+        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+        DocumentBuilder db = dbf.newDocumentBuilder();
+        Document temp_media_transformed_source = db.parse(temp_media_transformed_outputfile);
+        NodeList nl = (NodeList) expr.evaluate(temp_media_transformed_source, XPathConstants.NODESET);
+        int len = nl.getLength();
+        for ( int i = 0; i < len; i++ ) {
+            Node node = nl.item(i);
+            String id = String.valueOf(node.getAttributes().getNamedItem("id").getTextContent());
+            String value = node.getTextContent();
+            attributes.put(id, value);
+        }
+    }
+
+    /** @return the fixed agent name {@code "mediainfo"} */
+    public String getAgentName()
+    {
+        return "mediainfo";
+    }
+
+    /**
+     * Reports the output of {@code mediainfo --Version}.
+     *
+     * @return agent header followed by the version output, or only the header if mediainfo could not be run
+     */
+    public String getAgent() {
+        StringBuilder response = new StringBuilder();
+        // NOTE(review): the header says "mediaconch" although mediainfo is queried; left unchanged
+        // because the string is returned to the caller -- confirm before renaming.
+        response.append("mediaconch:\n");
+        try {
+            checkFileExists(this.mediainfo_binary_path);
+            for (String line : runProcess(List.of(this.mediainfo_binary_path, "--Version"))) {
+                System.out.println(line);
+                response.append(line);
+            }
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            e.printStackTrace();
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return response.toString().trim();
+    }
+
+    /**
+     * @param attribute attribute name
+     * @return the extracted value; "0" for unavailable numeric attributes, otherwise "not found"
+     */
+    @Override
+    public String getAttributeByName(String attribute) {
+        if (attributes.containsKey(attribute)) {
+            return attributes.get(attribute);
+        }
+        // HINT: Rosetta expects a number of specific DNX properties instead of text
+        if (attributesMappedToDnxPropertiesOfTypeNumber.contains( attribute )) {
+            return "0"; // type NUMBER
+        }
+        return "not found"; // type TEXT & STRING
+    }
+
+    /** @return read-only view of the extraction errors */
+    @Override
+    public List<String> getExtractionErrors() {
+        return Collections.unmodifiableList(this.extractionErrors);
+    }
+
+    /**
+     * Lists the attribute names declared in the {@code properties} list of the bundled stylesheet.
+     * Base is the property file from the original mediainfo plugin of the FL working group.
+     *
+     * @return supported attribute names; empty if the stylesheet cannot be read
+     */
+    @Override
+    public List<String> getSupportedAttributeNames() {
+        List<String> available = new ArrayList<>();
+        try (InputStream stylestream = getClass().getResourceAsStream(TRANSFORMER_XSL)) {
+            if (stylestream == null) {
+                throw new Exception("stylestream not found!");
+            }
+            XPathFactory xPathfactory = XPathFactory.newInstance();
+            XPath xpath = xPathfactory.newXPath();
+            XPathExpression expr = xpath.compile("//*[local-name()='properties']/*[local-name()='property']");
+            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+            DocumentBuilder db = dbf.newDocumentBuilder();
+            Document temp_property_list_xml = db.parse(stylestream);
+            NodeList nl = (NodeList) expr.evaluate(temp_property_list_xml, XPathConstants.NODESET);
+            int len = nl.getLength();
+            for (int i = 0; i < len; i++) {
+                Node node = nl.item(i);
+                available.add(node.getTextContent());
+            }
+        } catch (Exception e) {
+            System.out.println("error in getSupportedAttributeNames, " + e.getMessage());
+        }
+        return available;
+    }
+
+    /**
+     * @param filename path to check
+     * @throws Exception if the path is {@code null} or does not exist
+     */
+    private void checkFileExists (String filename) throws Exception {
+        if (filename == null || !new File(filename).exists()) {
+            System.out.println("ERROR: path=" + filename + " not available");
+            throw new Exception("ERROR: path=" + filename + " not available");
+        }
+    }
+
+    /**
+     * Stand-alone check that runs the plugin against the locally installed tools.
+     *
+     * @param args list of files to extract metadata from
+     */
+    public static void main(String[] args) {
+        TechnicalMetadataExtractorMediaInfoPlugin plugin = new TechnicalMetadataExtractorMediaInfoPlugin();
+        Map<String, String> initp = new HashMap<>();
+        initp.put( "xsltproc_binary_path", "/usr/bin/xsltproc");
+        initp.put( "mediainfo_binary_path", "/usr/bin/mediainfo");
+        plugin.initParams( initp );
+        System.out.println("----------------------------------");
+        System.out.println("Agent: '" + plugin.getAgent() + "'");
+        System.out.println();
+        for (String file : args) {
+            try {
+                System.out.println("extracting from " + file);
+                plugin.extract(file);
+            } catch (Exception e) {
+                System.out.println("CALLERROR:");
+                e.printStackTrace();
+                return;
+            }
+            System.out.println("ERRORMESSAGE: " + plugin.getExtractionErrors());
+        }
+        System.out.println("----------------------------------");
+        System.out.println("getAgentName:");
+        System.out.println( plugin.getAgentName());
+        System.out.println("----------------------------------");
+        System.out.println("getAgent:");
+        System.out.println( plugin.getAgent());
+        System.out.println("----------------------------------");
+        System.out.println("getSupportedAttributeNames:");
+        System.out.println( plugin.getSupportedAttributeNames());
+        System.out.println("----------------------------------");
+        System.out.println("getAttributeByName (summarized):");
+        for (Map.Entry<String, String> m : plugin.attributes.entrySet()) {
+            String s = m.getKey() + " -> " + m.getValue();
+            System.out.println(s);
+        }
+    }
+}
diff --git a/resources/properties.xml b/resources/properties.xml new file mode 100644 index 0000000000000000000000000000000000000000..c4f9da4b8ea7d560d9545ee54fcaf8e268d95a95 --- /dev/null +++ 
b/resources/properties.xml @@ -0,0 +1,109 @@ +<properties> +<property>mediainfo.track.Audio.BitDepth</property> +<property>mediainfo.track.Audio.BitRate</property> +<property>mediainfo.track.Audio.BitRate_Mode</property> +<property>mediainfo.track.Audio.Channels</property> +<property>mediainfo.track.Audio.CodecID</property> +<property>mediainfo.track.Audio.Commercial_Name</property> +<property>mediainfo.track.Audio.Compression_Mode</property> +<property>mediainfo.track.Audio.Default</property> +<property>mediainfo.track.Audio.Duration</property> +<property>mediainfo.track.Audio.encoded_library</property> +<property>mediainfo.track.Audio.Forced</property> +<property>mediainfo.track.Audio.Format</property> +<property>mediainfo.track.Audio.Format_Profile</property> +<property>mediainfo.track.Audio.Format_Settings_Wrapping</property> +<property>mediainfo.track.Audio.Format_Settings_Sign</property> +<property>mediainfo.track.Audio.Format_Version</property> +<property>mediainfo.track.Audio.FrameRate</property> +<property>mediainfo.track.Audio.ID</property> +<property>mediainfo.track.Audio.SamplingRate</property> +<property>mediainfo.track.Audio.ServiceKind</property> +<property>mediainfo.track.Audio.StreamSize</property> +<property>mediainfo.track.Audio.Title</property> +<property>mediainfo.track.General.AudioCount</property> +<property>mediainfo.track.General.CodecID</property> +<property>mediainfo.track.General.CompleteName</property> +<property>mediainfo.track.General.Description</property> +<property>mediainfo.track.General.Duration</property> +<property>mediainfo.track.General.Encoded_Application</property> +<property>mediainfo.track.General.Encoded_Application_CompanyName</property> +<property>mediainfo.track.General.Encoded_Application_Name</property> +<property>mediainfo.track.General.Encoded_Application_Version</property> +<property>mediainfo.track.General.Encoded_Date</property> +<property>mediainfo.track.General.Encoded_Library</property> 
+<property>mediainfo.track.General.Encoded_Library_Name</property> +<property>mediainfo.track.General.Encoded_Library_Version</property> +<property>mediainfo.track.General.extra.Attachments</property> +<property>mediainfo.track.General.extra.ErrorDetectionType</property> +<property>mediainfo.track.General.extra.IsTruncated</property> +<property>mediainfo.track.General.extra.bext_Present</property> +<property>mediainfo.track.General.FileSize</property> +<property>mediainfo.track.General.Format</property> +<property>mediainfo.track.General.Format_Profile</property> +<property>mediainfo.track.General.Format_Settings</property> +<property>mediainfo.track.General.Format_Version</property> +<property>mediainfo.track.General.IsStreamable</property> +<property>mediainfo.track.General.MenuCount</property> +<property>mediainfo.track.General.OtherCount</property> +<property>mediainfo.track.General.OverallBitRate</property> +<property>mediainfo.track.General.OverallBitRateMode</property> +<property>mediainfo.track.General.PackageName</property> +<property>mediainfo.track.General.Producer</property> +<property>mediainfo.track.General.StreamSize</property> +<property>mediainfo.track.General.TextCount</property> +<property>mediainfo.track.General.Title</property> +<property>mediainfo.track.General.UniqueID</property> +<property>mediainfo.track.General.VideoCount</property> +<property>mediainfo.track.Image.BitDepth</property> +<property>mediainfo.track.Image.ColorSpace</property> +<property>mediainfo.track.Image.colour_primaries</property> +<property>mediainfo.track.Image.Compression_Mode</property> +<property>mediainfo.track.Image.DisplayAspectRatio</property> +<property>mediainfo.track.Image.Encoded_Date</property> +<property>mediainfo.track.Image.Encoded_Library</property> +<property>mediainfo.track.Image.Format</property> +<property>mediainfo.track.Image.Format_Version</property> +<property>mediainfo.track.Image.FrameRate</property> 
+<property>mediainfo.track.Image.Height</property> +<property>mediainfo.track.Image.StreamSize</property> +<property>mediainfo.track.Image.transfer_characteristics</property> +<property>mediainfo.track.Image.Width</property> +<property>mediainfo.track.Video.BitDepth</property> +<property>mediainfo.track.Video.BitRate</property> +<property>mediainfo.track.Video.BitRate_Mode</property> +<property>mediainfo.track.Video.ChromaSubsampling</property> +<property>mediainfo.track.Video.CodecID</property> +<property>mediainfo.track.Video.ColorSpace</property> +<property>mediainfo.track.Video.BitsPixel_Frame</property> +<property>mediainfo.track.Video.Compression_Mode</property> +<property>mediainfo.track.Video.Default</property> +<property>mediainfo.track.Video.DisplayAspectRatio</property> +<property>mediainfo.track.Video.Duration</property> +<property>mediainfo.track.Video.Encoded_Library</property> +<property>mediainfo.track.Video.extra.coder_type</property> +<property>mediainfo.track.Video.extra.ErrorDetectionType</property> +<property>mediainfo.track.Video.extra.MaxSlicesCount</property> +<property>mediainfo.track.Video.extra.OriginalSourceMedium</property> +<property>mediainfo.track.Video.Forced</property> +<property>mediainfo.track.Video.Format</property> +<property>mediainfo.track.Video.Format_Profile</property> +<property>mediainfo.track.Video.Format_Settings_GOP</property> +<property>mediainfo.track.Video.Format_Settings_Wrapping</property> +<property>mediainfo.track.Video.Format_Version</property> +<property>mediainfo.track.Video.FrameCount</property> +<property>mediainfo.track.Video.FrameRate</property> +<property>mediainfo.track.Video.Height</property> +<property>mediainfo.track.Video.ID</property> +<property>mediainfo.track.Video.PixelAspectRatio</property> +<property>mediainfo.track.Video.PixelAspectRatioOriginal</property> +<property>mediainfo.track.Video.ScanOrder</property> +<property>mediainfo.track.Video.ScanType</property> 
+<property>mediainfo.track.Video.Standard</property> +<property>mediainfo.track.Video.StreamSize</property> +<property>mediainfo.track.Video.TimeCode_FirstFrame</property> +<property>mediainfo.track.Video.TimeCode_Source</property> +<property>mediainfo.track.Video.Title</property> +<property>mediainfo.track.Video.transfer_characteristics</property> +<property>mediainfo.track.Video.Width</property> +</properties> \ No newline at end of file diff --git a/resources/transformer.xsl b/resources/transformer.xsl new file mode 100644 index 0000000000000000000000000000000000000000..1059a42c17978072fb209a58947f382d59beb8b7 --- /dev/null +++ b/resources/transformer.xsl @@ -0,0 +1,268 @@ +<?xml version="1.0" encoding="ISO-8859-1"?> + +<!-- *********************************************************************************************************** + Edited by Kimberly A. Tryka - National Geographic Society - 7 October 2019 + Edits made to work with xml output from MediaInfo version 19.09 + Changes made: + - bound namespace prefix (mi) to namespace in <xsl:stylesheet> + - added exclude-result-prefixes to <xsl:stylesheet> + - all references to <Mediainfo> were changed to <MediaInfo> + - all references to <File> were changed to <media> + - used namespace prefix when referencing elements from the MediaInfo output file + + Tested using xsltproc (via command line) on linux + Currently running properly in our Rosetta 6.1 implementation + Developed and tested (Saxon PE) in Oxygen + + Note - changes also need to be made to properties.xml file to account for changes in + element names in MediaInfo. 
For example <File_properties> has become <File_Properties> + and <codec_ID> has become <codecID> - this is not an exhaustive list of these changes + + Note - after changes have been made to the properties.xml file - you will need to re-map + the fields for the extractor + ********************************************************************************************************** --> +<!-- *********************************************************************************************************** + Updated by Tyler Thorsted - Church of Jesus Christ of Latter-day Saints - April 2020 + Added the "Extra" tags for General & Video + ********************************************************************************************************** --> + + +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:xs="http://www.w3.org/2001/XMLSchema" + xmlns:mi="https://mediaarea.net/mediainfo" + exclude-result-prefixes="xs mi" + version="1.0"> + + <xsl:output encoding="UTF-8" method="xml" indent="yes"/> + + <xsl:variable name="genPath">mediainfo.track.General.</xsl:variable> + <xsl:variable name="genxPath">mediainfo.track.General.extra.</xsl:variable> + <xsl:variable name="vidPath">mediainfo.track.Video.</xsl:variable> + <xsl:variable name="vidxPath">mediainfo.track.Video.extra.</xsl:variable> + <xsl:variable name="audPath">mediainfo.track.Audio.</xsl:variable> + <xsl:variable name="imgPath">mediainfo.track.Image.</xsl:variable> + <xsl:variable name="txtPath">mediainfo.track.Text.</xsl:variable> + <xsl:variable name="otrPath">mediainfo.track.Other.</xsl:variable> + + <xsl:template match="/"> + <xsl:element name="mdExtractor" xmlns="http://com/exlibris/digitool/repository/api/xmlbeans"> + <xsl:element name="profile"/> + <xsl:element name="format_name"><xsl:value-of select="/mi:MediaInfo/mi:media/mi:track[@type='General']/mi:Format[1]" /></xsl:element> + <xsl:element name="formatVersion"><xsl:value-of 
select="/mi:MediaInfo/mi:media/mi:track[@type='General']/mi:Format_Version" /></xsl:element> + <xsl:element name="imageCount"/> + <xsl:element name="isValid">true</xsl:element> + <xsl:element name="isWellFormed">true</xsl:element> + <xsl:element name="attributes"> + <xsl:variable name="myList"><properties> + <property>mediainfo.track.Audio.BitDepth</property> + <property>mediainfo.track.Audio.BitRate</property> + <property>mediainfo.track.Audio.BitRate_Mode</property> + <property>mediainfo.track.Audio.Channels</property> + <property>mediainfo.track.Audio.CodecID</property> + <property>mediainfo.track.Audio.Commercial_Name</property> + <property>mediainfo.track.Audio.Compression_Mode</property> + <property>mediainfo.track.Audio.Default</property> + <property>mediainfo.track.Audio.Duration</property> + <property>mediainfo.track.Audio.encoded_library</property> + <property>mediainfo.track.Audio.Forced</property> + <property>mediainfo.track.Audio.Format</property> + <property>mediainfo.track.Audio.Format_Profile</property> + <property>mediainfo.track.Audio.Format_Settings_Wrapping</property> + <property>mediainfo.track.Audio.Format_Settings_Sign</property> + <property>mediainfo.track.Audio.Format_Version</property> + <property>mediainfo.track.Audio.FrameRate</property> + <property>mediainfo.track.Audio.ID</property> + <property>mediainfo.track.Audio.SamplingRate</property> + <property>mediainfo.track.Audio.ServiceKind</property> + <property>mediainfo.track.Audio.StreamSize</property> + <property>mediainfo.track.Audio.Title</property> + <property>mediainfo.track.General.AudioCount</property> + <property>mediainfo.track.General.CodecID</property> + <property>mediainfo.track.General.CompleteName</property> + <property>mediainfo.track.General.Description</property> + <property>mediainfo.track.General.Duration</property> + <property>mediainfo.track.General.Encoded_Application</property> + <property>mediainfo.track.General.Encoded_Application_CompanyName</property> + 
<property>mediainfo.track.General.Encoded_Application_Name</property> + <property>mediainfo.track.General.Encoded_Application_Version</property> + <property>mediainfo.track.General.Encoded_Date</property> + <property>mediainfo.track.General.Encoded_Library</property> + <property>mediainfo.track.General.Encoded_Library_Name</property> + <property>mediainfo.track.General.Encoded_Library_Version</property> + <property>mediainfo.track.General.extra.Attachments</property> + <property>mediainfo.track.General.extra.ErrorDetectionType</property> + <property>mediainfo.track.General.extra.IsTruncated</property> + <property>mediainfo.track.General.extra.bext_Present</property> + <property>mediainfo.track.General.FileSize</property> + <property>mediainfo.track.General.Format</property> + <property>mediainfo.track.General.Format_Profile</property> + <property>mediainfo.track.General.Format_Settings</property> + <property>mediainfo.track.General.Format_Version</property> + <property>mediainfo.track.General.IsStreamable</property> + <property>mediainfo.track.General.MenuCount</property> + <property>mediainfo.track.General.OtherCount</property> + <property>mediainfo.track.General.OverallBitRate</property> + <property>mediainfo.track.General.OverallBitRateMode</property> + <property>mediainfo.track.General.PackageName</property> + <property>mediainfo.track.General.Producer</property> + <property>mediainfo.track.General.StreamSize</property> + <property>mediainfo.track.General.TextCount</property> + <property>mediainfo.track.General.Title</property> + <property>mediainfo.track.General.UniqueID</property> + <property>mediainfo.track.General.VideoCount</property> + <property>mediainfo.track.Image.BitDepth</property> + <property>mediainfo.track.Image.ColorSpace</property> + <property>mediainfo.track.Image.colour_primaries</property> + <property>mediainfo.track.Image.Compression_Mode</property> + <property>mediainfo.track.Image.DisplayAspectRatio</property> + 
<property>mediainfo.track.Image.Encoded_Date</property> + <property>mediainfo.track.Image.Encoded_Library</property> + <property>mediainfo.track.Image.Format</property> + <property>mediainfo.track.Image.Format_Version</property> + <property>mediainfo.track.Image.FrameRate</property> + <property>mediainfo.track.Image.Height</property> + <property>mediainfo.track.Image.StreamSize</property> + <property>mediainfo.track.Image.transfer_characteristics</property> + <property>mediainfo.track.Image.Width</property> + <property>mediainfo.track.Video.BitDepth</property> + <property>mediainfo.track.Video.BitRate</property> + <property>mediainfo.track.Video.BitRate_Mode</property> + <property>mediainfo.track.Video.ChromaSubsampling</property> + <property>mediainfo.track.Video.CodecID</property> + <property>mediainfo.track.Video.ColorSpace</property> + <property>mediainfo.track.Video.BitsPixel_Frame</property> + <property>mediainfo.track.Video.Compression_Mode</property> + <property>mediainfo.track.Video.Default</property> + <property>mediainfo.track.Video.DisplayAspectRatio</property> + <property>mediainfo.track.Video.Duration</property> + <property>mediainfo.track.Video.Encoded_Library</property> + <property>mediainfo.track.Video.extra.coder_type</property> + <property>mediainfo.track.Video.extra.ErrorDetectionType</property> + <property>mediainfo.track.Video.extra.MaxSlicesCount</property> + <property>mediainfo.track.Video.extra.OriginalSourceMedium</property> + <property>mediainfo.track.Video.Forced</property> + <property>mediainfo.track.Video.Format</property> + <property>mediainfo.track.Video.Format_Profile</property> + <property>mediainfo.track.Video.Format_Settings_GOP</property> + <property>mediainfo.track.Video.Format_Settings_Wrapping</property> + <property>mediainfo.track.Video.Format_Version</property> + <property>mediainfo.track.Video.FrameCount</property> + <property>mediainfo.track.Video.FrameRate</property> + <property>mediainfo.track.Video.Height</property> + 
<property>mediainfo.track.Video.ID</property> + <property>mediainfo.track.Video.PixelAspectRatio</property> + <property>mediainfo.track.Video.PixelAspectRatioOriginal</property> + <property>mediainfo.track.Video.ScanOrder</property> + <property>mediainfo.track.Video.ScanType</property> + <property>mediainfo.track.Video.Standard</property> + <property>mediainfo.track.Video.StreamSize</property> + <property>mediainfo.track.Video.TimeCode_FirstFrame</property> + <property>mediainfo.track.Video.TimeCode_Source</property> + <property>mediainfo.track.Video.Title</property> + <property>mediainfo.track.Video.transfer_characteristics</property> + <property>mediainfo.track.Video.Width</property> + </properties> + </xsl:variable> + + <!-- General --> + <xsl:for-each select="/mi:MediaInfo/mi:media/mi:track[@type='General']/*[not(descendant::mi:extra) and not(ancestor-or-self::mi:extra)]"> + <xsl:variable name="prop"> + <xsl:text>General.</xsl:text><xsl:value-of select="name(.)"/> + </xsl:variable> + <xsl:if test="contains($myList,$prop)"> + <xsl:variable name="attName"> + <xsl:value-of select="substring-after($prop,'General.')"/> + </xsl:variable> + <xsl:element name="key"> + <xsl:attribute name="id"><xsl:value-of select="concat($genPath,$attName)"/></xsl:attribute> + <xsl:value-of select="/mi:MediaInfo/mi:media/mi:track[@type='General']/*[name()=$attName][1]" /> + </xsl:element> + </xsl:if> + </xsl:for-each> + + + <!-- General Extra--> + <xsl:for-each select="/mi:MediaInfo/mi:media/mi:track[@type='General']/mi:extra/*"> + <xsl:variable name="prop"> + <xsl:text>General.extra.</xsl:text><xsl:value-of select="name(.)"/> + </xsl:variable> + <xsl:if test="contains($myList,$prop)"> + <xsl:variable name="attName"> + <xsl:value-of select="substring-after($prop,'General.extra.')"/> + </xsl:variable> + <xsl:element name="key"> + <xsl:attribute name="id"><xsl:value-of select="concat($genxPath,$attName)"/></xsl:attribute> + <xsl:value-of 
select="/mi:MediaInfo/mi:media/mi:track[@type='General']/mi:extra/*[name()=$attName][1]" /> + </xsl:element> + </xsl:if> + </xsl:for-each> + + <!-- Video --> + <xsl:for-each select="/mi:MediaInfo/mi:media/mi:track[@type='Video']/*[not(descendant::mi:extra) and not(ancestor-or-self::mi:extra)]"> + <xsl:variable name="prop"> + <xsl:text>Video.</xsl:text><xsl:value-of select="name(.)"/> + </xsl:variable> + <xsl:if test="contains($myList,$prop)"> + <xsl:variable name="attName"> + <xsl:value-of select="substring-after($prop,'Video.')"/> + </xsl:variable> + <xsl:element name="key"> + <xsl:attribute name="id"><xsl:value-of select="concat($vidPath,$attName)"/></xsl:attribute> + <xsl:value-of select="/mi:MediaInfo/mi:media/mi:track[@type='Video']/*[name()=$attName][1]" /> + </xsl:element> + </xsl:if> + </xsl:for-each> + + <!-- Video Extra --> + <xsl:for-each select="/mi:MediaInfo/mi:media/mi:track[@type='Video']/mi:extra/*"> + <xsl:variable name="prop"> + <xsl:text>Video.extra.</xsl:text><xsl:value-of select="name(.)"/> + </xsl:variable> + <xsl:if test="contains($myList,$prop)"> + <xsl:variable name="attName"> + <xsl:value-of select="substring-after($prop,'Video.extra.')"/> + </xsl:variable> + <xsl:element name="key"> + <xsl:attribute name="id"><xsl:value-of select="concat($vidxPath,$attName)"/></xsl:attribute> + <xsl:value-of select="/mi:MediaInfo/mi:media/mi:track[@type='Video']/mi:extra/*[name()=$attName][1]" /> + </xsl:element> + </xsl:if> + </xsl:for-each> + + <!-- Audio --> + <xsl:for-each select="/mi:MediaInfo/mi:media/mi:track[@type='Audio']/*"> + <xsl:variable name="prop"> + <xsl:text>Audio.</xsl:text><xsl:value-of select="name(.)"/> + </xsl:variable> + <xsl:if test="contains($myList,$prop)"> + <xsl:variable name="attName"> + <xsl:value-of select="substring-after($prop,'Audio.')"/> + </xsl:variable> + <xsl:element name="key"> + <xsl:attribute name="id"><xsl:value-of select="concat($audPath,$attName)"/></xsl:attribute> + <xsl:value-of 
select="/mi:MediaInfo/mi:media/mi:track[@type='Audio']/*[name()=$attName][1]" /> + </xsl:element> + </xsl:if> + </xsl:for-each> + + <!-- Image --> + <xsl:for-each select="/mi:MediaInfo/mi:media/mi:track[@type='Image']/*"> + <xsl:variable name="prop"> + <xsl:text>Image.</xsl:text><xsl:value-of select="name(.)"/> + </xsl:variable> + <xsl:if test="contains($myList,$prop)"> + <xsl:variable name="attName"> + <xsl:value-of select="substring-after($prop,'Image.')"/> + </xsl:variable> + <xsl:element name="key"> + <xsl:attribute name="id"><xsl:value-of select="concat($imgPath,$attName)"/></xsl:attribute> + <xsl:value-of select="/mi:MediaInfo/mi:media/mi:track[@type='Image']/*[name()=$attName][1]" /> + </xsl:element> + </xsl:if> + </xsl:for-each> + + </xsl:element> + </xsl:element> + </xsl:template> + +</xsl:stylesheet> \ No newline at end of file diff --git a/test.sh b/test.sh new file mode 100644 index 0000000000000000000000000000000000000000..d51d847715c986036295ffa0252ff5ae705549ee --- /dev/null +++ b/test.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# check arguments +if [ "$#" -lt 1 ]; then echo "error: no media file given"; exit 1; fi +if [ ! -f "$1" ]; then echo "error: media file '$1' does not exist"; exit 1; fi + +# set variables +MEDIA_FILE="$1"; +ROSETTAVERSION="7.1.0" +ROSETTA_SDK_DIR="/exlibris/dps/d4_1/system.dir/dps-sdk-${ROSETTAVERSION}" +ROSETTA_SDK_JAR="${ROSETTA_SDK_DIR}/lib/dps-sdk-${ROSETTAVERSION}.jar" +PLUGIN_JAR="./TechnicalMetadataExtractorMediaInfoPlugin.jar" +PLUGIN_NAME="org.slub.rosetta.dps.repository.plugin.TechnicalMetadataExtractorMediaInfoPlugin" + +# run plugin +java -cp "${ROSETTA_SDK_JAR}:${PLUGIN_JAR}" "${PLUGIN_NAME}" "${MEDIA_FILE}" \ No newline at end of file