From 493d952694da3759003caeb0ff69df6911a9c64e Mon Sep 17 00:00:00 2001 From: Andreas Romeyke <art1@andreas-romeyke.de> Date: Tue, 13 Dec 2022 10:13:18 +0100 Subject: [PATCH] - changed to use script plugin from https://github.com/rosetta-format-library/Rosetta.MDExtractor-script-plugins as base --- META-INF/MANIFEST.MF | 7 +- .../javax.xml.transform.TransformerFactory | 1 - Makefile | 104 +---- .../metadata_MediaInfoMDExtractorPlugin.xml | 31 ++ ...nicalMetadataExtractorMediaConchPlugin.xml | 62 --- {resources => PLUGIN-INF}/properties.xml | 2 - {resources => PLUGIN-INF}/transformer.xsl | 111 +----- README.md | 25 +- bin/media_info_extractor.bat | 0 bin/media_info_extractor.sh | 21 + ...nicalMetadataExtractorMediaInfoPlugin.java | 377 ------------------ test.sh | 16 - 12 files changed, 82 insertions(+), 675 deletions(-) delete mode 100644 META-INF/services/javax.xml.transform.TransformerFactory create mode 100644 PLUGIN-INF/metadata_MediaInfoMDExtractorPlugin.xml delete mode 100644 PLUGIN-INF/metadata_SLUBTechnicalMetadataExtractorMediaConchPlugin.xml rename {resources => PLUGIN-INF}/properties.xml (96%) rename {resources => PLUGIN-INF}/transformer.xsl (52%) create mode 100644 bin/media_info_extractor.bat create mode 100644 bin/media_info_extractor.sh delete mode 100644 java/org/slub/rosetta/dps/repository/plugin/TechnicalMetadataExtractorMediaInfoPlugin.java delete mode 100644 test.sh diff --git a/META-INF/MANIFEST.MF b/META-INF/MANIFEST.MF index 9483aa9..dd745be 100644 --- a/META-INF/MANIFEST.MF +++ b/META-INF/MANIFEST.MF @@ -1,3 +1,4 @@ -Main-Class: org.slub.rosetta.dps.repository.plugin.TechnicalMetadataExtractorMediaInfoPlugin - - +Manifest-Version: 1.0 +Ant-Version: Apache Ant 1.9.2 +Created-By: 1.8.0_45-b15 (Oracle Corporation) + diff --git a/META-INF/services/javax.xml.transform.TransformerFactory b/META-INF/services/javax.xml.transform.TransformerFactory deleted file mode 100644 index 8e877dc..0000000 --- a/META-INF/services/javax.xml.transform.TransformerFactory +++ /dev/null @@ -1 +0,0 @@ -org.apache.xalan.processor.TransformerFactoryImpl diff --git a/Makefile b/Makefile index 4292e0a..da8283e 100644 --- a/Makefile +++ b/Makefile @@ -1,99 +1,15 @@ -#!/bin/bash -# Andreas Romeyke, SLUB Dresden +DEPENDENCIES:=PLUGIN-INF/metadata_MediaInfoMDExtractorPlugin.xml PLUGIN-INF/properties.xml PLUGIN-INF/transformer.xsl \ +META-INF/MANIFEST.MF bin/media_info_extractor.sh bin/media_info_extractor.bat -# Pfad zu Java 11 -JAVAPATH=/usr/lib/jvm/java-1.17.0-openjdk-*/bin/ -JAVARELEASE=17 -# Verwendete Rosetta-Version -ROSETTAVERSION=7.3.0 +all: build/ build/MediaInfoMDExtractorPlugin.jar -# Pfad zum Rosetta-SDK -ROSETTASDK=/exlibris/dps/d4_1/system.dir/dps-sdk-${ROSETTAVERSION}/lib/ -# Pfad zum Rosetta-SDK, Deposit-Module -ROSETTASDKDEPOSIT=${ROSETTASDK}/../dps-sdk-projects/dps-sdk-deposit/lib -ROSETTASDKPLUGINS=${ROSETTASDK}/../../bundled_plugins/ +build/: + mkdir -p build +build/MediaInfoMDExtractorPlugin.jar: $(DEPENDENCIES) + zip -r $@ $? -#XALAN_JAR=$(shell find /usr/share/java/ -name "xalan2.jar" ) -#XERCES_JAR=$(shell find /usr/share/java/ -name "xercesImpl.jar") -#XML_JAR=$(shell find /usr/share/java/ -name "xml-apis.jar") - -# classpath -JUNITCLASSPATH=/usr/share/java/junit4.jar -#SOURCESCLASSPATH=org/slub/rosetta/dps/repository/plugin/storage/nfs -CLASSPATH=${ROSETTASDKDEPOSIT}/dps-sdk-${ROSETTAVERSION}.jar -#BUILDPATH=$(CLASSPATH) - -# sources -SOURCES=java/org/slub/rosetta/dps/repository/plugin/TechnicalMetadataExtractorMediaInfoPlugin.java -OBJS=$(SOURCES:.java=.class) -JAR=TechnicalMetadataExtractorMediaInfoPlugin.jar -BUILD=build/ - -all: $(BUILD) $(JAR) - -help: - @echo "erzeugt Plugin für Rosetta von Exlibris" - @echo "" - @echo "Das Argument 'clean' löscht temporäre Dateien, 'help' gibt diese Hilfe aus und" - @echo "'compile' erzeugt ein JAR-File und ein Bash-Script welches das Java-Programm" - @echo "aufruft." - -jarclean: - @rm -Rf $(BUILD) - -test: $(OBJS) - java -cp ${CLASSPATH}:$(JUNITCLASSPATH) org.junit.runner.JUnitCore - -clean: jarclean - @rm -Rf doc/ - find ./java/org/ -name "*.class" -exec rm -f \{\} \; - @rm -Rf $(JAR) - -distclean: clean - find ./ -name "*~" -exec rm -f \{\} \; - @rm -Rf null - -$(BUILD): - @mkdir $(BUILD); - @mkdir $(BUILD)/lib - -$(JAR): $(OBJS) - @cp -r PLUGIN-INF/ $(BUILD) - @cp -r META-INF/ $(BUILD) - @cd java; find ./ -name "*.class" -print -exec cp --parents -r \{\} $(PWD)/$(BUILD) \; ; cd .. -# @cd resources; find ./ -type f -print -exec cp --parents -r \{\} $(PWD)/build \; ; cd .. - cp -a resources/ $(PWD)/$(BUILD) - -ifdef XALAN_JAR - echo "XALAN_JAR: unzip '$(XALAN_JAR)'" -# unzip $(XALAN_JAR) '*.class' -d $(BUILD); -# unzip $(XALAN_JAR) 'META-INF/services/*' -d $(BUILD) - cp --dereference $(XALAN_JAR) $(PWD)/$(BUILD)lib/ -endif -ifdef XERCES_JAR - echo "XERCES_JAR: unzip '$(XERCES_JAR)'" -# unzip $(XERCES_JAR) '*.class' -d $(BUILD); - cp --dereference $(XERCES_JAR) $(PWD)/$(BUILD)lib/ -endif -ifdef XML_JAR - echo "XML_JAR: unzip '$(XML_JAR)'" -# unzip $(XML_JAR) '*.class' -d $(BUILD); - cp --dereference $(XML_JAR) $(PWD)/$(BUILD)lib/ -endif - @cd $(BUILD); ${JAVAPATH}/jar cfvM ../$@ ./* ; cd .. - -%.class: %.java - ${JAVAPATH}/javac --release ${JAVARELEASE} -g -classpath ${CLASSPATH}:${JUNITCLASSPATH} -Xlint:all $< - -doc: $(SOURCES) - javadoc -d doc/ $^ - -check_prerequisites: - @echo -n "### Checking java path: $(JAVAPATH) ...." - @if [ -e $(JAVAPATH) ]; then echo "fine :)"; else echo " not found! :("; fi - @echo -n "### Checking Exlibris Rosetta SDK path: $(ROSETTASDK) ...." - @if [ -e $(ROSETTASDK) ]; then echo "fine :)"; else echo " not found! :("; fi - -.PHONY: help clean distclean jarclean test +clean: + rm -Rf build +.PHONY: all clean diff --git a/PLUGIN-INF/metadata_MediaInfoMDExtractorPlugin.xml b/PLUGIN-INF/metadata_MediaInfoMDExtractorPlugin.xml new file mode 100644 index 0000000..540ebd2 --- /dev/null +++ b/PLUGIN-INF/metadata_MediaInfoMDExtractorPlugin.xml @@ -0,0 +1,31 @@ +<pl:metadata-config xmlns:pl="http://www.exlibrisgroup.com/Plugins/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" > + <pl:pluginTypeName>MDExtractorPlugin</pl:pluginTypeName> + <pl:deployName>MediaInfoMDExtractorPlugin</pl:deployName> + <pl:className>bin\media_info_extractor</pl:className> + <pl:initParameters> + <fr:x_form xmlns:fr="http://com/exlibris/digitool/common/forms/xmlbeans"> + </fr:x_form> + </pl:initParameters> + <pl:description>MediaInfo MD Extractor Script Plugin</pl:description> + <pl:version>1.1</pl:version> + <pl:materialType>DIGITAL</pl:materialType> + <pl:module>Preservation</pl:module> + <pl:generalType>TASK</pl:generalType> + <pl:publicApi>N</pl:publicApi> + <pl:implType>script</pl:implType> + <pl:contactPerson> + <contact_info xmlns="http://www.exlibrisgroup.com/Plugins/1.0/"> + <contactType>admin</contactType> + <firstName>Exlibris</firstName> + <lastName>Ltd</lastName> + <telephone1>(050)123-1234</telephone1> + <telephone2></telephone2> + <email>admin@exlibris.co.il</email> + <address1>Agodat Asport2 Building 9</address1> + <address2></address2> + <city>Jerusalem</city> + <zipCode>00000</zipCode> + <country>Israel</country> + </contact_info> + </pl:contactPerson> +</pl:metadata-config> diff --git a/PLUGIN-INF/metadata_SLUBTechnicalMetadataExtractorMediaConchPlugin.xml b/PLUGIN-INF/metadata_SLUBTechnicalMetadataExtractorMediaConchPlugin.xml deleted file mode 100644 index 589496c..0000000 --- a/PLUGIN-INF/metadata_SLUBTechnicalMetadataExtractorMediaConchPlugin.xml +++ /dev/null @@ -1,62 +0,0 @@ -<pl:metadata-config xmlns:pl="http://www.exlibrisgroup.com/Plugins/1.0/" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> - <pl:pluginTypeName>TechnicalMDExtractorPlugin</pl:pluginTypeName> - <pl:deployName>TechnicalMetadataExtractorMediaInfoPlugin</pl:deployName> - <pl:className>org.slub.rosetta.dps.repository.plugin.TechnicalMetadataExtractorMediaInfoPlugin</pl:className> - <pl:initParameters> - <fr:x_form xmlns:fr="http://com/exlibris/digitool/common/forms/xmlbeans"> - <form_name>TechnicalMetadataExtractorMediaInfoPlugin_Params</form_name> - <description>TechnicalMetadataExtractorMediaInfoPlugin_parameters_form</description> - <version>0.1</version> - <grid_x>1</grid_x> - <md_format>1</md_format> - <x_fields> - <x_field> - <field_name>xsltproc_binary_path</field_name> - <label>Path to XSLTProc</label> - <ui_tool_tip>add full path to xsltproc</ui_tool_tip> - <mandatory>true</mandatory> - <x_logic_type>String</x_logic_type> - <x_ui_type>TextField</x_ui_type> - <default_value>/usr/bin/xsltproc</default_value> - <css_class>width40</css_class> - <x_options/> - </x_field> - <x_field> - <field_name>mediainfo_binary_path</field_name> - <label>Path to mediainfo</label> - <ui_tool_tip>add full path to mediainfo</ui_tool_tip> - <mandatory>true</mandatory> - <x_logic_type>String</x_logic_type> - <x_ui_type>TextField</x_ui_type> - <default_value>/usr/bin/mediainfo</default_value> - <css_class>width40</css_class> - <x_options/> - </x_field> - </x_fields> - </fr:x_form> - </pl:initParameters> - <pl:description>SLUB Technical Metadata Extractor Plugin, using MediaInfo - FFV1/Matroska-files</pl:description> - <pl:version>1.0</pl:version> - <pl:materialType>DIGITAL</pl:materialType> - <pl:module>Preservation</pl:module> - <pl:generalType>TASK</pl:generalType> - <pl:publicApi>N</pl:publicApi> - <pl:implType>java</pl:implType> - <pl:contactPerson> - <contact_info xmlns="http://www.exlibrisgroup.com/Plugins/1.0/"> - <contactType>admin</contactType> - <firstName/> - <lastName>SLUB Dresden</lastName> - <telephone1/> - <telephone2/> - <email>langzeitarchiv@slub-dresden.de</email> - <address1>Zellescher Weg 18</address1> - <address2/> - <city>Dresden</city> - <zipCode>01069</zipCode> - <country>Germany</country> - </contact_info> - </pl:contactPerson> -</pl:metadata-config> diff --git a/resources/properties.xml b/PLUGIN-INF/properties.xml similarity index 96% rename from resources/properties.xml rename to PLUGIN-INF/properties.xml index c4f9da4..14d17b8 100644 --- a/resources/properties.xml +++ b/PLUGIN-INF/properties.xml @@ -34,7 +34,6 @@ <property>mediainfo.track.General.Encoded_Library</property> <property>mediainfo.track.General.Encoded_Library_Name</property> <property>mediainfo.track.General.Encoded_Library_Version</property> -<property>mediainfo.track.General.extra.Attachments</property> <property>mediainfo.track.General.extra.ErrorDetectionType</property> <property>mediainfo.track.General.extra.IsTruncated</property> <property>mediainfo.track.General.extra.bext_Present</property> @@ -75,7 +74,6 @@ <property>mediainfo.track.Video.ChromaSubsampling</property> <property>mediainfo.track.Video.CodecID</property> <property>mediainfo.track.Video.ColorSpace</property> -<property>mediainfo.track.Video.BitsPixel_Frame</property> <property>mediainfo.track.Video.Compression_Mode</property> <property>mediainfo.track.Video.Default</property> <property>mediainfo.track.Video.DisplayAspectRatio</property> diff --git a/resources/transformer.xsl b/PLUGIN-INF/transformer.xsl similarity index 52% rename from resources/transformer.xsl rename to PLUGIN-INF/transformer.xsl index 1059a42..2e4dcdf 100644 --- a/resources/transformer.xsl +++ b/PLUGIN-INF/transformer.xsl @@ -53,115 +53,8 @@ <xsl:element name="isValid">true</xsl:element> <xsl:element name="isWellFormed">true</xsl:element> <xsl:element name="attributes"> - <xsl:variable name="myList"><properties> - <property>mediainfo.track.Audio.BitDepth</property> - <property>mediainfo.track.Audio.BitRate</property> - <property>mediainfo.track.Audio.BitRate_Mode</property> - <property>mediainfo.track.Audio.Channels</property> - <property>mediainfo.track.Audio.CodecID</property> - <property>mediainfo.track.Audio.Commercial_Name</property> - <property>mediainfo.track.Audio.Compression_Mode</property> - <property>mediainfo.track.Audio.Default</property> - <property>mediainfo.track.Audio.Duration</property> - <property>mediainfo.track.Audio.encoded_library</property> - <property>mediainfo.track.Audio.Forced</property> - <property>mediainfo.track.Audio.Format</property> - <property>mediainfo.track.Audio.Format_Profile</property> - <property>mediainfo.track.Audio.Format_Settings_Wrapping</property> - <property>mediainfo.track.Audio.Format_Settings_Sign</property> - <property>mediainfo.track.Audio.Format_Version</property> - <property>mediainfo.track.Audio.FrameRate</property> - <property>mediainfo.track.Audio.ID</property> - <property>mediainfo.track.Audio.SamplingRate</property> - <property>mediainfo.track.Audio.ServiceKind</property> - <property>mediainfo.track.Audio.StreamSize</property> - <property>mediainfo.track.Audio.Title</property> - <property>mediainfo.track.General.AudioCount</property> - <property>mediainfo.track.General.CodecID</property> - <property>mediainfo.track.General.CompleteName</property> - <property>mediainfo.track.General.Description</property> - <property>mediainfo.track.General.Duration</property> - <property>mediainfo.track.General.Encoded_Application</property> - <property>mediainfo.track.General.Encoded_Application_CompanyName</property> - <property>mediainfo.track.General.Encoded_Application_Name</property> - <property>mediainfo.track.General.Encoded_Application_Version</property> - <property>mediainfo.track.General.Encoded_Date</property> - <property>mediainfo.track.General.Encoded_Library</property> - <property>mediainfo.track.General.Encoded_Library_Name</property> - <property>mediainfo.track.General.Encoded_Library_Version</property> - <property>mediainfo.track.General.extra.Attachments</property> - <property>mediainfo.track.General.extra.ErrorDetectionType</property> - <property>mediainfo.track.General.extra.IsTruncated</property> - <property>mediainfo.track.General.extra.bext_Present</property> - <property>mediainfo.track.General.FileSize</property> - <property>mediainfo.track.General.Format</property> - <property>mediainfo.track.General.Format_Profile</property> - <property>mediainfo.track.General.Format_Settings</property> - <property>mediainfo.track.General.Format_Version</property> - <property>mediainfo.track.General.IsStreamable</property> - <property>mediainfo.track.General.MenuCount</property> - <property>mediainfo.track.General.OtherCount</property> - <property>mediainfo.track.General.OverallBitRate</property> - <property>mediainfo.track.General.OverallBitRateMode</property> - <property>mediainfo.track.General.PackageName</property> - <property>mediainfo.track.General.Producer</property> - <property>mediainfo.track.General.StreamSize</property> - <property>mediainfo.track.General.TextCount</property> - <property>mediainfo.track.General.Title</property> - <property>mediainfo.track.General.UniqueID</property> - <property>mediainfo.track.General.VideoCount</property> - <property>mediainfo.track.Image.BitDepth</property> - <property>mediainfo.track.Image.ColorSpace</property> - <property>mediainfo.track.Image.colour_primaries</property> - <property>mediainfo.track.Image.Compression_Mode</property> - <property>mediainfo.track.Image.DisplayAspectRatio</property> - <property>mediainfo.track.Image.Encoded_Date</property> - <property>mediainfo.track.Image.Encoded_Library</property> - <property>mediainfo.track.Image.Format</property> - <property>mediainfo.track.Image.Format_Version</property> - <property>mediainfo.track.Image.FrameRate</property> - <property>mediainfo.track.Image.Height</property> - <property>mediainfo.track.Image.StreamSize</property> - <property>mediainfo.track.Image.transfer_characteristics</property> - <property>mediainfo.track.Image.Width</property> - <property>mediainfo.track.Video.BitDepth</property> - <property>mediainfo.track.Video.BitRate</property> - <property>mediainfo.track.Video.BitRate_Mode</property> - <property>mediainfo.track.Video.ChromaSubsampling</property> - <property>mediainfo.track.Video.CodecID</property> - <property>mediainfo.track.Video.ColorSpace</property> - <property>mediainfo.track.Video.BitsPixel_Frame</property> - <property>mediainfo.track.Video.Compression_Mode</property> - <property>mediainfo.track.Video.Default</property> - <property>mediainfo.track.Video.DisplayAspectRatio</property> - <property>mediainfo.track.Video.Duration</property> - <property>mediainfo.track.Video.Encoded_Library</property> - <property>mediainfo.track.Video.extra.coder_type</property> - <property>mediainfo.track.Video.extra.ErrorDetectionType</property> - <property>mediainfo.track.Video.extra.MaxSlicesCount</property> - <property>mediainfo.track.Video.extra.OriginalSourceMedium</property> - <property>mediainfo.track.Video.Forced</property> - <property>mediainfo.track.Video.Format</property> - <property>mediainfo.track.Video.Format_Profile</property> - <property>mediainfo.track.Video.Format_Settings_GOP</property> - <property>mediainfo.track.Video.Format_Settings_Wrapping</property> - <property>mediainfo.track.Video.Format_Version</property> - <property>mediainfo.track.Video.FrameCount</property> - <property>mediainfo.track.Video.FrameRate</property> - <property>mediainfo.track.Video.Height</property> - <property>mediainfo.track.Video.ID</property> - <property>mediainfo.track.Video.PixelAspectRatio</property> - <property>mediainfo.track.Video.PixelAspectRatioOriginal</property> - <property>mediainfo.track.Video.ScanOrder</property> - <property>mediainfo.track.Video.ScanType</property> - <property>mediainfo.track.Video.Standard</property> - <property>mediainfo.track.Video.StreamSize</property> - <property>mediainfo.track.Video.TimeCode_FirstFrame</property> - <property>mediainfo.track.Video.TimeCode_Source</property> - <property>mediainfo.track.Video.Title</property> - <property>mediainfo.track.Video.transfer_characteristics</property> - <property>mediainfo.track.Video.Width</property> - </properties> + <xsl:variable name="myList"> + <xsl:value-of select="document('properties.xml')"/> </xsl:variable> <!-- General --> diff --git a/README.md b/README.md index 3cd2bc8..b72cbd8 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,26 @@ Plugin using MediaInfo to extract metadata from A/V files ========================================================= -you could test it using 'test.sh' - -== compile +compile +------- * make clean * make -== install +install +------- + +* downgrade format library to version XXXXX * copy jar-file to /operational_shared/plugins/custom/ +* upgrade format library to version YYYYYY -== configuration +configuration +------------- -* add Mapping under "Preservation:Extractors", switch from "Global" to "Local", use - "Custom"-Tab -* fill the fields +* nothing needed, because mapping is part of current format library -== copyright hints +copyright hints +--------------- -MediaInfo is released under Gnu General Public License 3.0 (or higher) -it could not be integrated and delivered as a binary only plugin. +MediaInfo is released under Gnu General Public License 3.0 (or higher), +therefore it could not be integrated and delivered as a binary only plugin. diff --git a/bin/media_info_extractor.bat b/bin/media_info_extractor.bat new file mode 100644 index 0000000..e69de29 diff --git a/bin/media_info_extractor.sh b/bin/media_info_extractor.sh new file mode 100644 index 0000000..9fd9988 --- /dev/null +++ b/bin/media_info_extractor.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +# tool output file +outFile="out" +now=`awk -v min=5 -v max=10 'BEGIN{srand(); print int(1+rand()*(999999-min+1))}'` +outFile='out_'$now'.xml' +mediainfo -f --Output=XML $1 > $outFile + +tool_agent=`mediainfo --Version | sed ':a;N;s,\n,,g'` +pathToXsl=`echo $0 | sed -e 's,bin/media_info_extractor.sh,PLUGIN-INF/transformer.xsl,g'` + +output=`xsltproc $pathToXsl $outFile` +mimeType=`mediainfo --Language=raw --Full --Inform="General;%InternetMediaType%" $1` +output=`echo $output | sed -e 's,</mdExtractor>,,g'` +output=$output'<agent>'$tool_agent'</agent><mimeType>'$mimeType'</mimeType></mdExtractor>' + +retval=$status +echo $output +rm $outFile + +exit $retval \ No newline at end of file diff --git a/java/org/slub/rosetta/dps/repository/plugin/TechnicalMetadataExtractorMediaInfoPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/TechnicalMetadataExtractorMediaInfoPlugin.java deleted file mode 100644 index 0191c6e..0000000 --- a/java/org/slub/rosetta/dps/repository/plugin/TechnicalMetadataExtractorMediaInfoPlugin.java +++ /dev/null @@ -1,377 +0,0 @@ -/* -2017-2022 by Andreas Romeyke (SLUB Dresden) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package org.slub.rosetta.dps.repository.plugin; - -import com.exlibris.core.sdk.strings.StringUtils; -import com.exlibris.dps.sdk.techmd.TechnicalMDExtractorPlugin; -import org.w3c.dom.Document; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.xpath.XPath; -import javax.xml.xpath.XPathConstants; -import javax.xml.xpath.XPathExpression; -import javax.xml.xpath.XPathFactory; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardCopyOption; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * SLUBTechnicalMetadataExtractorMediaConchPlugin - * - * @author andreas.romeyke@slub-dresden.de (Andreas Romeyke) - * @see com.exlibris.dps.sdk.techmd.TechnicalMDExtractorPlugin - */ -public class TechnicalMetadataExtractorMediaInfoPlugin implements TechnicalMDExtractorPlugin { - private String mediainfo_binary_path; - private String xsltproc_binary_path; - private final static String TRANSFORMER_XSL = "/resources/transformer.xsl"; - private final List<String> extractionErrors = new ArrayList<>(); - private final Map<String,String> attributes = new HashMap<>(); - //static final ExLogger log = ExLogger.getExLogger(SLUBTechnicalMetadataExtractorMediaConchPlugin.class, ExLogger.VALIDATIONSTACK); - private final static ArrayList<String> attributesMappedToDnxPropertiesOfTypeNumber = new ArrayList<>() {{ - // HINT: based on FL 7.1100 mappings for mediainfo (https://github.com/rosetta-format-library/RosettaFormatLibrary/releases/tag/7.1100) - add("mediainfo.track.Audio.Duration"); // => audio.duration - add("mediainfo.track.Audio.SamplingRate"); // => audio.sampling_rate - add("mediainfo.track.Audio.StreamSize"); // => audio.stream_size - add("mediainfo.track.General.AudioCount"); // => general.audio_count - add("mediainfo.track.General.MenuCount"); // => general.menu_count - add("mediainfo.track.General.OtherCount"); // => general.other_count - add("mediainfo.track.General.StreamSize"); // => general.stream_size - add("mediainfo.track.General.TextCount"); // => general.text_count - add("mediainfo.track.General.VideoCount"); // => general.video_count - add("mediainfo.track.Video.BitRate"); // => video.bit_rate - add("mediainfo.track.Video.DisplayAspectRatio"); // => video.display_aspect_ratio - add("mediainfo.track.Video.Duration"); // => video.duration - add("mediainfo.track.Video.extra.MaxSlicesCount"); // => video.extra.max_slices_count - add("mediainfo.track.Video.FrameCount"); // => video.frame_count - }}; - - /** constructor */ - public TechnicalMetadataExtractorMediaInfoPlugin() { - //log.info("SLUBVirusCheckPlugin instantiated with host=" + host + " port=" + port + " timeout=" + timeout); - System.out.println("SLUBTechnicalMetadataExtractorMediaConchPlugin instantiated"); - } - - /** init params to configure the plugin via xml forms - * @param initp parameter map - */ - public void initParams(Map<String, String> initp) { - this.mediainfo_binary_path = initp.get("mediainfo_binary_path").trim(); - this.xsltproc_binary_path = initp.get("xsltproc_binary_path").trim(); - try { - checkFileExists(this.mediainfo_binary_path); - checkFileExists(this.xsltproc_binary_path); - System.out.println("SLUBTechnicalMetadataExtractorMediaConchPlugin instantiated with " - + " mediainfo_binary_path=" + mediainfo_binary_path - + " xsltproc_binary_path=" + xsltproc_binary_path - ); - } catch (Exception e) { - e.printStackTrace(); - } - } - - - @Override - public void extract(String filePath) throws Exception { - if (StringUtils.isEmptyString(this.mediainfo_binary_path)) { - throw new Exception("mediainfo_binary_path not found"); - } - checkFileExists(this.mediainfo_binary_path); - checkFileExists(this.xsltproc_binary_path); - // mediainfo metadata extraction - callMediainfoAndResultProcessing(filePath); - } - - private void callMediainfo(String file_to_extract_from, String temp_mediainfo_outputfile ) throws Exception { - String exec_mediainfo_string = this.mediainfo_binary_path + " -f --Output=XML " + file_to_extract_from; - System.out.println("executing: " + exec_mediainfo_string); - checkFileExists(this.mediainfo_binary_path); - InputStreamReader process_out; - try { - Process p = Runtime.getRuntime().exec(exec_mediainfo_string); - p.waitFor(); - process_out = new InputStreamReader(p.getInputStream()); - } catch (IOException e) { - //log.error("exception creation socket, clamd not available at host=" + host + "port=" + port, e); - System.out.println("ERROR: (actual) mediainfo not available, path=" + this.mediainfo_binary_path + ", " + e.getMessage()); - throw new Exception("ERROR: (actual) mediainfo not available, path=" + this.mediainfo_binary_path + ", " + e.getMessage()); - } - BufferedReader reader = new BufferedReader(process_out); - String line = reader.readLine(); - StringBuilder mediainfo_output = new StringBuilder(); - while (line != null) { - /* patch out mediainfo schema location because downloading of xsd is not allowed */ - /* example input: - <MediaInfo - xmlns="https://mediaarea.net/mediainfo" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="https://mediaarea.net/mediainfo https://mediaarea.net/mediainfo/mediainfo_2_0.xsd" - version="2.0"> - */ - String regex = "xsi:schemaLocation=\"[^\"]*\""; - String line_patched = line.replaceAll(regex, ""); - mediainfo_output.append(line_patched); - if (!line.equals(line_patched)) { - System.out.println("monkey patched line:\n- " + line + " to new line:\n+ " + line_patched); - } - line = reader.readLine(); - } - reader.close(); - FileWriter temp_media_outputstream = new FileWriter(temp_mediainfo_outputfile); - BufferedWriter temp_media_streamwriter = new BufferedWriter(temp_media_outputstream); - temp_media_streamwriter.append(mediainfo_output); - temp_media_streamwriter.flush(); - temp_media_outputstream.flush(); - temp_media_outputstream.close(); - temp_media_streamwriter.close(); - } - - private void callMediainfoAndResultProcessing(String filePath) throws Exception { - File temp_media_outputfile = File.createTempFile("mediainfo_outp", ".xml"); - temp_media_outputfile.deleteOnExit(); - callMediainfo(filePath, String.valueOf(temp_media_outputfile)); - if (temp_media_outputfile.length() > 0) { - File temp_media_transformed_outputfile = File.createTempFile("mediainfo_transf_", ".xml"); - temp_media_transformed_outputfile.deleteOnExit(); - //OutputStream temp_media_outputstream = new FileOutputStream(temp_media_outputfile); - transformMediainfoOutput(temp_media_outputfile, temp_media_transformed_outputfile); - if (temp_media_transformed_outputfile.length() > 0) { - /* TODO: read transformed outputfile and return attributes */ - extractAttributesOfTransformedResult(temp_media_transformed_outputfile); - } else { - throw new Exception( "size of intermediate transformed file '" + temp_media_transformed_outputfile + "' is zero, something broken"); - } - } else { - throw new Exception( "size of intermediate mediainfo file '" + temp_media_outputfile + "' is zero, something broken"); - } - temp_media_outputfile.delete(); - } - - private void transformMediainfoOutput(File temp_media_outputfile, File temp_media_transformed_outputfile) throws Exception { - File temp_xsdfile = createTempXsdFile(); - /* xslt transform */ - String exec_xsltproc_string = xsltproc_binary_path + " -o " + temp_media_transformed_outputfile + " " + temp_xsdfile + " " + temp_media_outputfile; - System.out.println("executing: " + exec_xsltproc_string); - InputStreamReader process_out; - try { - Process p = Runtime.getRuntime().exec(exec_xsltproc_string); - p.waitFor(); - process_out = new InputStreamReader(p.getInputStream()); - } catch (IOException e) { - //log.error("exception creation socket, clamd not available at host=" + host + "port=" + port, e); - System.out.println("ERROR: (actual) xsltproc not available, path=" + xsltproc_binary_path + ", " + e.getMessage()); - throw new Exception("ERROR: (actual) xsltproc not available, path=" + xsltproc_binary_path + ", " + e.getMessage()); - } finally { - temp_xsdfile.delete(); - } - } - - private File createTempXsdFile() throws Exception { - /* write xsd */ - File temp_xsdfile = File.createTempFile("mediainfo_style", ".xsd"); - InputStream stylestream = getClass().getResourceAsStream(TRANSFORMER_XSL); - if (stylestream == null) { - throw new Exception("stylestream not found!"); - } - Files.copy(stylestream, Path.of(String.valueOf(temp_xsdfile)), StandardCopyOption.REPLACE_EXISTING); - stylestream.close(); - return temp_xsdfile; - } - - private void extractAttributesOfTransformedResult(File temp_media_transformed_outputfile) throws Exception { - checkFileExists(String.valueOf(temp_media_transformed_outputfile)); - XPathFactory xPathfactory = XPathFactory.newInstance(); - XPath xpath = xPathfactory.newXPath(); - XPathExpression expr = xpath.compile("/mdExtractor/attributes/key"); - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - DocumentBuilder db = dbf.newDocumentBuilder(); - Document temp_media_transformed_source = db.parse(temp_media_transformed_outputfile); - NodeList nl = (NodeList) expr.evaluate(temp_media_transformed_source, XPathConstants.NODESET); - int len = nl.getLength(); - for ( int i = 0; i < len; i++ ) { - Node node = nl.item(i); - String id = String.valueOf(node.getAttributes().getNamedItem("id").getTextContent()); - String value = node.getTextContent(); - attributes.put(id, value); - } - } - - - public String getAgentName() - { - return "mediainfo"; - } - - /** get agent version and signature version calling command VERSION - * - * @return string with version and signature version - */ - public String getAgent() { - StringBuilder response = new StringBuilder(); - response.append("mediaconch:\n"); - - String[] executables = { - this.mediainfo_binary_path - }; - for (String executable : executables) { - String execstring = executable + " --Version"; - InputStreamReader process_out = null; - try { - checkFileExists( executable); - Process p = Runtime.getRuntime().exec(execstring); - p.waitFor(); - process_out = new InputStreamReader(p.getInputStream()); - } catch (IOException e) { - //log.error("exception creation socket, clamd not available at host=" + host + "port=" + port, e); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - e.printStackTrace(); - } catch (Exception e) { - e.printStackTrace(); - } - if (process_out != null) { - BufferedReader reader = new BufferedReader(process_out); - String line; - try { - line = reader.readLine(); - while (line != null) { - System.out.println(line); - response.append(line); - try { - line = reader.readLine(); - } catch (IOException e) { - e.printStackTrace(); - } - } - reader.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - } - return response.toString().trim(); - } - - @Override - public String getAttributeByName(String attribute) { - if (attributes.containsKey(attribute)) { - return attributes.get(attribute); - } - // HINT: Rosetta expects a number of specific DNX properties instead of text - if (attributesMappedToDnxPropertiesOfTypeNumber.contains( attribute )) { - return "0"; // type NUMBER - } - return "not found"; // type TEXT & STRING - } - - @Override - public List<String> getExtractionErrors() { - List<String> extractionErrors = this.extractionErrors; - return Collections.unmodifiableList(extractionErrors); - } - - /* base is the property file from original mediainfo-plugin of FL working group */ - @Override - public List<String> getSupportedAttributeNames() { - //return new ArrayList<String>(attributes.keySet()); - List<String> available = new ArrayList<>(); - try { - XPathFactory xPathfactory = XPathFactory.newInstance(); - XPath xpath = xPathfactory.newXPath(); - XPathExpression expr = xpath.compile("//*[local-name()='properties']/*[local-name()='property']"); - //XPathExpression expr = xpath.compile("//text()"); - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - DocumentBuilder db = dbf.newDocumentBuilder(); - InputStream stylestream = getClass().getResourceAsStream(TRANSFORMER_XSL); - Document temp_property_list_xml = db.parse(stylestream); - NodeList nl = (NodeList) expr.evaluate(temp_property_list_xml, XPathConstants.NODESET); - int len = nl.getLength(); - for (int i = 0; i < len; i++) { - Node node = nl.item(i); - String value = node.getTextContent(); - available.add(value); - } - } catch (Exception e) { - System.out.println("error in getSupportedAttributeNaes, " + e.getMessage()); - } - return available; - } - - - private void checkFileExists (String filename) throws Exception { - File f = new File(filename); - if (! f.exists() ) { - System.out.println("ERROR: path=" + filename + " not available"); - throw new Exception("ERROR: path=" + filename + " not available"); - } - } - - /** stand-alone check, main file to call local installed clamd - * @param args list of files which should be scanned - */ - public static void main(String[] args) { - TechnicalMetadataExtractorMediaInfoPlugin plugin = new TechnicalMetadataExtractorMediaInfoPlugin(); - Map<String, String> initp = new HashMap<>(); - initp.put( "xsltproc_binary_path", "/usr/bin/xsltproc"); - initp.put( "mediainfo_binary_path", "/usr/bin/mediainfo"); - plugin.initParams( initp ); - System.out.println("----------------------------------"); - System.out.println("Agent: '" + plugin.getAgent() + "'"); - System.out.println(); - for (String file : args) { - try { - System.out.println("extracting from " + file); - plugin.extract(file); - } catch (Exception e) { - System.out.println("CALLERROR:"); - e.printStackTrace(); - return; - } - System.out.println("ERRORMESSAGE: " + plugin.getExtractionErrors()); - } - System.out.println("----------------------------------"); - System.out.println("getAgentName:"); - System.out.println( plugin.getAgentName()); - System.out.println("----------------------------------"); - System.out.println("getAgent:"); - System.out.println( plugin.getAgent()); - System.out.println("----------------------------------"); - System.out.println("getSupportedAttributeNames:"); - System.out.println( plugin.getSupportedAttributeNames()); - System.out.println("----------------------------------"); - System.out.println("getAttributeByName (summarized):"); - for (Map.Entry<String, String> m : plugin.attributes.entrySet()) { - String s = m.getKey() + " -> " + m.getValue(); - System.out.println(s); - } - } -} diff --git a/test.sh b/test.sh deleted file mode 100644 index d51d847..0000000 --- a/test.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# check arguments -if [ "$#" -lt 1 ]; then echo "error: no media file given"; exit 1; fi -if [ ! -f "$1" ]; then echo "error: media file '$1' does not exist"; exit 1; fi - -# set variables -MEDIA_FILE="$1"; -ROSETTAVERSION="7.1.0" -ROSETTA_SDK_DIR="/exlibris/dps/d4_1/system.dir/dps-sdk-${ROSETTAVERSION}" -ROSETTA_SDK_JAR="${ROSETTA_SDK_DIR}/lib/dps-sdk-${ROSETTAVERSION}.jar" -PLUGIN_JAR="./TechnicalMetadataExtractorMediaInfoPlugin.jar" -PLUGIN_NAME="org.slub.rosetta.dps.repository.plugin.TechnicalMetadataExtractorMediaInfoPlugin" - -# run plugin -java -cp "${ROSETTA_SDK_JAR}:${PLUGIN_JAR}" "${PLUGIN_NAME}" "${MEDIA_FILE}" \ No newline at end of file -- GitLab