diff --git a/PLUGIN-INF/metadata_SLUBXmlFormatValidationPlugin.xml b/PLUGIN-INF/metadata_SLUBXmlFormatValidationPlugin.xml index d3b9385c3cafc7220b25d10fd4718bfc73f3171c..1c2117eb98297a14fc8f764b70ec884da9ef7343 100644 --- a/PLUGIN-INF/metadata_SLUBXmlFormatValidationPlugin.xml +++ b/PLUGIN-INF/metadata_SLUBXmlFormatValidationPlugin.xml @@ -9,7 +9,18 @@ <description>SLUBXmlFormatValidationPlugin_parameters_form</description> <version>1.0</version> <grid_x>1</grid_x> - <x_fields> + <x_fields> + <x_field> + <field_name>catalog</field_name> + <label>Path to XML catalog file</label> + <ui_tool_tip>add full path to own XML catalog file</ui_tool_tip> + <mandatory>true</mandatory> + <x_logic_type>String</x_logic_type> + <x_ui_type>TextField</x_ui_type> + <default_value>/etc/xml/catalog</default_value> + <css_class>width40</css_class> + <x_options/> + </x_field> </x_fields> </fr:x_form> </pl:initParameters> diff --git a/README.md b/README.md index a6f46a4dec4de10a13ae91ad442069f37cb74c7f..9ccc8f2d10ceb7bc52d82526a7c07049f55841ac 100644 --- a/README.md +++ b/README.md @@ -11,11 +11,12 @@ Compile Install ------- -* copy jar-file to _/operational_shared/plugins/custom/_ +* copy jar-file to _/operational_shared/plugins/custom/ Configuration ------------- +* the plugin uses the XML-catalog file specified via plugin settings (if no catalog entry exists an error is reported) Copyright hints --------------- diff --git a/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java index 915b65f0583ce60cb237a462230d8bc8f268c94a..6b10fe158ce780138f793f816621e183f13332d9 100644 --- a/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java +++ b/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java @@ -25,16 +25,11 @@ import javax.xml.XMLConstants; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.stream.StreamSource; import java.io.File; import java.io.IOException; -import java.io.InputStream; import java.util.ArrayList; -import java.util.HashSet; import java.util.List; -import java.util.Optional; - -import static org.slub.rosetta.dps.repository.plugin.validationSchemaType.schema; +import java.util.Map; enum validationSchemaType { schema, dtd, relaxng, schematron, nothing @@ -54,32 +49,24 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { private final DocumentBuilderFactory dbf = DocumentBuilderFactory.newDefaultInstance(); private final List<String> errors = new ArrayList<>(); private final List<String> details = new ArrayList<>(); - private static final HashSet<validationSchema> namespaceSchemaMap = new HashSet<>() { - { - add(new validationSchema("http://www.loc.gov/standards/alto/ns-v2#", schema, "http://www.loc.gov/standards/alto/alto-v2.0.xsd")); - add(new validationSchema("http://www.loc.gov/mods/v3", schema, "http://www.loc.gov/standards/mods/v3/mods-3-6.xsd")); -// put("http://www.opengis.net/citygml/profiles/base/1.0", ""); -// put("http://www.opengis.net/kml/2.2", ""); -// put("http://www.music-encoding.org/ns/mei", ""); -// put("http://www.tei-c.org/ns/1.0", ""); - } - }; - private final ValidationCatalogResolver validationCatalogResolver = new ValidationCatalogResolver(); + private ValidationCatalogResolver validationCatalogResolver = null; private final ValidationErrorHandler validationErrorHandler = new ValidationErrorHandler(); public SLUBXmlFormatValidationPlugin () { dbf.setNamespaceAware(true); dbf.setValidating(false); dbf.setExpandEntityReferences(false); - - - } - private StreamSource getLocalSchema(validationSchema v) { - var schemaLocalURL = "resources/" + v.schemaType + "/" + v.schemaURL; - InputStream schemaInputStream = this.getClass().getResourceAsStream( schemaLocalURL); - StreamSource ss = new StreamSource( schemaInputStream ); - return ss; + + /** init params to configure the plugin via xml forms + * @param initp parameter map + */ + public void initParams(Map<String, String> initp) { + String[] catalogs = new String[] { + initp.get("catalog").trim() + }; + validationCatalogResolver = new ValidationCatalogResolver(catalogs, errors); } + private static xmlInfoRecord getXMLinfo(Document doc) { String namespaceURI = doc.getNamespaceURI(); String documentURI = doc.getDocumentURI(); @@ -108,43 +95,8 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { + "\n" ); } - private static validationSchemaType assignDtdIfApplicable(Document doc) { - var info = getXMLinfo(doc); - if (null != info.systemID && info.systemID.endsWith(".dtd")) { - return validationSchemaType.dtd; - } else if (null != info.systemID ) { - - } - return validationSchemaType.nothing; - } - private static Optional<validationSchema> assignSchema(Document doc) { - xmlInfoRecord info = getXMLinfo(doc); - Optional<validationSchema> optEle = Optional.empty(); - if (null == info.nameSpaceUri) { - /* try if a DTD is assignable */ - var type = assignDtdIfApplicable(doc); - if (type.equals(validationSchemaType.dtd)) { - System.out.println("found schema " + type); - var ele = new validationSchema(info.nameSpaceUri, type, info.systemID); - optEle = Optional.of(ele); - } - } else { - optEle = namespaceSchemaMap.stream() - .filter( - entry -> (entry.schemaType.equals(validationSchemaType.schema)) && (entry.nameSpace.equals(info.nameSpaceUri)) - ) - .findAny(); - } - if ( optEle.isPresent() ) { - System.out.println("found namespace " + optEle.get().nameSpace ); - System.out.println("found schematype " + optEle.get().schemaType ); - System.out.println("found schemaURL " + optEle.get().schemaURL ); - } else { - System.out.println("no element found"); - } - return optEle; - } + @Override public boolean validateFormat(String filePath) { try { @@ -155,7 +107,6 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { Document doc = db.parse(new File(filePath)); xmlInfoRecord info = getXMLinfo(doc); reportDetail("detect XML type via NS:" + info.nameSpaceUri); - printXMLinfo(doc); if (!info.xmlVersion.equals("1.0")) { reportError("not an expected XML 1.0 document, found " + info.xmlVersion, filePath); @@ -165,39 +116,21 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { } wellformed=true; reportDetail("checked XML is wellformed"); - /* TODO: align corresponding Schema based on systemID */ - Optional<validationSchema> schema = assignSchema(doc); - if (schema.isEmpty()) { - reportError("there is no related schema found in *our* catalog of allowed XML types.", filePath); - } else { - reportDetail("assigned schema of type: " + schema.get().schemaType); - reportDetail("assigned schema url: " + schema.get().schemaURL); - if (schema.get().schemaType == validationSchemaType.dtd) { - } else if (!schema.get().schemaURL.isBlank()) { - dbf.setSchema(schema.get().schemaInst); - } - } /* TODO: validate against schema */ dbf.setFeature(XMLConstants.USE_CATALOG, true); dbf.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); dbf.setValidating(true); DocumentBuilder dbValidate = dbf.newDocumentBuilder(); Document docValidate; - dbValidate.setEntityResolver(validationCatalogResolver); dbValidate.setErrorHandler(validationErrorHandler); - - reportDetail("align entitity resolver"); docValidate = dbValidate.parse(new File(filePath)); assert(dbValidate.isValidating()); var version = docValidate.getXmlVersion(); - reportDetail("reparse with assigned schema"); + reportDetail("reparse using own catalog"); valid = true; wellformed = true; - - - } catch (ParserConfigurationException e) { reportError("ParserconfExc file=" + filePath + " Exc:" + e.getMessage()); } catch (IOException e) { diff --git a/java/org/slub/rosetta/dps/repository/plugin/TestSLUBXmlFormatValidationPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/TestSLUBXmlFormatValidationPlugin.java index c6bb18b40cc189786b2d75bcf310cdb1bb1a7162..ec8d6176235c1238283a1a0791cf86170a59fdf9 100644 --- a/java/org/slub/rosetta/dps/repository/plugin/TestSLUBXmlFormatValidationPlugin.java +++ b/java/org/slub/rosetta/dps/repository/plugin/TestSLUBXmlFormatValidationPlugin.java @@ -7,9 +7,12 @@ import org.junit.runners.JUnit4; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; import java.util.stream.Stream; -import static org.junit.Assert.*; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; /** * Tests for {@link SLUBXmlFormatValidationPlugin}. @@ -32,6 +35,8 @@ public class TestSLUBXmlFormatValidationPlugin { // do nothing, because nothing found } mock = new SLUBXmlFormatValidationPlugin(); + Map<String, String> initp = new HashMap<>(); + initp.put("catalog", "/etc/xml/catalog"); } @org.junit.Test diff --git a/java/org/slub/rosetta/dps/repository/plugin/ValidationCatalogResolver.java b/java/org/slub/rosetta/dps/repository/plugin/ValidationCatalogResolver.java index b3a36ccb0e1b5aa476223537499b4abefa2a6667..e42a5740714ff7be5acca41af706f10bc5e5ad0b 100644 --- a/java/org/slub/rosetta/dps/repository/plugin/ValidationCatalogResolver.java +++ b/java/org/slub/rosetta/dps/repository/plugin/ValidationCatalogResolver.java @@ -10,30 +10,24 @@ import javax.xml.catalog.CatalogResolver; import javax.xml.transform.Source; import java.io.IOException; import java.io.InputStream; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; +import java.util.List; class ValidationCatalogResolver implements CatalogResolver { - private final String[] catalogs = { - "/etc/xml/catalog", - "/usr/share/xml/docbook/schema/dtd/catalog.xml", - /* TODO */ - }; - private final HashMap<String, String> public2res = new HashMap<>() { - // TODO: add support for catalog.xml files. - { - put("-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN", "/resources/dtd/JATS-journalpublishing1.dtd"); - put("-//Recordare//DTD MusicXML 4.0 Partwise//EN", "http://www.musicxml.org/dtds/partwise.dtd"); - put("-//OASIS//DTD DocBook XML V4.2//EN", "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd"); - put("-//OASIS//DTD DocBook XML V4.3//EN", "http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd"); - put("-//OASIS//DTD DocBook XML V4.4//EN", "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"); - + private String[] catalogs; + private List<String> errors = null; + ValidationCatalogResolver(String[] catalogs, List<String> errors) { + this.catalogs = catalogs; + this.errors = errors; + } + @Override + public InputSource resolveEntity(String publicId, String systemId) { + System.out.println("======"); + System.out.println("resolveEntity publicId=" + publicId + " systemId=" + systemId); + if (catalogs.length == 0) { + System.out.println("No catalog given!"); + errors.add("No catalog given!"); + return null; } - }; - private final HashSet<String> allowedDtds = new HashSet<>(Arrays.asList("-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN")); - - private InputSource callDefaultCatalogResolver (String publicId, String systemId) { InputSource result = null; try { XMLCatalogResolver cr = new XMLCatalogResolver(); @@ -42,37 +36,10 @@ class ValidationCatalogResolver implements CatalogResolver { result = cr.resolveEntity(publicId, systemId); } catch (IOException | CatalogException | SAXException e) { System.out.println("No mapping found for publicId=" + publicId + ", systemId=" + systemId + ", " + e.getMessage()); + errors.add("No mapping found for publicId=" + publicId + ", systemId=" + systemId + ", " + e.getMessage()); } return result; } - @Override - public InputSource resolveEntity(String publicId, String systemId) { - System.out.println("======"); - System.out.println("resolveEntity\n\tpublicId=" + publicId + "\n\tsystemId=" + systemId); - /* TODO: only allow catalog calls to defined ressources */ - if (allowedDtds.contains(publicId)) { - System.out.println("fallback to CatalogResolver"); - var strippedSystemId = systemId; - if (!systemId.startsWith("http://")) { - strippedSystemId = "file:/etc/xml/" + systemId.substring(systemId.lastIndexOf("/") + 1); - } - System.out.println("\tusing URI=" + strippedSystemId); - return callDefaultCatalogResolver(publicId, systemId); - } - if (public2res.containsKey(publicId)) { - System.out.println("\tressource=" + public2res.get(publicId)); - InputSource is = new InputSource(); - is.setByteStream(SLUBXmlFormatValidationPlugin.class.getResourceAsStream(public2res.get(publicId))); - is.setPublicId(publicId); - is.setSystemId(systemId); - return is; - } - System.out.println("\tnot implemented (no fallback, no mapping)"); - if (systemId.startsWith("http://")) { - return callDefaultCatalogResolver(publicId, systemId); - } - return null; - } @Override public Source resolve(String href, String base) {