diff --git a/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java index 053af3963b908645a8c21558d3744032047c1458..fe430d22dad34dc62dca7a6d7ecda550f4a669e1 100644 --- a/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java +++ b/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java @@ -47,15 +47,16 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { private boolean valid = false; private boolean wellformed = false; + private static String schemaDir = ""; private final DocumentBuilderFactory dbf = DocumentBuilderFactory.newDefaultInstance(); private final List<String> errors = new ArrayList<>(); private final List<String> details = new ArrayList<>(); - private static final HashSet<validationSchema> namespaceSchemaMap = new HashSet<>() { + private static final HashSet<ValidationSchema> namespaceSchemaMap = new HashSet<>() { { - add(new validationSchema("http://www.loc.gov/standards/alto/ns-v2#", ValidationSchemaType.schema, "http://www.loc.gov/standards/alto/alto-v2.0.xsd")); - add(new validationSchema("http://www.loc.gov/mods/v3", ValidationSchemaType.schema, "http://www.loc.gov/standards/mods/v3/mods-3-8.xsd")); - add(new validationSchema("http://www.lido-schema.org", ValidationSchemaType.schema, "http://www.lido-schema.org/schema/v1.1/lido-v1.1.xsd")); - add(new validationSchema( "http://slubarchiv.slub-dresden.de/rights1", ValidationSchemaType.schema, "https://slubarchiv.slub-dresden.de/fileadmin/groups/slubsite/slubarchiv/standards/rights/rights1.xsd")); + add(new ValidationSchema("http://www.loc.gov/standards/alto/ns-v2#", ValidationSchemaType.schema, "http://www.loc.gov/standards/alto/alto-v2.0.xsd")); + add(new ValidationSchema("http://www.loc.gov/mods/v3", ValidationSchemaType.schema, "http://www.loc.gov/standards/mods/v3/mods-3-8.xsd")); + add(new ValidationSchema("http://www.lido-schema.org", ValidationSchemaType.schema, "http://www.lido-schema.org/schema/v1.1/lido-v1.1.xsd")); + add(new ValidationSchema( "http://slubarchiv.slub-dresden.de/rights1", ValidationSchemaType.schema, "https://slubarchiv.slub-dresden.de/fileadmin/groups/slubsite/slubarchiv/standards/rights/rights1.xsd")); // put("http://www.opengis.net/citygml/profiles/base/1.0", ""); // put("http://www.opengis.net/kml/2.2", ""); // put("http://www.music-encoding.org/ns/mei", ""); @@ -84,6 +85,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { dbf.setNamespaceAware(true); dbf.setValidating(false); dbf.setExpandEntityReferences(false); + } /** init params to configure the plugin via xml forms @@ -94,6 +96,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { initp.get("catalog").trim() }; validationCatalogResolver = new ValidationCatalogResolver(catalogs, errors); + schemaDir = initp.get("schemadir").trim(); } private static xmlInfoRecord getXMLinfo(Document doc) { @@ -112,7 +115,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { } return new xmlInfoRecord(namespaceURI, documentURI, xmlVersion, systemId, publicId); } - private static void printXMLinfo(Document doc) { + private void printXMLinfo(Document doc) { xmlInfoRecord info = getXMLinfo(doc); System.out.println( "\n-------------------------------------------" @@ -124,15 +127,32 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { + "\n" ); } - private static Optional<validationSchema> assignSchema(Document doc) { + private ValidationSchema localizeSchemaUrl(ValidationSchema validationSchema) { + switch (validationSchema.schemaType) { + case schema: + case schematron: + case relaxng: { + validationSchema.schemaLocalURL = + schemaDir + + validationSchema.schemaURL.substring( + validationSchema.schemaURL.lastIndexOf("/") + ); + } + case dtd: + case nothing: + break; + } + return validationSchema; + } + private Optional<ValidationSchema> assignSchema(Document doc) { xmlInfoRecord info = getXMLinfo(doc); - Optional<validationSchema> optEle = Optional.empty(); + Optional<ValidationSchema> optEle = Optional.empty(); if (null == info.nameSpaceUri) { /* try if a DTD is assignable */ var type = assignDtdIfApplicable(doc); if (type.equals(ValidationSchemaType.dtd)) { System.out.println("found schema " + type); - var ele = new validationSchema(info.nameSpaceUri, type, info.systemID); + var ele = new ValidationSchema(null, type, info.systemID); optEle = Optional.of(ele); } } else { @@ -140,12 +160,14 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { .filter( entry -> (entry.schemaType.equals(ValidationSchemaType.schema)) && (entry.nameSpace.equals(info.nameSpaceUri)) ) + .map(this::localizeSchemaUrl) .findAny(); } if ( optEle.isPresent() ) { System.out.println("found namespace " + optEle.get().nameSpace ); System.out.println("found schematype " + optEle.get().schemaType ); System.out.println("found schemaURL " + optEle.get().schemaURL ); + System.out.println("found localized schemaURL " + optEle.get().schemaLocalURL ); } else { System.out.println("no element found"); } @@ -193,23 +215,25 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { private boolean checkIfWellformed(String filePath) throws ParserConfigurationException, IOException, SAXException { /* detect XML type via NS */ boolean isWellformedXml = false; + dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); dbf.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, ""); dbf.setValidating(false); DocumentBuilder db = dbf.newDocumentBuilder(); Document doc = db.parse(new File(filePath)); xmlInfoRecord info = getXMLinfo(doc); reportDetail("detect XML type via NS:" + info.nameSpaceUri); /* TODO: align corresponding Schema based on systemID */ - Optional<validationSchema> schema = assignSchema(doc); + Optional<ValidationSchema> schema = assignSchema(doc); if (schema.isEmpty()) { reportError("there is no related schema found in *our* catalog of allowed XML types.", filePath); } else { reportDetail("assigned schema of type: " + schema.get().schemaType); reportDetail("assigned schema url: " + schema.get().schemaURL); if (schema.get().schemaType == ValidationSchemaType.dtd) { - assert(dbf.isValidating() == false); + assert(!dbf.isValidating()); dbf.setValidating(true); /* only used if DTD */ - assert(dbf.isValidating() == true); + assert(dbf.isValidating()); dbf.setFeature(XMLConstants.USE_CATALOG, true); dbf.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); System.out.println("-> dtd detected, use catalog"); diff --git a/java/org/slub/rosetta/dps/repository/plugin/validationSchema.java b/java/org/slub/rosetta/dps/repository/plugin/ValidationSchema.java similarity index 84% rename from java/org/slub/rosetta/dps/repository/plugin/validationSchema.java rename to java/org/slub/rosetta/dps/repository/plugin/ValidationSchema.java index 91cb7111ec4ba4c29422644d4924d944176a310b..5346d7b20ec51afd9cb2ec931c3659943db06742 100644 --- a/java/org/slub/rosetta/dps/repository/plugin/validationSchema.java +++ b/java/org/slub/rosetta/dps/repository/plugin/ValidationSchema.java @@ -3,7 +3,7 @@ package org.slub.rosetta.dps.repository.plugin; import javax.xml.validation.Schema; import java.io.InputStream; -class validationSchema { +class ValidationSchema { public final String nameSpace; public final ValidationSchemaType schemaType; public final String schemaURL; @@ -11,7 +11,7 @@ class validationSchema { public InputStream schemaInputStream; public String schemaLocalURL; - public validationSchema(String nameSpace, ValidationSchemaType schemaType, String schemaURL) { + public ValidationSchema(String nameSpace, ValidationSchemaType schemaType, String schemaURL) { this.nameSpace = nameSpace; this.schemaURL = schemaURL; this.schemaType = schemaType;