diff --git a/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java index fedae34c99e8395db3b31c7d886ed32fec11bbda..6b59d8b36005087fec629f944bf604ae0246a844 100644 --- a/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java +++ b/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java @@ -19,15 +19,26 @@ package org.slub.rosetta.dps.repository.plugin; import com.exlibris.core.infra.common.exceptions.logging.ExLogger; import com.exlibris.dps.sdk.techmd.FormatValidationPlugin; import org.w3c.dom.Document; +import org.w3c.dom.ls.LSInput; +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; +import org.xml.sax.helpers.DefaultHandler; import javax.xml.XMLConstants; +import javax.xml.catalog.CatalogResolver; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.Source; +import javax.xml.transform.stream.StreamSource; +import javax.xml.validation.Schema; import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Optional; @@ -41,6 +52,9 @@ class validationSchema { public final String nameSpace; public final validationSchemaType schemaType; public final String schemaURL; + public Schema schemaInst; + public InputStream schemaInputStream; + public String schemaLocalURL; public validationSchema(String nameSpace, validationSchemaType schemaType, String schemaURL) { this.nameSpace = nameSpace; this.schemaURL = schemaURL; @@ -63,6 +77,92 @@ class xmlInfoRecord { } } +class ValidationCatalogResolver implements CatalogResolver { + private HashMap<String,String> public2res = new HashMap() { + // TODO: add support for catalog.xml files. + { + put("-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN", "/resources/dtd/JATS-journalpublishing1.dtd"); + put("-//NLM//DTD JATS (Z39.96) Journal Publishing DTD-Specific Modules v1.1d1 20130915//EN", "/resources/dtd/JATS-journalpubcustom-modules1.ent"); + put("-//NLM//DTD JATS (Z39.96) JATS DTD Suite Module of Modules v1.1d1 20130915//EN", "/resources/dtd/JATS-modules1.ent"); + put("-//NLM//DTD JATS (Z39.96) JATS DTD Suite Common Attributes (for all elements) v1.1d1 20130915//EN", "/resources/dtd/JATS-common-atts1.ent"); + put("-//NLM//DTD JATS (Z39.96) Journal Publishing DTD Customize Classes Module v1.1d1 20130915//EN", "/resources/dtd/JATS-journalpubcustom-classes1.ent"); + put("-//NLM//DTD JATS (Z39.96) Default Element Classes Module v1.1d1 20130915//EN", "/resources/dtd/JATS-default-classes1.ent"); + put("-//NLM//DTD JATS (Z39.96) Journal Publishing DTD Customize Mixes Module v1.1d1 20130915//EN", "/resources/dtd/JATS-journalpubcustom-mixes1.ent"); + put("-//NLM//DTD JATS (Z39.96) Default Element Mixes Module v1.1d1 20130915//EN", "/resources/dtd/JATS-journalpubcustom-mixes1.ent"); + } + }; + private HashSet<String> allowedDtds = new HashSet<>() { + { + add("-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN"); + } + }; + @Override + public InputSource resolveEntity(String publicId, String systemId) { + if (allowedDtds.contains(publicId)) { + /* todo */ + DefaultHandler defaultHandler = new DefaultHandler(); + try { + return defaultHandler.resolveEntity(publicId, systemId); + } catch (IOException e) { + throw new RuntimeException(e); + } catch (SAXException e) { + System.out.println("default resolver exc: " + e.getMessage()); + e.getStackTrace(); + } + } + if (public2res.containsKey(publicId)) { + System.out.println("\tressoure=" + public2res.get(publicId)); + InputSource is = new InputSource(); + is.setByteStream(SLUBXmlFormatValidationPlugin.class.getResourceAsStream(public2res.get(publicId))); + is.setPublicId(publicId); + is.setSystemId(systemId); + return is; + } + System.out.println("======"); + System.out.println("resolveEntity\n\tpublicId=" + publicId + "\n\tsystemId=" + systemId ); + return null; + } + + @Override + public Source resolve(String href, String base) { + System.out.println("======"); + System.out.println("resolve href=" + href + " base=" + base); + + return null; + } + + @Override + public InputStream resolveEntity(String publicId, String systemId, String baseUri, String nameSpace) { + System.out.println("======"); + System.out.println("resolveEntity2 publicId=" + publicId + " systemId=" + systemId); + return null; + } + + @Override + public LSInput resolveResource(String type, String namespaceURI, String publicId, String systemId, String baseUri) { + System.out.println("======"); + System.out.println("resolveResource publicId=" + publicId + " systemId=" + systemId); + return null; + } +} + +class ValidationErrorHandler implements ErrorHandler { + @Override + public void warning(SAXParseException e) throws SAXException { + System.out.println("WARN: " + e.getMessage()); + } + + @Override + public void error(SAXParseException e) throws SAXException { + System.out.println("ERROR: " + e.getMessage()); + } + + @Override + public void fatalError(SAXParseException e) throws SAXException { + System.out.println("FATAL: " + e.getMessage()); + } +} + /** * SLUBXmlFormatValidationPlugin * @@ -86,14 +186,21 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { // put("http://www.tei-c.org/ns/1.0", ""); } }; - + private ValidationCatalogResolver validationCatalogResolver = new ValidationCatalogResolver(); + private ValidationErrorHandler validationErrorHandler = new ValidationErrorHandler(); public SLUBXmlFormatValidationPlugin () { dbf.setNamespaceAware(true); dbf.setValidating(false); dbf.setExpandEntityReferences(false); - } + } + private StreamSource getLocalSchema(validationSchema v) { + var schemaLocalURL = "resources/" + v.schemaType + "/" + v.schemaURL; + InputStream schemaInputStream = this.getClass().getResourceAsStream( schemaLocalURL); + StreamSource ss = new StreamSource( schemaInputStream ); + return ss; + } private static xmlInfoRecord getXMLinfo(Document doc) { String namespaceURI = doc.getNamespaceURI(); String documentURI = doc.getDocumentURI(); @@ -168,7 +275,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { DocumentBuilder db = dbf.newDocumentBuilder(); Document doc = db.parse(new File(filePath)); xmlInfoRecord info = getXMLinfo(doc); - detail.add("detect XML type via NS:" + info.nameSpaceUri); + reportDetail("detect XML type via NS:" + info.nameSpaceUri); printXMLinfo(doc); if (!info.xmlVersion.equals("1.0")) { @@ -178,21 +285,36 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { return false; } wellformed=true; - detail.add("checked XML is wellformed"); + reportDetail("checked XML is wellformed"); /* TODO: align corresponding Schema based on systemID */ - var schema = assignSchema(doc); + Optional<validationSchema> schema = assignSchema(doc); if (schema.isEmpty()) { reportError("there is no related schema found in *our* catalog of allowed XML types.", filePath); - valid = false; - return false; + } else { + reportDetail("assigned schema of type: " + schema.get().schemaType); + reportDetail("assigned schema url: " + schema.get().schemaURL); + if (schema.get().schemaType == validationSchemaType.dtd) { + } else if (!schema.get().schemaURL.isBlank()) { + dbf.setSchema(schema.get().schemaInst); + } } - detail.add("assigned schema of type: " + schema.get().schemaType); - detail.add("assigned schema url: " + schema.get().schemaURL); + /* TODO: validate against schema */ + dbf.setFeature(XMLConstants.USE_CATALOG, true); + dbf.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + dbf.setValidating(true); + DocumentBuilder dbValidate = dbf.newDocumentBuilder(); + Document docValidate; + dbValidate.setEntityResolver(validationCatalogResolver); + dbValidate.setErrorHandler(validationErrorHandler); + reportDetail("align entitity resolver"); + docValidate = dbValidate.parse(new File(filePath)); + assert(dbValidate.isValidating()); + docValidate.getXmlVersion(); + reportDetail("reparse with assigned schema"); + valid = true; + wellformed = true; - // TODO: SAXParser parser = spf.newSAXParser(); - // parser.setProperty(CatalogFeatures.Feature.FILES.getPropertyName(), "catalog.xml"); - /* TODO: validate against schema */ } catch (ParserConfigurationException e) { reportError("ParserconfExc file=" + filePath + " Exc:" + e.getMessage()); @@ -208,12 +330,18 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { private void reportError(String msg) { errors.add(msg); + System.out.println(msg); // TODO: log.warn(msg); } private void reportError(String msg, String filepath) { reportError(msg + " (file=" + filepath + ")"); } + private void reportDetail(String msg) { + detail.add(msg); + System.out.println(msg); + } + @Override public String getAgent() { return "SLUBXmlFormatValidationPlugin";