diff --git a/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java index 6b59d8b36005087fec629f944bf604ae0246a844..1ac1c15a36f99edf5009197dc4ddd67a6bf0040b 100644 --- a/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java +++ b/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java @@ -24,9 +24,10 @@ import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; -import org.xml.sax.helpers.DefaultHandler; import javax.xml.XMLConstants; +import javax.xml.catalog.CatalogFeatures; +import javax.xml.catalog.CatalogManager; import javax.xml.catalog.CatalogResolver; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; @@ -37,7 +38,9 @@ import javax.xml.validation.Schema; import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.net.URI; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -78,48 +81,48 @@ class xmlInfoRecord { } class ValidationCatalogResolver implements CatalogResolver { - private HashMap<String,String> public2res = new HashMap() { + private final HashMap<String,String> public2res = new HashMap<>() { // TODO: add support for catalog.xml files. 
{ put("-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN", "/resources/dtd/JATS-journalpublishing1.dtd"); - put("-//NLM//DTD JATS (Z39.96) Journal Publishing DTD-Specific Modules v1.1d1 20130915//EN", "/resources/dtd/JATS-journalpubcustom-modules1.ent"); - put("-//NLM//DTD JATS (Z39.96) JATS DTD Suite Module of Modules v1.1d1 20130915//EN", "/resources/dtd/JATS-modules1.ent"); - put("-//NLM//DTD JATS (Z39.96) JATS DTD Suite Common Attributes (for all elements) v1.1d1 20130915//EN", "/resources/dtd/JATS-common-atts1.ent"); - put("-//NLM//DTD JATS (Z39.96) Journal Publishing DTD Customize Classes Module v1.1d1 20130915//EN", "/resources/dtd/JATS-journalpubcustom-classes1.ent"); - put("-//NLM//DTD JATS (Z39.96) Default Element Classes Module v1.1d1 20130915//EN", "/resources/dtd/JATS-default-classes1.ent"); - put("-//NLM//DTD JATS (Z39.96) Journal Publishing DTD Customize Mixes Module v1.1d1 20130915//EN", "/resources/dtd/JATS-journalpubcustom-mixes1.ent"); - put("-//NLM//DTD JATS (Z39.96) Default Element Mixes Module v1.1d1 20130915//EN", "/resources/dtd/JATS-journalpubcustom-mixes1.ent"); - } - }; - private HashSet<String> allowedDtds = new HashSet<>() { - { - add("-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN"); + put("-//Recordare//DTD MusicXML 4.0 Partwise//EN", "http://www.musicxml.org/dtds/partwise.dtd"); + put("-//OASIS//DTD DocBook XML V4.2//EN" , "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd"); + put("-//OASIS//DTD DocBook XML V4.3//EN" , "http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd"); + put("-//OASIS//DTD DocBook XML V4.4//EN" , "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"); + } }; + private final HashSet<String> allowedDtds = new HashSet<>(Arrays.asList("-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN")); + @Override public InputSource resolveEntity(String publicId, String systemId) { + System.out.println("======"); + System.out.println("resolveEntity\n\tpublicId=" + 
publicId + "\n\tsystemId=" + systemId ); + /* TODO: only allow catalog calls to defined resources */ if (allowedDtds.contains(publicId)) { - /* todo */ - DefaultHandler defaultHandler = new DefaultHandler(); - try { - return defaultHandler.resolveEntity(publicId, systemId); - } catch (IOException e) { - throw new RuntimeException(e); - } catch (SAXException e) { - System.out.println("default resolver exc: " + e.getMessage()); - e.getStackTrace(); + System.out.println("fallback to CatalogResolver"); + //CatalogFeatures f = CatalogFeatures.builder().with(CatalogFeatures.Feature.RESOLVE, "continue").build(); + var strippedSystemId = systemId; + if (!systemId.startsWith("http://")) { + strippedSystemId = "file:/etc/xml/" + systemId.substring(systemId.lastIndexOf("/") + 1); } + System.out.println("\tusing URI=" + strippedSystemId); + CatalogResolver cr = CatalogManager.catalogResolver(CatalogFeatures.defaults(), URI.create(strippedSystemId)); + return cr.resolveEntity(publicId, systemId); } if (public2res.containsKey(publicId)) { - System.out.println("\tressoure=" + public2res.get(publicId)); + System.out.println("\tressource=" + public2res.get(publicId)); InputSource is = new InputSource(); is.setByteStream(SLUBXmlFormatValidationPlugin.class.getResourceAsStream(public2res.get(publicId))); is.setPublicId(publicId); is.setSystemId(systemId); return is; } - System.out.println("======"); - System.out.println("resolveEntity\n\tpublicId=" + publicId + "\n\tsystemId=" + systemId ); + System.out.println("\tnot implemented (no fallback, no mapping)"); + if (systemId.startsWith("http://")) { + CatalogResolver cr = CatalogManager.catalogResolver(CatalogFeatures.defaults(), URI.create(systemId)); + return cr.resolveEntity(publicId, systemId); + } return null; } @@ -148,17 +151,17 @@ class ValidationCatalogResolver implements CatalogResolver { class ValidationErrorHandler implements ErrorHandler { @Override - public void warning(SAXParseException e) throws SAXException { - 
System.out.println("WARN: " + e.getMessage()); + public void warning(SAXParseException e) { + //System.out.println("WARN: " + e.getMessage()); } @Override - public void error(SAXParseException e) throws SAXException { - System.out.println("ERROR: " + e.getMessage()); + public void error(SAXParseException e) { + //System.out.println("ERROR: " + e.getMessage()); } @Override - public void fatalError(SAXParseException e) throws SAXException { + public void fatalError(SAXParseException e) { System.out.println("FATAL: " + e.getMessage()); } } @@ -174,8 +177,8 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { private boolean valid = false; private boolean wellformed = false; private final DocumentBuilderFactory dbf = DocumentBuilderFactory.newDefaultInstance(); - private List<String> errors = new ArrayList<>(); - private List<String> detail = new ArrayList<>(); + private final List<String> errors = new ArrayList<>(); + private final List<String> details = new ArrayList<>(); private static final HashSet<validationSchema> namespaceSchemaMap = new HashSet<>() { { add(new validationSchema("http://www.loc.gov/standards/alto/ns-v2#", schema, "http://www.loc.gov/standards/alto/alto-v2.0.xsd")); @@ -186,8 +189,8 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { // put("http://www.tei-c.org/ns/1.0", ""); } }; - private ValidationCatalogResolver validationCatalogResolver = new ValidationCatalogResolver(); - private ValidationErrorHandler validationErrorHandler = new ValidationErrorHandler(); + private final ValidationCatalogResolver validationCatalogResolver = new ValidationCatalogResolver(); + private final ValidationErrorHandler validationErrorHandler = new ValidationErrorHandler(); public SLUBXmlFormatValidationPlugin () { dbf.setNamespaceAware(true); dbf.setValidating(false); @@ -304,6 +307,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { dbf.setValidating(true); DocumentBuilder 
dbValidate = dbf.newDocumentBuilder(); Document docValidate; + dbValidate.setEntityResolver(validationCatalogResolver); dbValidate.setErrorHandler(validationErrorHandler); reportDetail("align entitity resolver"); @@ -338,7 +342,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { } private void reportDetail(String msg) { - detail.add(msg); + details.add(msg); System.out.println(msg); } @@ -364,7 +368,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { @Override public String getValidationDetails() { - return detail.stream().reduce("", (all, res) -> all.concat("\n") .concat(res) ); + return details.stream().reduce("", (all, res) -> all.concat("\n") .concat(res) ); } /** stand-alone check, main file to call local installed clamd