diff --git a/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java
index fe430d22dad34dc62dca7a6d7ecda550f4a669e1..8c607091e466b46f5283b966f51010969d60ab33 100644
--- a/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java
+++ b/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java
@@ -19,6 +19,10 @@ package org.slub.rosetta.dps.repository.plugin;
 import com.exlibris.core.infra.common.exceptions.logging.ExLogger;
 import com.exlibris.dps.sdk.techmd.FormatValidationPlugin;
 import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
 import org.xml.sax.ErrorHandler;
 import org.xml.sax.SAXException;
 import org.xml.sax.SAXParseException;
@@ -35,6 +39,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.stream.Stream;
 
 /**
  * SLUBXmlFormatValidationPlugin
@@ -47,22 +52,61 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
 
     private boolean valid = false;
     private boolean wellformed = false;
-    private static String schemaDir = "";
+    private static String namespaceSchemaMapFile = "";
     private final DocumentBuilderFactory dbf = DocumentBuilderFactory.newDefaultInstance();
     private final List<String> errors = new ArrayList<>();
     private final List<String> details = new ArrayList<>();
-    private static final HashSet<ValidationSchema> namespaceSchemaMap = new HashSet<>() {
-        {
-            add(new ValidationSchema("http://www.loc.gov/standards/alto/ns-v2#", ValidationSchemaType.schema, "http://www.loc.gov/standards/alto/alto-v2.0.xsd"));
-            add(new ValidationSchema("http://www.loc.gov/mods/v3", ValidationSchemaType.schema, "http://www.loc.gov/standards/mods/v3/mods-3-8.xsd"));
-            add(new ValidationSchema("http://www.lido-schema.org", ValidationSchemaType.schema, "http://www.lido-schema.org/schema/v1.1/lido-v1.1.xsd"));
-            add(new ValidationSchema( "http://slubarchiv.slub-dresden.de/rights1", ValidationSchemaType.schema, "https://slubarchiv.slub-dresden.de/fileadmin/groups/slubsite/slubarchiv/standards/rights/rights1.xsd"));
-//            put("http://www.opengis.net/citygml/profiles/base/1.0", "");
-//            put("http://www.opengis.net/kml/2.2", "");
-//            put("http://www.music-encoding.org/ns/mei", "");
-//            put("http://www.tei-c.org/ns/1.0", "");
+    public static final HashSet<ValidationSchema> namespaceSchemaMap = new HashSet<>();
+
+    /**
+     * Reads the XML file named by {@code namespaceSchemaMapFile} and adds one
+     * {@code ValidationSchema} to {@code namespaceSchemaMap} per {@code entry} element whose
+     * {@code schematype}, {@code namespace} and {@code schemaurl} attributes are all non-blank.
+     * Incomplete entries are logged and skipped; an unknown schematype is logged and the entry is
+     * registered with {@code ValidationSchemaType.nothing}; a parse failure is logged and ends loading.
+     */
+    private void loadNamespaceSchemaMap() {
+        try {
+            DocumentBuilder db = dbf.newDocumentBuilder();
+            Document doc = db.parse(new File(namespaceSchemaMapFile));
+            Element de = doc.getDocumentElement();
+            NodeList nodes = de.getElementsByTagName("entry");
+            for (int i = 0; i < nodes.getLength(); i++) {
+                Node n = nodes.item(i);
+                NamedNodeMap attributes = n.getAttributes();
+                // Collect the attribute VALUES (not the attribute names) in a fixed order;
+                // a missing attribute yields "" so the blank check below rejects the entry.
+                List<String> attr_list = Stream.of("schematype", "namespace", "schemaurl")
+                        .map(attributes::getNamedItem)
+                        .map(item -> item == null ? "" : item.getTextContent())
+                        .toList();
+                boolean allValid = attr_list.stream().allMatch(SLUBXmlFormatValidationPlugin::checkAttributesOfNamespaceSchemaMapFile);
+                if (allValid) {
+                    var attr_type = attr_list.get(0);
+                    var schematype = ValidationSchemaType.nothing;
+                    switch (attr_type) {
+                        case "schema" -> schematype = ValidationSchemaType.schema;
+                        case "schematron" -> schematype = ValidationSchemaType.schematron;
+                        case "relaxng" -> schematype = ValidationSchemaType.relaxng;
+                        default -> log.error("attribute schematype needs to be type of schema, schematron or relaxng, but is " + attr_type);
+                    }
+                    var namespace = attr_list.get(1);
+                    var url = attr_list.get(2);
+                    ValidationSchema v = new ValidationSchema(namespace, schematype, url);
+                    namespaceSchemaMap.add(v);
+                } else {
+                    log.error("invalid entry(" + i + ") in namespace schema map file " + namespaceSchemaMapFile);
+                }
+            }
+        } catch (ParserConfigurationException | SAXException | IOException e) {
+            log.error("exception while parsing namespace schema map file " + namespaceSchemaMapFile + ", " + e.getMessage());
         }
-    };
+    }
+
+    /** Returns true if the attribute value is neither null nor blank. */
+    private static boolean checkAttributesOfNamespaceSchemaMapFile(String attr_type) {
+        return null != attr_type && !attr_type.isBlank();
+    }
 
     private ValidationCatalogResolver validationCatalogResolver = null;
     private final ErrorHandler validationErrorHandler = new ErrorHandler() {
@@ -73,22 +117,22 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
 
         @Override
         public void error(SAXParseException e) {
-            errors.add( e.getMessage());
+            errors.add(e.getMessage());
         }
 
         @Override
         public void fatalError(SAXParseException e) {
-            errors.add( e.getMessage());
+            errors.add(e.getMessage());
         }
     };
-    public SLUBXmlFormatValidationPlugin () {
+
+    public SLUBXmlFormatValidationPlugin() {
         dbf.setNamespaceAware(true);
         dbf.setValidating(false);
         dbf.setExpandEntityReferences(false);
-
     }
 
-    /** init params to configure the plugin via xml forms 
+    /** init params to configure the plugin via xml forms
      * @param initp parameter map
      */
     public void initParams(Map<String, String> initp) {
@@ -96,7 +140,8 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
                 initp.get("catalog").trim()
         };
         validationCatalogResolver = new ValidationCatalogResolver(catalogs, errors);
-        schemaDir = initp.get("schemadir").trim();
+        namespaceSchemaMapFile = initp.get("schemadir").trim();
+        loadNamespaceSchemaMap();
     }
 
     private static xmlInfoRecord getXMLinfo(Document doc) {
@@ -133,7 +178,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
                 case schematron:
                 case relaxng: {
                     validationSchema.schemaLocalURL =
-                            schemaDir +
+                            namespaceSchemaMapFile +
                                     validationSchema.schemaURL.substring(
                                             validationSchema.schemaURL.lastIndexOf("/")
                                     );
@@ -217,7 +262,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
         boolean isWellformedXml = false;
         dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
         dbf.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
-        dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
+        //dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
         dbf.setValidating(false);
         DocumentBuilder db = dbf.newDocumentBuilder();
         Document doc = db.parse(new File(filePath));
@@ -318,5 +363,6 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
      */
     public static void main(String[] args) {
         var plugin = new SLUBXmlFormatValidationPlugin();
+        System.out.println("----------------------------------");
         System.out.println("Agent: '" + plugin.getAgent() + "'");
         System.out.println();