diff --git a/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java index 8c607091e466b46f5283b966f51010969d60ab33..c07f0286227d6a764fda8d573a1c0cc7dd8da1a6 100644 --- a/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java +++ b/java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java @@ -35,11 +35,11 @@ import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.stream.Stream; /** * SLUBXmlFormatValidationPlugin @@ -58,7 +58,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { private final List<String> details = new ArrayList<>(); public static final HashSet<ValidationSchema> namespaceSchemaMap = new HashSet<>(); - private void loadNamespaceSchemaMap() { + public void loadNamespaceSchemaMap() { DocumentBuilder db = null; try { db = dbf.newDocumentBuilder(); @@ -69,9 +69,8 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { Node n = nodes.item(i); NamedNodeMap attributes = n.getAttributes(); List<String> attr_strings = List.of(new String[]{"schematype", "namespace", "schemaurl"}); - Stream<String> attr_stream = attr_strings.stream().map(s -> attributes.getNamedItem(s).getTextContent()); - boolean allValid = attr_stream.allMatch(SLUBXmlFormatValidationPlugin::checkAttributesOfNamespaceSchemaMapFile); - List<String> attr_list = attr_strings.stream().toList(); + List<String> attr_list = attr_strings.stream().map(s -> attributes.getNamedItem(s).getTextContent()).toList(); + boolean allValid = attr_list.stream().allMatch(SLUBXmlFormatValidationPlugin::checkAttributesOfNamespaceSchemaMapFile); if (allValid) { assert (attr_strings.get(0).equals("schematype")); var attr_type = attr_list.get(0); @@ -129,11 +128,12 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { * @param initp parameter map */ public void initParams(Map<String, String> initp) { + System.out.println( initp); String[] catalogs = new String[] { initp.get("catalog").trim() }; validationCatalogResolver = new ValidationCatalogResolver(catalogs, errors); - namespaceSchemaMapFile = initp.get("schemadir").trim(); + namespaceSchemaMapFile = initp.get("schemacatalog").trim(); loadNamespaceSchemaMap(); } @@ -165,23 +165,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { + "\n" ); } - private ValidationSchema localizeSchemaUrl(ValidationSchema validationSchema) { - switch (validationSchema.schemaType) { - case schema: - case schematron: - case relaxng: { - validationSchema.schemaLocalURL = - namespaceSchemaMapFile + - validationSchema.schemaURL.substring( - validationSchema.schemaURL.lastIndexOf("/") - ); - } - case dtd: - case nothing: - break; - } - return validationSchema; - } + private Optional<ValidationSchema> assignSchema(Document doc) { xmlInfoRecord info = getXMLinfo(doc); Optional<ValidationSchema> optEle = Optional.empty(); @@ -194,18 +178,25 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { optEle = Optional.of(ele); } } else { + System.out.println("check info.nameSpaceUri=" + info.nameSpaceUri); + var it = namespaceSchemaMap.iterator(); + System.out.println("map size=" + namespaceSchemaMap.size()); + while (it.hasNext()){ + ValidationSchema v = it.next(); + System.out.println(v.schemaURL); + System.out.println(v.nameSpace); + System.out.println(v.schemaType); + } optEle = namespaceSchemaMap.stream() .filter( entry -> (entry.schemaType.equals(ValidationSchemaType.schema)) && (entry.nameSpace.equals(info.nameSpaceUri)) ) - .map(this::localizeSchemaUrl) .findAny(); } if ( optEle.isPresent() ) { System.out.println("found namespace " + optEle.get().nameSpace ); System.out.println("found schematype " + optEle.get().schemaType ); System.out.println("found schemaURL " + optEle.get().schemaURL ); - System.out.println("found localized schemaURL " + optEle.get().schemaLocalURL ); } else { System.out.println("no element found"); } @@ -216,11 +207,31 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { @Override public boolean validateFormat(String filePath) { try { - if (checkIfWellformed(filePath)) { + var doc = getDocument(filePath); + if (checkIfWellformed(doc)) { wellformed = true; errors.clear(); - valid = validateAgainstSchema(filePath); - System.out.println("ok no error ->" + valid ); + Optional<ValidationSchema> schema = assignSchema(doc); + if (schema.isEmpty()) { + reportError("there is no related schema found in *our* catalog of allowed XML types.", filePath); + valid = false; + } else { + reportDetail("assigned schema of type: " + schema.get().schemaType); + reportDetail("assigned schema url: " + schema.get().schemaURL); + if (schema.get().schemaType == ValidationSchemaType.dtd) { + assert(!dbf.isValidating()); + dbf.setValidating(true); /* only used if DTD */ + assert(dbf.isValidating()); + dbf.setFeature(XMLConstants.USE_CATALOG, true); + dbf.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + System.out.println("-> dtd detected, use catalog"); + } else if (!schema.get().schemaURL.isBlank()) { + System.out.println("-> set schema to " + schema.get().schemaURL); + dbf.setSchema(schema.get().schemaInst); + assert(dbf.getSchema() != null); + } + valid = validateAgainstSchema(filePath); + } } } catch (ParserConfigurationException e) { reportError("ParserconfExc file=" + filePath + " Exc:" + e.getMessage()); @@ -250,41 +261,16 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { } - private boolean checkIfWellformed(String filePath) throws ParserConfigurationException, IOException, SAXException { + private boolean checkIfWellformed(Document doc) throws ParserConfigurationException, IOException, SAXException { /* detect XML type via NS */ boolean isWellformedXml = false; - dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); - dbf.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); - //dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, ""); - dbf.setValidating(false); - DocumentBuilder db = dbf.newDocumentBuilder(); - Document doc = db.parse(new File(filePath)); xmlInfoRecord info = getXMLinfo(doc); reportDetail("detect XML type via NS:" + info.nameSpaceUri); /* TODO: align corresponding Schema based on systemID */ - Optional<ValidationSchema> schema = assignSchema(doc); - if (schema.isEmpty()) { - reportError("there is no related schema found in *our* catalog of allowed XML types.", filePath); - } else { - reportDetail("assigned schema of type: " + schema.get().schemaType); - reportDetail("assigned schema url: " + schema.get().schemaURL); - if (schema.get().schemaType == ValidationSchemaType.dtd) { - assert(!dbf.isValidating()); - dbf.setValidating(true); /* only used if DTD */ - assert(dbf.isValidating()); - dbf.setFeature(XMLConstants.USE_CATALOG, true); - dbf.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); - System.out.println("-> dtd detected, use catalog"); - } else if (!schema.get().schemaURL.isBlank()) { - System.out.println("-> set schema to " + schema.get().schemaURL); - dbf.setSchema(schema.get().schemaInst); - assert(dbf.getSchema() != null); - } - } printXMLinfo(doc); if (!info.xmlVersion.equals("1.0")) { - reportError("not an expected XML 1.0 document, found " + info.xmlVersion, filePath); + reportError("not an expected XML 1.0 document, found " + info.xmlVersion); } else { isWellformedXml = true; reportDetail("checked XML is wellformed"); @@ -293,6 +279,16 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { return isWellformedXml; } + private Document getDocument(String filePath) throws ParserConfigurationException, SAXException, IOException { + dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + dbf.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + //dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, ""); + dbf.setValidating(false); + DocumentBuilder db = dbf.newDocumentBuilder(); + Document doc = db.parse(new File(filePath)); + return doc; + } + private boolean validateAgainstSchema(String filePath) throws ParserConfigurationException, SAXException, IOException { boolean isValidXml = false; //dbf.setAttribute(); @@ -314,7 +310,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { private void reportError(String msg) { errors.add(msg); -// System.out.println(msg); + System.out.println("ERROR: " + msg); // TODO: log.warn(msg); } private void reportError(String msg, String filepath) { @@ -356,7 +352,10 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { */ public static void main(String[] args) { var plugin = new SLUBXmlFormatValidationPlugin(); - + Map<String, String> initp = new HashMap<>(); + initp.put("catalog", "/etc/xml/catalog"); + initp.put("schemacatalog", "example_catalog/schema_catalog.xml"); + plugin.initParams(initp); System.out.println("----------------------------------"); System.out.println("Agent: '" + plugin.getAgent() + "'"); System.out.println(); diff --git a/java/org/slub/rosetta/dps/repository/plugin/TestSLUBXmlFormatValidationPlugin.java b/java/org/slub/rosetta/dps/repository/plugin/TestSLUBXmlFormatValidationPlugin.java index 9455d2af9e734e16c8e241d27f5847ca3e27b65f..99c5707d713e52a172dae197f4d3fd58b1c86cd5 100644 --- a/java/org/slub/rosetta/dps/repository/plugin/TestSLUBXmlFormatValidationPlugin.java +++ b/java/org/slub/rosetta/dps/repository/plugin/TestSLUBXmlFormatValidationPlugin.java @@ -25,25 +25,34 @@ public class TestSLUBXmlFormatValidationPlugin { private static SLUBXmlFormatValidationPlugin mock; private Path[] testPaths; - private Map<String, String> initp = new HashMap<>(); + private final Map<String, String> initp = new HashMap<>(); @Before public void setUp() { Stream<Path> findFiles; try { - findFiles = Files.find(Path.of("resources/test/"), 5, ((path, basicFileAttributes) -> Files.isRegularFile(path))); + findFiles = Files.find(Path.of("resources/test/JATS"), 5, ((path, basicFileAttributes) -> Files.isRegularFile(path))); testPaths = findFiles.toArray(Path[]::new); } catch (IOException e) { // do nothing, because nothing found } mock = new SLUBXmlFormatValidationPlugin(); initp.put("catalog", "/etc/xml/catalog"); + initp.put("schemacatalog", "example_catalog/schema_catalog.xml"); mock.initParams(initp); } + + @org.junit.Test + public void loadNamespaceSchemaMap() { + initp.put("schemacatalog", "example_catalog/schema_catalog.xml"); + mock.initParams(initp); + mock.loadNamespaceSchemaMap(); + } + @org.junit.Test public void validateFormat() { for (Path path : testPaths) { - if (path.toString().contains("JATS")) { + if (!path.toString().contains("JATS")) { System.out.println("file " + path + " ignored"); continue; } @@ -57,4 +66,5 @@ public class TestSLUBXmlFormatValidationPlugin { } } + }