Skip to content
Snippets Groups Projects
Commit e376a81e authored by Andreas Romeyke's avatar Andreas Romeyke
Browse files

- fixed schema validation

-
parent d793370e
No related branches found
No related tags found
No related merge requests found
...@@ -31,8 +31,10 @@ import java.io.File; ...@@ -31,8 +31,10 @@ import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Optional;
/** /**
* SLUBXmlFormatValidationPlugin * SLUBXmlFormatValidationPlugin
...@@ -48,6 +50,19 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { ...@@ -48,6 +50,19 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
private final DocumentBuilderFactory dbf = DocumentBuilderFactory.newDefaultInstance(); private final DocumentBuilderFactory dbf = DocumentBuilderFactory.newDefaultInstance();
private final List<String> errors = new ArrayList<>(); private final List<String> errors = new ArrayList<>();
private final List<String> details = new ArrayList<>(); private final List<String> details = new ArrayList<>();
/**
 * Catalog of the XML namespaces this plugin accepts for schema validation.
 * Each entry is built from a namespace URI, the kind of validation
 * ({@code ValidationSchemaType.schema}) and the location of the XSD.
 *
 * The set is filled through a regular {@link HashSet} constructor instead of
 * double-brace initialization, so no anonymous HashSet subclass is generated.
 *
 * Namespaces taken over from the former commented-out entries, which had an
 * empty schema location (presumably candidates for later support):
 *   http://www.opengis.net/citygml/profiles/base/1.0
 *   http://www.opengis.net/kml/2.2
 *   http://www.music-encoding.org/ns/mei
 *   http://www.tei-c.org/ns/1.0
 */
private static final HashSet<validationSchema> namespaceSchemaMap = new HashSet<>(List.of(
    new validationSchema("http://www.loc.gov/standards/alto/ns-v2#", ValidationSchemaType.schema, "http://www.loc.gov/standards/alto/alto-v2.0.xsd"),
    new validationSchema("http://www.loc.gov/mods/v3", ValidationSchemaType.schema, "http://www.loc.gov/standards/mods/v3/mods-3-8.xsd"),
    new validationSchema("http://www.lido-schema.org", ValidationSchemaType.schema, "http://www.lido-schema.org/schema/v1.1/lido-v1.1.xsd"),
    new validationSchema("http://slubarchiv.slub-dresden.de/rights1", ValidationSchemaType.schema, "https://slubarchiv.slub-dresden.de/fileadmin/groups/slubsite/slubarchiv/standards/rights/rights1.xsd")
));
private ValidationCatalogResolver validationCatalogResolver = null; private ValidationCatalogResolver validationCatalogResolver = null;
private final ErrorHandler validationErrorHandler = new ErrorHandler() { private final ErrorHandler validationErrorHandler = new ErrorHandler() {
@Override @Override
...@@ -109,6 +124,33 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { ...@@ -109,6 +124,33 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
+ "\n" + "\n"
); );
} }
/**
 * Determines which validation schema belongs to the given document.
 *
 * If the document reports a namespace URI, the catalog {@code namespaceSchemaMap}
 * is searched for an entry of type {@code ValidationSchemaType.schema} with the
 * same namespace. Without a namespace URI the document is checked for a DTD
 * via {@code assignDtdIfApplicable}; on a hit a new entry is built from the
 * document's system id.
 *
 * The outcome is also written to standard output for debugging.
 *
 * @param doc the parsed document to inspect
 * @return the matching schema description, or an empty Optional if none applies
 */
private static Optional<validationSchema> assignSchema(Document doc) {
    final xmlInfoRecord info = getXMLinfo(doc);
    final Optional<validationSchema> match;
    if (info.nameSpaceUri != null) {
        /* namespace known: look it up in the catalog of allowed schemas */
        match = namespaceSchemaMap.stream()
                .filter(candidate -> candidate.schemaType.equals(ValidationSchemaType.schema))
                .filter(candidate -> candidate.nameSpace.equals(info.nameSpaceUri))
                .findAny();
    } else {
        /* try if a DTD is assignable */
        var detectedType = assignDtdIfApplicable(doc);
        if (detectedType.equals(ValidationSchemaType.dtd)) {
            System.out.println("found schema " + detectedType);
            match = Optional.of(new validationSchema(info.nameSpaceUri, detectedType, info.systemID));
        } else {
            match = Optional.empty();
        }
    }
    match.ifPresentOrElse(
            found -> {
                System.out.println("found namespace " + found.nameSpace );
                System.out.println("found schematype " + found.schemaType );
                System.out.println("found schemaURL " + found.schemaURL );
            },
            () -> System.out.println("no element found")
    );
    return match;
}
@Override @Override
...@@ -118,6 +160,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { ...@@ -118,6 +160,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
wellformed = true; wellformed = true;
errors.clear(); errors.clear();
valid = validateAgainstSchema(filePath); valid = validateAgainstSchema(filePath);
System.out.println("ok no error ->" + valid );
} }
} catch (ParserConfigurationException e) { } catch (ParserConfigurationException e) {
reportError("ParserconfExc file=" + filePath + " Exc:" + e.getMessage()); reportError("ParserconfExc file=" + filePath + " Exc:" + e.getMessage());
...@@ -127,45 +170,83 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin { ...@@ -127,45 +170,83 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
reportError("not a XML file, " + e.getMessage(), filePath); reportError("not a XML file, " + e.getMessage(), filePath);
e.getStackTrace(); e.getStackTrace();
} }
// debug
System.out.println("errors:" );
System.out.println("----");
for (var line: errors) {
System.out.println("\t" + line);
}
System.out.println("----");
return valid; return valid;
} }
/**
 * Decides whether the document refers to a DTD.
 *
 * @param doc the parsed document to inspect
 * @return {@code ValidationSchemaType.dtd} if the document's system id is set
 *         and ends with ".dtd", otherwise {@code ValidationSchemaType.nothing}
 */
private static ValidationSchemaType assignDtdIfApplicable(Document doc) {
    final var systemId = getXMLinfo(doc).systemID;
    final boolean refersToDtd = systemId != null && systemId.endsWith(".dtd");
    return refersToDtd ? ValidationSchemaType.dtd : ValidationSchemaType.nothing;
}
private boolean checkIfWellformed(String filePath) throws ParserConfigurationException, IOException, SAXException { private boolean checkIfWellformed(String filePath) throws ParserConfigurationException, IOException, SAXException {
/* detect XML type via NS */ /* detect XML type via NS */
boolean isWellformedXml = false; boolean isWellformedXml = false;
dbf.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); dbf.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
dbf.setValidating(false);
DocumentBuilder db = dbf.newDocumentBuilder(); DocumentBuilder db = dbf.newDocumentBuilder();
Document doc = db.parse(new File(filePath)); Document doc = db.parse(new File(filePath));
xmlInfoRecord info = getXMLinfo(doc); xmlInfoRecord info = getXMLinfo(doc);
reportDetail("detect XML type via NS:" + info.nameSpaceUri); reportDetail("detect XML type via NS:" + info.nameSpaceUri);
//printXMLinfo(doc); /* TODO: align corresponding Schema based on systemID */
Optional<validationSchema> schema = assignSchema(doc);
if (schema.isEmpty()) {
reportError("there is no related schema found in *our* catalog of allowed XML types.", filePath);
} else {
reportDetail("assigned schema of type: " + schema.get().schemaType);
reportDetail("assigned schema url: " + schema.get().schemaURL);
if (schema.get().schemaType == ValidationSchemaType.dtd) {
assert(dbf.isValidating() == false);
dbf.setValidating(true); /* only used if DTD */
assert(dbf.isValidating() == true);
dbf.setFeature(XMLConstants.USE_CATALOG, true);
dbf.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
System.out.println("-> dtd detected, use catalog");
} else if (!schema.get().schemaURL.isBlank()) {
System.out.println("-> set schema to " + schema.get().schemaURL);
dbf.setSchema(schema.get().schemaInst);
assert(dbf.getSchema() != null);
}
}
printXMLinfo(doc);
if (!info.xmlVersion.equals("1.0")) { if (!info.xmlVersion.equals("1.0")) {
reportError("not an expected XML 1.0 document, found " + info.xmlVersion, filePath); reportError("not an expected XML 1.0 document, found " + info.xmlVersion, filePath);
} else { } else {
isWellformedXml = true; isWellformedXml = true;
reportDetail("checked XML is wellformed"); reportDetail("checked XML is wellformed");
} }
return isWellformedXml; return isWellformedXml;
} }
private boolean validateAgainstSchema(String filePath) throws ParserConfigurationException, SAXException, IOException { private boolean validateAgainstSchema(String filePath) throws ParserConfigurationException, SAXException, IOException {
boolean isValidXml = false; boolean isValidXml = false;
dbf.setFeature(XMLConstants.USE_CATALOG, true); //dbf.setAttribute();
dbf.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); dbf.setXIncludeAware(true);
dbf.setValidating(true); dbf.setNamespaceAware(true);
DocumentBuilder dbValidate = dbf.newDocumentBuilder(); DocumentBuilder dbValidate = dbf.newDocumentBuilder();
Document docValidate; Document docValidate;
dbValidate.setEntityResolver(validationCatalogResolver); dbValidate.setEntityResolver(validationCatalogResolver);
dbValidate.setErrorHandler(validationErrorHandler); dbValidate.setErrorHandler(validationErrorHandler);
reportDetail("align entitity resolver"); reportDetail("align entitity resolver");
docValidate = dbValidate.parse(new File(filePath)); docValidate = dbValidate.parse(new File(filePath));
if (dbValidate.isValidating()) {
docValidate.getXmlVersion(); docValidate.getXmlVersion();
reportDetail("reparse using own catalog"); reportDetail("reparse using own catalog");
if (errors.isEmpty()) { if (errors.isEmpty()) {
isValidXml = true; isValidXml = true;
} }
}
return isValidXml; return isValidXml;
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment