Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
X
xml_plugin4rosetta
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Harbor Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Digital Preservation
xml_plugin4rosetta
Commits
e376a81e
Commit
e376a81e
authored
2 years ago
by
Andreas Romeyke
Browse files
Options
Downloads
Patches
Plain Diff
- fixed schema validation
-
parent
d793370e
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java
+91
-10
91 additions, 10 deletions
.../dps/repository/plugin/SLUBXmlFormatValidationPlugin.java
with
91 additions
and
10 deletions
java/org/slub/rosetta/dps/repository/plugin/SLUBXmlFormatValidationPlugin.java
+
91
−
10
View file @
e376a81e
...
@@ -31,8 +31,10 @@ import java.io.File;
...
@@ -31,8 +31,10 @@ import java.io.File;
import
java.io.IOException
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Collections
;
import
java.util.HashSet
;
import
java.util.List
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Map
;
import
java.util.Optional
;
/**
/**
* SLUBXmlFormatValidationPlugin
* SLUBXmlFormatValidationPlugin
...
@@ -48,6 +50,19 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
...
@@ -48,6 +50,19 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
private
final
DocumentBuilderFactory
dbf
=
DocumentBuilderFactory
.
newDefaultInstance
();
private
final
DocumentBuilderFactory
dbf
=
DocumentBuilderFactory
.
newDefaultInstance
();
private
final
List
<
String
>
errors
=
new
ArrayList
<>();
private
final
List
<
String
>
errors
=
new
ArrayList
<>();
private
final
List
<
String
>
details
=
new
ArrayList
<>();
private
final
List
<
String
>
details
=
new
ArrayList
<>();
/**
 * Known XML namespaces and the schema each one is validated against.
 *
 * <p>{@code assignSchema} searches this set for an entry of type
 * {@code ValidationSchemaType.schema} whose namespace equals the namespace URI
 * of the document being checked.
 *
 * <p>Built from an immutable list instead of double-brace initialization, so no
 * anonymous {@code HashSet} subclass is created for this constant.
 *
 * <p>Namespaces noted in the original code but not mapped to a schema yet:
 * <ul>
 *   <li>http://www.opengis.net/citygml/profiles/base/1.0</li>
 *   <li>http://www.opengis.net/kml/2.2</li>
 *   <li>http://www.music-encoding.org/ns/mei</li>
 *   <li>http://www.tei-c.org/ns/1.0</li>
 * </ul>
 */
private static final HashSet<validationSchema> namespaceSchemaMap = new HashSet<>(List.of(
        new validationSchema(
                "http://www.loc.gov/standards/alto/ns-v2#",
                ValidationSchemaType.schema,
                "http://www.loc.gov/standards/alto/alto-v2.0.xsd"),
        new validationSchema(
                "http://www.loc.gov/mods/v3",
                ValidationSchemaType.schema,
                "http://www.loc.gov/standards/mods/v3/mods-3-8.xsd"),
        new validationSchema(
                "http://www.lido-schema.org",
                ValidationSchemaType.schema,
                "http://www.lido-schema.org/schema/v1.1/lido-v1.1.xsd"),
        new validationSchema(
                "http://slubarchiv.slub-dresden.de/rights1",
                ValidationSchemaType.schema,
                "https://slubarchiv.slub-dresden.de/fileadmin/groups/slubsite/slubarchiv/standards/rights/rights1.xsd")
));
private
ValidationCatalogResolver
validationCatalogResolver
=
null
;
private
ValidationCatalogResolver
validationCatalogResolver
=
null
;
private
final
ErrorHandler
validationErrorHandler
=
new
ErrorHandler
()
{
private
final
ErrorHandler
validationErrorHandler
=
new
ErrorHandler
()
{
@Override
@Override
...
@@ -109,6 +124,33 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
...
@@ -109,6 +124,33 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
+
"\n"
+
"\n"
);
);
}
}
/**
 * Picks the validation schema that applies to the given document.
 *
 * <p>If the document reports a namespace URI, the known-namespace set is
 * searched for an XSD entry with that namespace. If it reports none, the
 * document is checked for a DTD reference and, when one is detected, a DTD
 * entry is built from the document's system id (its namespace stays
 * {@code null}).
 *
 * <p>The outcome is echoed to standard output for debugging.
 *
 * @param doc parsed XML document to inspect
 * @return the matching schema description, or an empty Optional if none applies
 */
private static Optional<validationSchema> assignSchema(Document doc) {
    final xmlInfoRecord xmlInfo = getXMLinfo(doc);
    final Optional<validationSchema> match;

    if (xmlInfo.nameSpaceUri != null) {
        // Namespace present: look it up among the registered XSD schemas.
        match = namespaceSchemaMap.stream()
                .filter(candidate -> candidate.schemaType.equals(ValidationSchemaType.schema))
                .filter(candidate -> candidate.nameSpace.equals(xmlInfo.nameSpaceUri))
                .findAny();
    } else {
        // No namespace: fall back to a DTD if the document references one.
        ValidationSchemaType detected = assignDtdIfApplicable(doc);
        if (detected.equals(ValidationSchemaType.dtd)) {
            System.out.println("found schema " + detected);
            match = Optional.of(new validationSchema(xmlInfo.nameSpaceUri, detected, xmlInfo.systemID));
        } else {
            match = Optional.empty();
        }
    }

    if (match.isEmpty()) {
        System.out.println("no element found");
        return match;
    }

    validationSchema found = match.get();
    System.out.println("found namespace " + found.nameSpace);
    System.out.println("found schematype " + found.schemaType);
    System.out.println("found schemaURL " + found.schemaURL);
    return match;
}
@Override
@Override
...
@@ -118,6 +160,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
...
@@ -118,6 +160,7 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
wellformed
=
true
;
wellformed
=
true
;
errors
.
clear
();
errors
.
clear
();
valid
=
validateAgainstSchema
(
filePath
);
valid
=
validateAgainstSchema
(
filePath
);
System
.
out
.
println
(
"ok no error ->"
+
valid
);
}
}
}
catch
(
ParserConfigurationException
e
)
{
}
catch
(
ParserConfigurationException
e
)
{
reportError
(
"ParserconfExc file="
+
filePath
+
" Exc:"
+
e
.
getMessage
());
reportError
(
"ParserconfExc file="
+
filePath
+
" Exc:"
+
e
.
getMessage
());
...
@@ -127,45 +170,83 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
...
@@ -127,45 +170,83 @@ public class SLUBXmlFormatValidationPlugin implements FormatValidationPlugin {
reportError
(
"not a XML file, "
+
e
.
getMessage
(),
filePath
);
reportError
(
"not a XML file, "
+
e
.
getMessage
(),
filePath
);
e
.
getStackTrace
();
e
.
getStackTrace
();
}
}
// debug
System
.
out
.
println
(
"errors:"
);
System
.
out
.
println
(
"----"
);
for
(
var
line:
errors
)
{
System
.
out
.
println
(
"\t"
+
line
);
}
System
.
out
.
println
(
"----"
);
return
valid
;
return
valid
;
}
}
/**
 * Reports whether the document should be treated as DTD-based.
 *
 * <p>A document counts as DTD-based when its system id is present and ends
 * with {@code ".dtd"}. Every other case, including a system id with a
 * different suffix, yields {@code ValidationSchemaType.nothing}. The original
 * empty branch for that case had no effect and has been removed.
 *
 * @param doc parsed XML document to inspect
 * @return {@code ValidationSchemaType.dtd} if a DTD system id is found,
 *         otherwise {@code ValidationSchemaType.nothing}
 */
private static ValidationSchemaType assignDtdIfApplicable(Document doc) {
    var systemId = getXMLinfo(doc).systemID;
    if (systemId != null && systemId.endsWith(".dtd")) {
        return ValidationSchemaType.dtd;
    }
    return ValidationSchemaType.nothing;
}
private
boolean
checkIfWellformed
(
String
filePath
)
throws
ParserConfigurationException
,
IOException
,
SAXException
{
private
boolean
checkIfWellformed
(
String
filePath
)
throws
ParserConfigurationException
,
IOException
,
SAXException
{
/* detect XML type via NS */
/* detect XML type via NS */
boolean
isWellformedXml
=
false
;
boolean
isWellformedXml
=
false
;
dbf
.
setAttribute
(
"http://apache.org/xml/features/nonvalidating/load-external-dtd"
,
false
);
dbf
.
setAttribute
(
"http://apache.org/xml/features/nonvalidating/load-external-dtd"
,
false
);
dbf
.
setValidating
(
false
);
DocumentBuilder
db
=
dbf
.
newDocumentBuilder
();
DocumentBuilder
db
=
dbf
.
newDocumentBuilder
();
Document
doc
=
db
.
parse
(
new
File
(
filePath
));
Document
doc
=
db
.
parse
(
new
File
(
filePath
));
xmlInfoRecord
info
=
getXMLinfo
(
doc
);
xmlInfoRecord
info
=
getXMLinfo
(
doc
);
reportDetail
(
"detect XML type via NS:"
+
info
.
nameSpaceUri
);
reportDetail
(
"detect XML type via NS:"
+
info
.
nameSpaceUri
);
//printXMLinfo(doc);
/* TODO: align corresponding Schema based on systemID */
Optional
<
validationSchema
>
schema
=
assignSchema
(
doc
);
if
(
schema
.
isEmpty
())
{
reportError
(
"there is no related schema found in *our* catalog of allowed XML types."
,
filePath
);
}
else
{
reportDetail
(
"assigned schema of type: "
+
schema
.
get
().
schemaType
);
reportDetail
(
"assigned schema url: "
+
schema
.
get
().
schemaURL
);
if
(
schema
.
get
().
schemaType
==
ValidationSchemaType
.
dtd
)
{
assert
(
dbf
.
isValidating
()
==
false
);
dbf
.
setValidating
(
true
);
/* only used if DTD */
assert
(
dbf
.
isValidating
()
==
true
);
dbf
.
setFeature
(
XMLConstants
.
USE_CATALOG
,
true
);
dbf
.
setAttribute
(
"http://apache.org/xml/features/nonvalidating/load-external-dtd"
,
false
);
System
.
out
.
println
(
"-> dtd detected, use catalog"
);
}
else
if
(!
schema
.
get
().
schemaURL
.
isBlank
())
{
System
.
out
.
println
(
"-> set schema to "
+
schema
.
get
().
schemaURL
);
dbf
.
setSchema
(
schema
.
get
().
schemaInst
);
assert
(
dbf
.
getSchema
()
!=
null
);
}
}
printXMLinfo
(
doc
);
if
(!
info
.
xmlVersion
.
equals
(
"1.0"
))
{
if
(!
info
.
xmlVersion
.
equals
(
"1.0"
))
{
reportError
(
"not an expected XML 1.0 document, found "
+
info
.
xmlVersion
,
filePath
);
reportError
(
"not an expected XML 1.0 document, found "
+
info
.
xmlVersion
,
filePath
);
}
else
{
}
else
{
isWellformedXml
=
true
;
isWellformedXml
=
true
;
reportDetail
(
"checked XML is wellformed"
);
reportDetail
(
"checked XML is wellformed"
);
}
}
return
isWellformedXml
;
return
isWellformedXml
;
}
}
private
boolean
validateAgainstSchema
(
String
filePath
)
throws
ParserConfigurationException
,
SAXException
,
IOException
{
private
boolean
validateAgainstSchema
(
String
filePath
)
throws
ParserConfigurationException
,
SAXException
,
IOException
{
boolean
isValidXml
=
false
;
boolean
isValidXml
=
false
;
dbf
.
set
Feature
(
XMLConstants
.
USE_CATALOG
,
true
);
//
dbf.set
Attribute(
);
dbf
.
set
Attribute
(
"http://apache.org/xml/features/nonvalidating/load-external-dtd"
,
fals
e
);
dbf
.
set
XIncludeAware
(
tru
e
);
dbf
.
set
Validating
(
true
);
dbf
.
set
NamespaceAware
(
true
);
DocumentBuilder
dbValidate
=
dbf
.
newDocumentBuilder
();
DocumentBuilder
dbValidate
=
dbf
.
newDocumentBuilder
();
Document
docValidate
;
Document
docValidate
;
dbValidate
.
setEntityResolver
(
validationCatalogResolver
);
dbValidate
.
setEntityResolver
(
validationCatalogResolver
);
dbValidate
.
setErrorHandler
(
validationErrorHandler
);
dbValidate
.
setErrorHandler
(
validationErrorHandler
);
reportDetail
(
"align entitity resolver"
);
reportDetail
(
"align entitity resolver"
);
docValidate
=
dbValidate
.
parse
(
new
File
(
filePath
));
docValidate
=
dbValidate
.
parse
(
new
File
(
filePath
));
if
(
dbValidate
.
isValidating
())
{
docValidate
.
getXmlVersion
();
docValidate
.
getXmlVersion
();
reportDetail
(
"reparse using own catalog"
);
reportDetail
(
"reparse using own catalog"
);
if
(
errors
.
isEmpty
())
{
if
(
errors
.
isEmpty
())
{
isValidXml
=
true
;
isValidXml
=
true
;
}
}
}
return
isValidXml
;
return
isValidXml
;
}
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment