Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<assert test="mets:mets">CSIP1000;ERROR;The mets root element is mandatory.</assert>
</rule>
<rule context="/mets:mets">
<assert test="@OBJID != ''">CSIP1;ERROR;The mets/@OBJID attribute is mandatory, its value is a string identifier for the METS document. For the package METS document, this should be the name/ID of the package, i.e. the name of the package root folder. For a representation level METS document this value records the name/ID of the representation, i.e. the name of the top-level representation folder.</assert>
<assert test="@OBJID_test">CSIP1;ERROR;The mets/@OBJID attribute is mandatory, its value is a string identifier for the METS document. For the package METS document, this should be the name/ID of the package, i.e. the name of the package root folder. For a representation level METS document this value records the name/ID of the representation, i.e. the name of the top-level representation folder.</assert>
<assert test="(@TYPE_vocabulary_test) and (@TYPE != 'Other' or (@TYPE = 'Other' and @csip:OTHERTYPE != ''))">CSIP2;ERROR;The mets/@TYPE attribute MUST be used to declare the category of the content held in the package, e.g. book, journal, stereograph, video, etc.. Legal values are defined in a fixed vocabulary.</assert>
<assert test="(@TYPE = 'Other' and @csip:OTHERTYPE) or @TYPE != 'Other'">CSIP3;WARN;When the `mets/@TYPE` attribute has the value "Other" the `mets/@csip:OTHERTYPE` attribute MUST be used to declare the content category of the package/representation. The value can either be "Other" or any other string that are not present in the vocabulary used in the `mets/@TYPE` attribute.</assert>
<assert test="(@csip:CONTENTINFORMATIONTYPE_vocabulary_test) and (@csip:CONTENTINFORMATIONTYPE != 'OTHER' or (@csip:CONTENTINFORMATIONTYPE = 'OTHER' and @csip:OTHERCONTENTINFORMATIONTYPE != ''))">CSIP4;ERROR;Used to declare the Content Information Type Specification used when creating the package. Legal values are defined in a fixed vocabulary. The attribute is mandatory for representation level METS documents.</assert>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<assert test="mets:mets">CSIP1000;ERROR;The mets root element is mandatory.</assert>
</rule>
<rule context="/mets:mets">
<assert test="@OBJID != ''">CSIP1;ERROR;The mets/@OBJID attribute is mandatory, its value is a string identifier for the METS document. For the package METS document, this should be the name/ID of the package, i.e. the name of the package root folder. For a representation level METS document this value records the name/ID of the representation, i.e. the name of the top-level representation folder.</assert>
<assert test="@OBJID_test">CSIP1;ERROR;The mets/@OBJID attribute is mandatory, its value is a string identifier for the METS document. For the package METS document, this should be the name/ID of the package, i.e. the name of the package root folder. For a representation level METS document this value records the name/ID of the representation, i.e. the name of the top-level representation folder.</assert>
<assert test="(@TYPE_vocabulary_test) and (@TYPE != 'Other' or (@TYPE = 'Other' and @csip:OTHERTYPE != ''))">CSIP2;ERROR;The `mets/@TYPE` attribute MUST be used to declare the category of the content held in the package, e.g. "Datasets", "Websites", "Mixes" , "Other", etc.. Legal values are defined in a fixed vocabulary. When the content category used falls outside of the defined vocabulary the `mets/@TYPE` value must be set to "Other" and the specific value declared in `mets/@csip:OTHERTYPE`. The vocabulary will develop under the curation of the DILCIS Board as additional content information type specifications are produced.</assert>
<assert test="(@TYPE = 'Other' and @csip:OTHERTYPE) or @TYPE != 'Other'">CSIP3;WARN;When the `mets/@TYPE` attribute has the value "Other" the `mets/@csip:OTHERTYPE` attribute MUST be used to declare the content category of the package/representation. The value can either be "Other" or any other string that are not present in the vocabulary used in the `mets/@TYPE` attribute.</assert>
<assert test="(@csip:CONTENTINFORMATIONTYPE_vocabulary_test) and (@csip:CONTENTINFORMATIONTYPE != 'OTHER' or (@csip:CONTENTINFORMATIONTYPE = 'OTHER' and @csip:OTHERCONTENTINFORMATIONTYPE != ''))">CSIP4;ERROR;Used to declare the Content Information Type Specification used when creating the package. Legal values are defined in a fixed vocabulary. The attribute is mandatory for representation level METS documents.</assert>
Expand Down
16 changes: 11 additions & 5 deletions eark_validator/ipxml/schematron.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,13 @@ class SchematronTests():

tests = {}

def __init__(self):
def __init__(self, to_validate: Optional[Path]):
for attribute, vocabulary_uri in self.__vocabulary_definitions.items():
self.tests[attribute + '_vocabulary_test'] = self.__create_vocabulary_test(attribute, vocabulary_uri)
self.tests[attribute + '_vocabulary_test'] = self._create_vocabulary_test(attribute, vocabulary_uri)

def __create_vocabulary_test(self, attribute: str, vocabulary_uri: str) -> str:
self.tests['@OBJID_test'] = self._create_OBJID_test(to_validate)

def _create_vocabulary_test(self, attribute: str, vocabulary_uri: str) -> str:
vocabulary_tests = []
for line_bytes in urlopen(vocabulary_uri):
line = line_bytes.decode('utf-8')
Expand All @@ -70,17 +72,21 @@ def __create_vocabulary_test(self, attribute: str, vocabulary_uri: str) -> str:

return ' or '.join(vocabulary_tests)

schematron_tests = SchematronTests()
def _create_OBJID_test(self, to_validate: Optional[Path]):
if to_validate:
return f"(@OBJID = '{to_validate.stem}')"
return "(@OBJID != '')"

class SchematronRuleset():
"""Encapsulates a set of Schematron rules loaded from a file."""
def __init__(self, sch_path: str=None):
def __init__(self, sch_path: str=None, to_validate: Path=None):
if not os.path.exists(sch_path):
raise FileNotFoundError(NO_PATH.format(sch_path))
if not os.path.isfile(sch_path):
raise ValueError(NOT_FILE.format(sch_path))
self._path = sch_path

schematron_tests = SchematronTests(to_validate)
try:
with open(sch_path) as schematron_file:
schematron_data = schematron_file.read()
Expand Down
4 changes: 2 additions & 2 deletions eark_validator/packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,13 @@ def validate(cls, version: SpecificationVersion, to_validate: Path) -> Validatio
'metadata': metadata
})

csip_profile = SC.ValidationProfile(SpecificationType.CSIP, version)
csip_profile = SC.ValidationProfile(SpecificationType.CSIP, version, to_validate)
csip_profile.validate(to_validate.joinpath(METS))
results = csip_profile.get_all_results()

package: InformationPackage = InformationPackages.from_path(to_validate)
if package.details.oaispackagetype in ['SIP', 'DIP']:
profile = SC.ValidationProfile(SpecificationType.from_string(package.details.oaispackagetype), version)
profile = SC.ValidationProfile(SpecificationType.from_string(package.details.oaispackagetype), version, to_validate)
profile.validate(to_validate.joinpath(METS))
results.extend(profile.get_all_results())

Expand Down
5 changes: 3 additions & 2 deletions eark_validator/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,11 @@
from eark_validator.specifications.specification import EarkSpecification, Specification, SpecificationType, SpecificationVersion
from eark_validator.const import NO_PATH, NOT_FILE
from eark_validator.model import Severity
from pathlib import Path

class ValidationProfile():
""" A complete set of Schematron rule sets that comprise a complete validation profile."""
def __init__(self, type: SpecificationType, version: SpecificationVersion):
def __init__(self, type: SpecificationType, version: SpecificationVersion, to_validate: Path = None):
specification: Specification = EarkSpecification(type, version).specification

self._rulesets: Dict[str, SchematronRuleset] = {}
Expand All @@ -46,7 +47,7 @@ def __init__(self, type: SpecificationType, version: SpecificationVersion):
self.results: Dict[str, List[Result]] = {}
self.messages: List[str] = []
for section in specification.sections:
self.rulesets[section] = SchematronRuleset(get_schematron_path(version, specification.id, section))
self.rulesets[section] = SchematronRuleset(get_schematron_path(version, specification.id, section), to_validate)

@property
def specification(self) -> Specification:
Expand Down