Skip to content

Commit 7ce5d38

Browse files
authored
Merge pull request #196 from IIIF/v4
Adding early version of unfinished schema for v4
2 parents 11ef58d + 50df8ff commit 7ce5d38

84 files changed

Lines changed: 3617 additions & 23 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
name: Check JSON Schema $ref integrity
2+
3+
on:
4+
pull_request:
5+
types: [opened, synchronize, reopened]
6+
# Optional: only run when schemas or the script change
7+
paths:
8+
- "schema/**"
9+
- ".github/workflows/check-jsonschema.yml"
10+
11+
permissions:
12+
contents: read
13+
14+
jobs:
15+
check-refs:
16+
runs-on: ubuntu-latest
17+
18+
steps:
19+
- name: Checkout
20+
uses: actions/checkout@v5
21+
22+
- name: Set up Python
23+
uses: actions/setup-python@v6
24+
with:
25+
python-version: 3.12
26+
27+
- name: Set up uv
28+
uses: astral-sh/setup-uv@v6
29+
with:
30+
enable-cache: true
31+
32+
- name: Sync dependencies
33+
run: uv sync --all-extras --dev
34+
35+
- name: Run $ref checker
36+
run: |
37+
uv run python -m presentation_validator.v4.check_refs

.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
- uses: actions/checkout@v5
1919

2020
- name: Set up Python
21-
uses: actions/setup-python@v5
21+
uses: actions/setup-python@v6
2222
with:
2323
python-version: ${{ matrix.python-version }}
2424

README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,21 @@ This should start up a local server, running at <localhost:8080>. To test it, tr
8686
}
8787
```
8888
You may also use `--hostname` to specify a hostname or IP address to which to bind and `--port` for a port to which to bind.
89+
90+
91+
## GitHub Action
92+
93+
It is also possible to run the validator against JSON documents stored in a GitHub repository by using a GitHub Action. An example is shown below:
94+
95+
```
96+
- name: Run IIIF validator
97+
uses: IIIF/presentation-validator@main
98+
with:
99+
directory: path/to/json
100+
version: 3
101+
extension: .json
102+
```
103+
104+
* `directory` is required. The validator will start at this directory and then go through any sub-directories looking for json files to validate.
105+
* `version` is optional. If supplied, all JSON files will be validated against this version. If it is not supplied, the validator will look inside each JSON file to see which version is specified in the `@context`.
106+
* `extension` is optional and defaults to `.json`.

fixtures/4/bad/dup_id.json

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
{
2+
"@context": "http://iiif.io/api/presentation/4/context.json",
3+
"id": "https://iiif.io/api/presentation/4.0/example/03_canvas.json",
4+
"type": "Manifest",
5+
"label": {
6+
"en": [
7+
"Canvas and first annotation page have same id"
8+
]
9+
},
10+
"items": [
11+
{
12+
"id": "https://iiif.io/api/presentation/4.0/example/03_canvas/canvas/p1",
13+
"type": "Canvas",
14+
"height": 1800,
15+
"width": 1200,
16+
"items": [
17+
{
18+
"id": "https://iiif.io/api/presentation/4.0/example/03_canvas/canvas/p1",
19+
"type": "AnnotationPage",
20+
"items": [
21+
{
22+
"id": "https://iiif.io/api/presentation/4.0/example/03_canvas/annotation/p0001-image",
23+
"type": "Annotation",
24+
"motivation": [ "painting" ],
25+
"body": {
26+
"id": "http://iiif.io/api/presentation/2.1/example/fixtures/resources/page1-full.png",
27+
"type": "Image",
28+
"format": "image/png",
29+
"height": 1800,
30+
"width": 1200
31+
},
32+
"target": {
33+
"id": "https://iiif.io/api/presentation/4.0/example/03_canvas/canvas/p1",
34+
"type": "Canvas"
35+
}
36+
}
37+
]
38+
}
39+
]
40+
}
41+
]
42+
}

fixtures/4/ok/02_timeline.json

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{
2+
"@context": "http://iiif.io/api/presentation/4/context.json",
3+
"id": "https://iiif.io/api/presentation/4.0/example/02_timeline.json",
4+
"type": "Manifest",
5+
"label": {
6+
"en": [
7+
"Simplest Audio Example (IIIF Presentation v4)"
8+
]
9+
},
10+
"items": [
11+
{
12+
"id": "https://iiif.io/api/presentation/4.0/example/02",
13+
"type": "Timeline",
14+
"duration": 1985.024,
15+
"items": [
16+
{
17+
"id": "https://iiif.io/api/presentation/4.0/example/02/page",
18+
"type": "AnnotationPage",
19+
"items": [
20+
{
21+
"id": "https://iiif.io/api/presentation/4.0/example/02/page/anno",
22+
"type": "Annotation",
23+
"motivation": ["painting"],
24+
"body": {
25+
"id": "https://fixtures.iiif.io/audio/indiana/mahler-symphony-3/CD1/medium/128Kbps.mp4",
26+
"type": "Audio",
27+
"format": "audio/mp4",
28+
"duration": 1985.024
29+
},
30+
"target": {
31+
"id": "https://iiif.io/api/presentation/4.0/example/02",
32+
"type": "Timeline"
33+
}
34+
}
35+
]
36+
}
37+
]
38+
}
39+
]
40+
}

presentation_validator/v3/schemavalidator.py

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,39 @@ def printPath(pathObj, fields):
2424
path += '/[{}]'.format(fields)
2525
return path
2626

27+
def create_snippet(data):
    """Return a one-level summary of a JSON object for use in error output.

    A possibly large JSON document is reduced to the fields at the current
    level: nested lists are shown as ``'[ ... ]'`` and nested objects as
    ``'{ ... }'``; every other value is kept unchanged.

    The summary is a new dict. ``data`` itself is left untouched, so a caller
    that is still traversing (or will re-validate) the original document does
    not see its children replaced by placeholder strings.
    """
    snippet = {}
    for key, value in data.items():
        if isinstance(value, list):
            snippet[key] = '[ ... ]'
        elif isinstance(value, dict):
            snippet[key] = '{ ... }'
        else:
            snippet[key] = value

    return snippet
36+
37+
def convertValidationError(err, errorCount, total):
    """Turn one schema validation error into an ErrorDetail.

    ``err`` is read through its ``schema``, ``instance``, ``message`` and
    ``path`` attributes (presumably a jsonschema ValidationError - confirm
    against the caller). ``errorCount`` is the position of this error and
    ``total`` the number of errors being reported; both only appear in the
    generated message text.

    The failing instance is summarised to its top-level fields before being
    attached as context. The summary is built from a shallow copy so that the
    validated document held by ``err.instance`` is not modified.
    """
    schema = err.schema

    detail = ''
    if 'title' in schema:
        detail = schema['title']

    # NOTE(review): the schema description is appended to ``detail`` while
    # ``description`` is always passed on empty. Kept as-is to preserve the
    # existing output; confirm whether that is intended.
    description = ''
    if 'description' in schema:
        detail += ' ' + schema['description']

    context = err.instance
    if isinstance(context, dict):
        # Copy first: create_snippet must never get the chance to overwrite
        # nested values of the document that was validated.
        context = create_snippet(dict(context))

    return ErrorDetail(
        f"Error {errorCount} of {total}.\n Message: {err.message}",
        detail,
        description,
        printPath(err.path, err.message),
        context,
        err)
59+
2760
def validate(data, version, url):
2861
if version == IIIFVersion.V3_0:
2962
with open(f'{SCHEMA_DIR}/iiif_3_0.json') as json_file:
@@ -90,31 +123,12 @@ def validate(data, version, url):
90123
if errorPath not in seen_titles:
91124
errors.append(errorDup)
92125
seen_titles.add(errorPath)
126+
93127
errorCount = 1
94128
# Now create some useful messages to pass on
95129
for err in errors:
96-
detail = ''
97-
if 'title' in err.schema:
98-
detail = err.schema['title']
99-
description = ''
100-
if 'description' in err.schema:
101-
detail += ' ' + err.schema['description']
102-
context = err.instance
103-
if isinstance(context, dict):
104-
for key in context:
105-
if isinstance(context[key], list):
106-
context[key] = '[ ... ]'
107-
elif isinstance(context[key], dict):
108-
context[key] = '{ ... }'
109-
110-
result.errorList.append(ErrorDetail(
111-
'Error {} of {}.\n Message: {}'.format(errorCount, len(errors), err.message),
112-
detail,
113-
description,
114-
printPath(err.path, err.message),
115-
context,
116-
err))
117-
#print (json.dumps(err.instance, indent=4))
130+
result.errorList.append(convertValidationError(err, errorCount, len(errors)))
131+
118132
errorCount += 1
119133

120134
# Return:
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
from __future__ import annotations
2+
3+
import json
4+
from pathlib import Path
5+
from typing import Any, Dict, Iterable, List, Tuple
6+
from urllib.parse import urljoin, urlparse
7+
from pathlib import PurePosixPath
8+
9+
from referencing import Registry, Resource
10+
from referencing.exceptions import Unresolvable
11+
12+
def iter_refs(node: Any, path: str = "#") -> Iterable[Tuple[str, str]]:
    """
    Yield ($ref_value, json_pointer_path_in_schema) for every $ref in a schema tree.

    ``node`` is any decoded JSON value; dicts and lists are walked
    recursively and everything else is ignored. ``path`` is the pointer of
    ``node`` itself and defaults to the document root ``#``.

    Only string-valued ``$ref`` entries are reported. Object keys are escaped
    as JSON Pointer reference tokens (``~`` -> ``~0``, ``/`` -> ``~1``) so
    that a key containing a slash cannot be mistaken for two path segments.
    """
    if isinstance(node, dict):
        ref = node.get("$ref")
        if isinstance(ref, str):
            yield ref, path + "/$ref"
        for key, value in node.items():
            # Escape "~" before "/" so the "~1" produced for a slash is not
            # itself re-escaped.
            token = str(key).replace("~", "~0").replace("/", "~1")
            yield from iter_refs(value, f"{path}/{token}")
    elif isinstance(node, list):
        for index, value in enumerate(node):
            yield from iter_refs(value, f"{path}/{index}")
25+
26+
27+
def build_registry_from_dir(schema_dir: str | Path) -> Tuple[Registry, Dict[str, Dict[str, Any]]]:
    """
    Build a referencing.Registry from every *.json file below schema_dir.

    The directory is searched recursively. Each schema is registered under
    its ``$id`` when it has a non-empty one, otherwise under the file:// URI
    of its resolved path.

    Returns the registry together with a dict mapping each URI to the decoded
    schema it was loaded from.

    Raises Exception when two files end up with the same URI, and
    FileNotFoundError when no JSON file is found at all.
    """
    root = Path(schema_dir)
    registry = Registry()
    schemas_by_uri: Dict[str, Dict[str, Any]] = {}
    loaded_any = False

    for schema_path in root.rglob("*.json"):
        loaded_any = True
        with schema_path.open("r", encoding="utf-8") as handle:
            schema = json.load(handle)

        print (f"Loading {schema_path.name}")

        # Schemas without a usable $id are keyed by where they live on disk.
        uri = schema.get("$id") or schema_path.resolve().as_uri()

        registry = registry.with_resource(uri, Resource.from_contents(schema))
        if uri in schemas_by_uri:
            raise Exception(f"Duplicate schema ID {uri} found in {schema_path.name}")
        schemas_by_uri[uri] = schema

    if not loaded_any:
        raise FileNotFoundError(f"No JSON files found in {root}")

    return registry, schemas_by_uri
61+
62+
63+
def find_missing_refs_in_dir(schema_dir: str | Path) -> List[Dict[str, str]]:
    """
    Collect every $ref under schema_dir that cannot be resolved.

    All schemas are loaded into one registry first, then each $ref is made
    absolute against the URI of the schema that contains it and looked up.

    Returns one dict per failed lookup with the keys ``schema`` (URI of the
    containing schema), ``ref`` (the $ref as written), ``where`` (its
    location inside that schema) and ``resolved_target`` (the absolute URI
    that was tried). The list is empty when everything resolves.
    """
    registry, schemas = build_registry_from_dir(schema_dir)
    unresolved: List[Dict[str, str]] = []

    for base_uri, schema in schemas.items():
        resolver = registry.resolver(base_uri=base_uri)

        for ref, where in iter_refs(schema):
            # Relative references are interpreted relative to the schema
            # they appear in.
            target = urljoin(base_uri, ref)
            try:
                resolver.lookup(target)
            except Unresolvable:
                unresolved.append(
                    dict(schema=base_uri, ref=ref, where=where, resolved_target=target)
                )

    return unresolved
89+
90+
91+
if __name__ == "__main__":
    # The schemas checked are the ones in schema/v4, located three directory
    # levels above this file.
    schema_root = Path(__file__).resolve().parents[2] / "schema" / "v4"
    problems = find_missing_refs_in_dir(schema_root)

    if not problems:
        print("All $refs resolved.")
    else:
        print("\nMissing/unresolvable $refs:")
        for p in problems:
            print(f"- In {p['schema'].split('/')[-1]}:\n at {p['where']}: {p['ref']} (→ {p['resolved_target']})\n")
        # Exit with status 2 so that a caller can detect the failure.
        raise SystemExit(2)
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import sys
2+
import json
3+
from presentation_validator.model import ErrorDetail
4+
from presentation_validator.v3.schemavalidator import create_snippet
5+
6+
# Keys that checkNode skips without descending into their values; per the
# comment in checkNode, these fields point to other resources.
ignore = ["target", "lookAt", "range","structures","first","last","start","source"]
7+
def check(manifest):
    """Check that every id inside a manifest is used only once.

    ``manifest`` is the decoded JSON document. The whole tree is walked with
    checkNode, which records one entry per repeated id.

    Returns the list of recorded duplicates, or None when there are none.
    """
    seen_ids = []
    found = []
    checkNode(manifest, seen_ids, found)

    return found if found else None
19+
20+
def checkNode(node, ids=None, duplicates=None, path = ""):
    """Recursively record ids that occur more than once below ``node``.

    ``node`` is a decoded JSON value; anything that is not a dict is ignored.
    ``ids`` is the list of ids seen so far and ``duplicates`` the list of
    ErrorDetail entries for repeated ids. Both are extended in place, so a
    caller that wants the results must pass its own lists. When omitted, a
    fresh list is used for each call, so nothing leaks between unrelated
    calls. ``path`` is the location of ``node`` in the document, used for
    reporting only.

    Keys listed in ``ignore`` are not descended into.

    Raises ValueError when an ``id`` value is not a string.
    """
    if ids is None:
        ids = []
    if duplicates is None:
        duplicates = []

    if not isinstance(node, dict):
        return

    for key, value in node.items():
        if key == 'id':
            if not isinstance(value, str):
                raise ValueError(f"Id must be a string: {value}")
            if value in ids:
                duplicates.append(ErrorDetail(
                    "Duplicate id found",
                    "The id field must be unique",
                    f"Duplicate id: {value}",
                    path + "/" + key,
                    # Summarise a shallow copy: the node is still being
                    # iterated here and its children must stay intact so
                    # that they are checked too.
                    create_snippet(dict(node)),
                    None
                ))
            ids.append(value)
            continue

        # Don't look further in fields that point to other resources
        if key in ignore:
            continue

        if isinstance(value, list):
            for index, item in enumerate(value):
                checkNode(item, ids, duplicates, path + "/" + key + "[" + str(index) + "]")
        elif not isinstance(value, str):
            checkNode(value, ids, duplicates, path + "/" + key)
51+
52+
def main():
    """Command-line entry point: check one manifest file for duplicate ids.

    The path of the manifest is taken from the first command-line argument.
    The outcome is printed, and the return value is meant to be used as the
    process exit status: 0 when no duplicate was found, 1 when at least one
    was found, 2 when no file was given.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <manifest.json>", file=sys.stderr)
        return 2

    # JSON documents are read as UTF-8 regardless of the platform default.
    with open(sys.argv[1], 'r', encoding='utf-8') as f:
        manifest = json.load(f)

    duplicates = check(manifest)
    if not duplicates:
        print("No duplicate ids found.")
        return 0

    print(f"{len(duplicates)} duplicate id(s) found:")
    for duplicate in duplicates:
        # NOTE(review): relies on the string form of ErrorDetail being
        # readable - confirm, or print its individual fields instead.
        print(duplicate)
    return 1


if __name__ == '__main__':
    sys.exit(main())

0 commit comments

Comments
 (0)