Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
5f300f7
Adding early version of unfinished schema for v4
glenrobson Jan 21, 2026
1e4142c
Fixing quotes
glenrobson Jan 21, 2026
cb02979
Validating simple Timeline and Scene
glenrobson Jan 22, 2026
ea8b00a
Removing AnnoCollectionRef as no longer used
glenrobson Feb 4, 2026
d97102d
Ensuring language is an array
glenrobson Feb 11, 2026
3feec7f
Adding new shape for annotations
glenrobson Feb 22, 2026
ab0d9b7
Adding space
glenrobson Feb 22, 2026
07bcd14
Adding 3d objects
glenrobson Feb 26, 2026
6a78700
Adding another allowed duplicate id field
glenrobson Feb 26, 2026
45ccaf1
Re-org of Audio, Lights and Cameras
glenrobson Mar 3, 2026
6b9512a
Allowing a Canvas as a body
glenrobson Mar 4, 2026
e65895c
Allowing Timeline as a body
glenrobson Mar 5, 2026
9b0e6b8
Allowing Scene as annotation bodies
glenrobson Mar 11, 2026
304dcf8
Allowing equal to 90: https://github.com/IIIF/api/issues/2439
glenrobson Mar 12, 2026
9a37608
Fixing URL
glenrobson Mar 12, 2026
27a5831
Fixing duplicate URI
glenrobson Mar 12, 2026
0074078
Ensuring Specific Resource has id and type
glenrobson Mar 13, 2026
8fbf3ac
Ensuring @context included with manifest and collection
glenrobson Mar 13, 2026
15d9391
Adding language and ensuring its an array
glenrobson Mar 13, 2026
6b55b35
Adding lang to External
glenrobson Mar 13, 2026
871c525
Merge branch 'main' into v4
glenrobson Mar 21, 2026
d46d4c1
Adding Github action instructions
glenrobson Mar 21, 2026
bcb1d1f
Moving test to uv
glenrobson Mar 21, 2026
5f301d0
Fixing path to schema files
glenrobson Mar 21, 2026
1e9fe6e
Integrating v4 schema support
glenrobson Mar 21, 2026
50df8ff
Fixing path to schema dir
glenrobson Mar 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions .github/workflows/check-jsonschema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
name: Check JSON Schema $ref integrity

on:
pull_request:
types: [opened, synchronize, reopened]
# Optional: only run when the schemas or this workflow file change
paths:
- "schema/**"
- ".github/workflows/check-jsonschema.yml"

permissions:
contents: read

jobs:
check-refs:
runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@v5

- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: 3.12

- name: Set up uv
uses: astral-sh/setup-uv@v6
with:
enable-cache: true

- name: Sync dependencies
run: uv sync --all-extras --dev

- name: Run $ref checker
run: |
uv run python -m presentation_validator.v4.check_refs
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- uses: actions/checkout@v5

- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}

Expand Down
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,21 @@ This should start up a local server, running at <localhost:8080>. To test it, tr
}
```
You may also use `--hostname` to specify a hostname or IP address to which to bind and `--port` for a port to which to bind.


## GitHub Action

It is also possible to run the validator against JSON documents stored in a GitHub repository by using a GitHub Action. An example is below:

```
- name: Run IIIF validator
uses: IIIF/presentation-validator@main
with:
directory: path/to/json
version: 3
extension: .json
```

* `directory` is required. The validator will start at this directory and then go through any sub-directories looking for json files to validate.
* `version` is optional. If supplied, any json files will be validated against this version. If it's not supplied, the validator will look inside the json to see which version is specified in the `@context`.
* `extension` is optional and defaults to `.json`.
42 changes: 42 additions & 0 deletions fixtures/4/bad/dup_id.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"@context": "http://iiif.io/api/presentation/4/context.json",
"id": "https://iiif.io/api/presentation/4.0/example/03_canvas.json",
"type": "Manifest",
"label": {
"en": [
"Canvas and first annotation page have same id"
]
},
"items": [
{
"id": "https://iiif.io/api/presentation/4.0/example/03_canvas/canvas/p1",
"type": "Canvas",
"height": 1800,
"width": 1200,
"items": [
{
"id": "https://iiif.io/api/presentation/4.0/example/03_canvas/canvas/p1",
"type": "AnnotationPage",
"items": [
{
"id": "https://iiif.io/api/presentation/4.0/example/03_canvas/annotation/p0001-image",
"type": "Annotation",
"motivation": [ "painting" ],
"body": {
"id": "http://iiif.io/api/presentation/2.1/example/fixtures/resources/page1-full.png",
"type": "Image",
"format": "image/png",
"height": 1800,
"width": 1200
},
"target": {
"id": "https://iiif.io/api/presentation/4.0/example/03_canvas/canvas/p1",
"type": "Canvas"
}
}
]
}
]
}
]
}
40 changes: 40 additions & 0 deletions fixtures/4/ok/02_timeline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"@context": "http://iiif.io/api/presentation/4/context.json",
"id": "https://iiif.io/api/presentation/4.0/example/02_timeline.json",
"type": "Manifest",
"label": {
"en": [
"Simplest Audio Example (IIIF Presentation v4)"
]
},
"items": [
{
"id": "https://iiif.io/api/presentation/4.0/example/02",
"type": "Timeline",
"duration": 1985.024,
"items": [
{
"id": "https://iiif.io/api/presentation/4.0/example/02/page",
"type": "AnnotationPage",
"items": [
{
"id": "https://iiif.io/api/presentation/4.0/example/02/page/anno",
"type": "Annotation",
"motivation": ["painting"],
"body": {
"id": "https://fixtures.iiif.io/audio/indiana/mahler-symphony-3/CD1/medium/128Kbps.mp4",
"type": "Audio",
"format": "audio/mp4",
"duration": 1985.024
},
"target": {
"id": "https://iiif.io/api/presentation/4.0/example/02",
"type": "Timeline"
}
}
]
}
]
}
]
}
58 changes: 36 additions & 22 deletions presentation_validator/v3/schemavalidator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,39 @@ def printPath(pathObj, fields):
path += '/[{}]'.format(fields)
return path

def create_snippet(data):
    """Return a one-level summary of a JSON object for use in error reports.

    A possibly large JSON document is reduced to the fields at the current
    level: nested lists are replaced by the placeholder string ``'[ ... ]'``
    and nested objects by ``'{ ... }'``. All other values are kept as they are.

    The summary is built as a new dict and ``data`` itself is left untouched,
    so this can safely be called on a node of a document that is still being
    traversed or validated.

    Args:
        data: The JSON object (dict) to summarise. Any value that is not a
            dict is returned unchanged.

    Returns:
        A new dict with the same keys as ``data``, or ``data`` itself when it
        is not a dict.
    """
    if not isinstance(data, dict):
        return data

    snippet = {}
    for key, value in data.items():
        if isinstance(value, list):
            snippet[key] = '[ ... ]'
        elif isinstance(value, dict):
            snippet[key] = '{ ... }'
        else:
            snippet[key] = value

    return snippet

def convertValidationError(err, errorCount, total):
    """Build an ErrorDetail describing a single schema validation error.

    Args:
        err: The validation error. Its ``schema``, ``instance``, ``message``
            and ``path`` attributes are read (presumably a jsonschema
            ValidationError; confirm against the caller).
        errorCount: Position of this error in the list being reported.
        total: Total number of errors being reported.

    Returns:
        An ErrorDetail carrying the numbered message, the schema title with
        the schema description appended, an empty description, the printed
        path, a one-level view of the failing instance, and the error itself.
    """
    schema = err.schema

    detail = schema['title'] if 'title' in schema else ''
    # The schema description is appended to the detail text; the separate
    # description field is always passed through empty.
    description = ''
    if 'description' in schema:
        detail = detail + ' ' + schema['description']

    # Collapse nested containers so only the fields at this level are shown.
    # Note this rewrites err.instance in place.
    context = err.instance
    if isinstance(context, dict):
        for key, value in context.items():
            if isinstance(value, list):
                context[key] = '[ ... ]'
            elif isinstance(value, dict):
                context[key] = '{ ... }'

    message = f"Error {errorCount} of {total}.\n Message: {err.message}"
    location = printPath(err.path, err.message)
    return ErrorDetail(message, detail, description, location, context, err)

def validate(data, version, url):
if version == IIIFVersion.V3_0:
with open(f'{SCHEMA_DIR}/iiif_3_0.json') as json_file:
Expand Down Expand Up @@ -90,31 +123,12 @@ def validate(data, version, url):
if errorPath not in seen_titles:
errors.append(errorDup)
seen_titles.add(errorPath)

errorCount = 1
# Now create some useful messages to pass on
for err in errors:
detail = ''
if 'title' in err.schema:
detail = err.schema['title']
description = ''
if 'description' in err.schema:
detail += ' ' + err.schema['description']
context = err.instance
if isinstance(context, dict):
for key in context:
if isinstance(context[key], list):
context[key] = '[ ... ]'
elif isinstance(context[key], dict):
context[key] = '{ ... }'

result.errorList.append(ErrorDetail(
'Error {} of {}.\n Message: {}'.format(errorCount, len(errors), err.message),
detail,
description,
printPath(err.path, err.message),
context,
err))
#print (json.dumps(err.instance, indent=4))
result.errorList.append(convertValidationError(err, errorCount, len(errors)))

errorCount += 1

# Return:
Expand Down
99 changes: 99 additions & 0 deletions presentation_validator/v4/check_refs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Dict, Iterable, List, Tuple
from urllib.parse import urljoin, urlparse
from pathlib import PurePosixPath

from referencing import Registry, Resource
from referencing.exceptions import Unresolvable

def iter_refs(node: Any, path: str = "#") -> Iterable[Tuple[str, str]]:
    """
    Walk a schema tree and yield a (ref_value, location) pair for each $ref.

    The location is built by appending "/<key>" (for dict members) or
    "/<index>" (for list items) to ``path`` on the way down, ending in "/$ref".
    Only string-valued $ref entries are reported; values that are neither
    dicts nor lists yield nothing.
    """
    if isinstance(node, dict):
        ref = node.get("$ref")
        if isinstance(ref, str):
            yield ref, f"{path}/$ref"
        children = node.items()
    elif isinstance(node, list):
        children = enumerate(node)
    else:
        return

    for key, child in children:
        yield from iter_refs(child, f"{path}/{key}")


def build_registry_from_dir(schema_dir: str | Path) -> Tuple[Registry, Dict[str, Dict[str, Any]]]:
    """
    Load all *.json schemas under schema_dir into a referencing.Registry.

    Each resource is keyed by:
      - its $id, if present, else
      - a file:// URI for its absolute path.

    Files are visited in sorted path order so that the "Loading ..." output,
    and the file a duplicate is reported against, are the same on every run.

    Args:
        schema_dir: Directory searched recursively for ``*.json`` files.

    Returns:
        A ``(registry, by_uri)`` tuple, where ``by_uri`` maps each resource
        URI to the parsed schema document registered under it.

    Raises:
        ValueError: If two schema files resolve to the same URI.
        FileNotFoundError: If no JSON files are found under ``schema_dir``.
    """
    schema_dir = Path(schema_dir)
    registry = Registry()
    by_uri: Dict[str, Dict[str, Any]] = {}

    for path in sorted(schema_dir.rglob("*.json")):
        with path.open("r", encoding="utf-8") as f:
            schema = json.load(f)

        print(f"Loading {path.name}")
        # Fall back to a file:///.../schema.json URI when the schema has no $id
        uri = schema.get("$id") or path.resolve().as_uri()

        # Reject the clash before registering, so a duplicate never replaces
        # the resource that was loaded first.
        if uri in by_uri:
            raise ValueError(f"Duplicate schema ID {uri} found in {path.name}")

        registry = registry.with_resource(uri, Resource.from_contents(schema))
        by_uri[uri] = schema

    # Every file that loads adds an entry, so an empty map means no files.
    if not by_uri:
        raise FileNotFoundError(f"No JSON files found in {schema_dir}")

    return registry, by_uri


def find_missing_refs_in_dir(schema_dir: str | Path) -> List[Dict[str, str]]:
    """
    Collect every $ref in the schemas under schema_dir that cannot be resolved.

    Each entry in the returned list is a dict with the keys "schema" (URI of
    the schema containing the $ref), "ref" (the $ref value as written),
    "where" (its location inside that schema) and "resolved_target" (the $ref
    made absolute against the schema's URI).
    """
    registry, schemas = build_registry_from_dir(schema_dir)
    unresolved: List[Dict[str, str]] = []

    for base_uri, schema in schemas.items():
        resolver = registry.resolver(base_uri=base_uri)

        for ref, where in iter_refs(schema):
            # Relative refs are made absolute against the schema's own URI
            target = urljoin(base_uri, ref)
            try:
                resolver.lookup(target)
            except Unresolvable:
                entry = {
                    "schema": base_uri,
                    "ref": ref,
                    "where": where,
                    "resolved_target": target,
                }
                unresolved.append(entry)

    return unresolved


if __name__ == "__main__":
    # The v4 schemas live in <three directories above this file>/schema/v4
    schema_root = Path(__file__).resolve().parents[2] / "schema" / "v4"
    problems = find_missing_refs_in_dir(schema_root)

    if not problems:
        print("All $refs resolved.")
    else:
        print("\nMissing/unresolvable $refs:")
        for problem in problems:
            # Show only the last segment of the schema URI
            schema_name = problem["schema"].split("/")[-1]
            print(f"- In {schema_name}:\n at {problem['where']}: {problem['ref']} (→ {problem['resolved_target']})\n")
        # Non-zero exit status when any $ref is unresolved
        raise SystemExit(2)
61 changes: 61 additions & 0 deletions presentation_validator/v4/unique_ids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import sys
import json
from presentation_validator.model import ErrorDetail
from presentation_validator.v3.schemavalidator import create_snippet

# Keys that checkNode does not descend into: their values point to other
# resources, so an id found there is not treated as a new declaration.
ignore = [
    "target",
    "lookAt",
    "range",
    "structures",
    "first",
    "last",
    "start",
    "source",
]
def check(manifest):
    """Check that every id in a manifest is unique.

    The manifest is walked with checkNode, which records one entry for each
    id that has already been seen.

    Returns:
        The list of duplicate-id entries, or None when no duplicate was found.
    """
    seen_ids = []
    found = []
    checkNode(manifest, seen_ids, found)

    return found if found else None

def checkNode(node, ids=None, duplicates=None, path=""):
    """Recursively record every id that occurs more than once below a node.

    Args:
        node: The JSON value to inspect. Anything that is not a dict is
            ignored.
        ids: List of ids seen so far; every id encountered is appended to it.
            A fresh list is used when omitted.
        duplicates: List that receives one ErrorDetail per repeated id.
            A fresh list is used when omitted.
        path: Location of ``node`` within the document, extended with
            "/<key>" or "/<key>[<index>]" on the way down.

    Raises:
        ValueError: If an ``id`` value is not a string.
    """
    # Fresh lists per call: list defaults in the signature would be shared
    # between unrelated calls and leak ids from one document into the next.
    if ids is None:
        ids = []
    if duplicates is None:
        duplicates = []

    if not isinstance(node, dict):
        return

    for key, value in node.items():
        if key == 'id':
            if not isinstance(value, str):
                raise ValueError(f"Id must be a string: {value}")
            if value in ids:
                duplicates.append(ErrorDetail(
                    "Duplicate id found",
                    "The id field must be unique",
                    f"Duplicate id: {value}",
                    path + "/" + key,
                    # Summarise a shallow copy so the node being iterated is
                    # never modified and its remaining children are still
                    # visited.
                    create_snippet(dict(node)),
                    None
                ))
            ids.append(value)
        elif key in ignore:
            # Don't look further in fields that point to other resources
            continue
        elif isinstance(value, list):
            for index, item in enumerate(value):
                checkNode(item, ids, duplicates, f"{path}/{key}[{index}]")
        elif not isinstance(value, str):
            checkNode(value, ids, duplicates, path + "/" + key)

def main():
    """Command-line entry point: report duplicate ids in one JSON file.

    The path of the JSON document is taken from the first command-line
    argument. The outcome of the check is printed to standard output; the
    exit status is not changed by the outcome.

    Raises:
        SystemExit: With a usage message when no file path is given.
    """
    if len(sys.argv) < 2:
        raise SystemExit(f"Usage: {sys.argv[0]} <manifest.json>")

    # JSON documents are UTF-8; don't depend on the platform default encoding
    with open(sys.argv[1], 'r', encoding='utf-8') as f:
        manifest = json.load(f)

    duplicates = check(manifest)
    if not duplicates:
        print("No duplicate ids found.")
        return

    print(f"{len(duplicates)} duplicate id(s) found:")
    for duplicate in duplicates:
        # NOTE(review): relies on ErrorDetail's string form being readable;
        # confirm against presentation_validator.model.
        print(duplicate)


if __name__ == '__main__':
    main()
Loading
Loading