From 59e58836f966b9ade06374818ace953aa6950016 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 17 Jun 2026 14:16:34 +0300 Subject: [PATCH 1/8] Embed SBOM into wheels --- .github/embed-sbom.py | 91 ++++++++++++++++++++++++++++++++++++ .github/workflows/wheels.yml | 31 ++++++++++-- 2 files changed, 117 insertions(+), 5 deletions(-) create mode 100644 .github/embed-sbom.py diff --git a/.github/embed-sbom.py b/.github/embed-sbom.py new file mode 100644 index 00000000000..6a999231ffa --- /dev/null +++ b/.github/embed-sbom.py @@ -0,0 +1,91 @@ +"""Embed Pillow's SBOM into each wheel's `.dist-info/sboms/` directory, +as specified by PEP 770. + +The SBOM (produced by `generate-sbom.py`) is injected into every `.whl` in +the given directory, updating each wheel's `RECORD` so the result remains a +valid, installable wheel. +""" + +from __future__ import annotations + +import argparse +import base64 +import hashlib +import sys +import zipfile +from pathlib import Path + + +def record_entry(path: str, data: bytes) -> str: + """Build a RECORD line: `path,sha256=,`.""" + digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()) + return f"{path},sha256={digest.rstrip(b'=').decode()},{len(data)}" + + +def embed(wheel: Path, sbom_name: str, sbom_bytes: bytes) -> None: + with zipfile.ZipFile(wheel) as zf: + infos = zf.infolist() + contents = {info.filename: zf.read(info.filename) for info in infos} + + record_name = next( + name + for name in contents + if name.endswith(".dist-info/RECORD") and name.count("/") == 1 + ) + dist_info = record_name.rsplit("/", 1)[0] + sbom_path = f"{dist_info}/sboms/{sbom_name}" + + # Append a matching RECORD line for the SBOM (RECORD's own line has no hash). + lines = contents[record_name].decode("utf-8").splitlines() + lines.append(record_entry(sbom_path, sbom_bytes)) + contents[record_name] = ("\n".join(lines) + "\n").encode("utf-8") + + tmp = wheel.with_name(wheel.name + ".tmp") + with zipfile.ZipFile(tmp, "w", zipfile.ZIP_DEFLATED) as zf: + # Re-use each original ZipInfo to preserve timestamps, mode bits and + # compression; only RECORD's contents change. + for info in infos: + zf.writestr(info, contents[info.filename]) + zf.writestr(sbom_path, sbom_bytes) + tmp.replace(wheel) + + +def load_sbom(path: Path) -> tuple[str, bytes]: + """Read the SBOM; `path` may be the file or a directory containing one.""" + if path.is_dir(): + candidates = list(path.glob("*.cdx.json")) + if len(candidates) != 1: + msg = f"expected exactly one *.cdx.json in {path}, found {len(candidates)}" + raise SystemExit(msg) + path = candidates[0] + return path.name, path.read_bytes() + + +def main() -> None: + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + "wheelhouse", type=Path, help="directory of wheels to embed the SBOM into" + ) + parser.add_argument( + "sbom", + type=Path, + help="SBOM file, or a directory containing a single `.cdx.json`", + ) + args = parser.parse_args() + + sbom_name, sbom_bytes = load_sbom(args.sbom) + + wheels = sorted(args.wheelhouse.glob("*.whl")) + if not wheels: + print(f"error: no wheels found in {args.wheelhouse}", file=sys.stderr) + raise SystemExit(1) + + for wheel in wheels: + embed(wheel, sbom_name, sbom_bytes) + print(f"Embedded {sbom_name} in {wheel.name}") + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 959fd0e26d7..67390cac182 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -15,6 +15,7 @@ on: - ".ci/requirements-sbom.txt" - ".github/compare-dist-sizes.py" - ".github/dependencies.json" + - ".github/embed-sbom.py" - ".github/generate-sbom.py" - ".github/workflows/wheels*" - "pyproject.toml" @@ -41,6 +42,7 @@ env: jobs: build-native-wheels: + needs: sbom if: github.event_name != 'schedule' || github.event.repository.fork == false name: ${{ matrix.name }} runs-on: ${{ matrix.os }} @@ -128,12 +130,22 @@ jobs: CIBW_ENVIRONMENT_PASS_LINUX: FORCE_COLOR MACOSX_DEPLOYMENT_TARGET: ${{ matrix.macosx_deployment_target }} + - name: Download SBOM + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: sbom + path: sbom + + - name: Embed SBOM in wheels + run: python3 .github/embed-sbom.py wheelhouse sbom + - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: dist-${{ matrix.name }} path: ./wheelhouse/*.whl windows: + needs: sbom if: github.event_name != 'schedule' || github.event.repository.fork == false name: Windows ${{ matrix.cibw_arch }} runs-on: ${{ matrix.os }} @@ -207,6 +219,15 @@ jobs: powershell C:\pillow\.github\workflows\wheels-test.ps1 %CD%\..\venv-test' shell: bash + - name: Download SBOM + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: sbom + path: sbom + + - name: Embed SBOM in wheels + run: python .github/embed-sbom.py wheelhouse sbom + - name: Upload wheels uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: @@ -315,17 +336,17 @@ jobs: - name: Generate CycloneDX SBOM run: python3 .github/generate-sbom.py + - name: Validate SBOM + run: | + python3 -m pip install -r .ci/requirements-sbom.txt + check-jsonschema --schemafile "https://raw.githubusercontent.com/CycloneDX/specification/1.7/schema/bom-1.7.schema.json" pillow-*.cdx.json + - name: Upload SBOM as workflow artifact uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: sbom path: "pillow-*.cdx.json" - - name: Validate SBOM - run: | - python3 -m pip install -r .ci/requirements-sbom.txt - check-jsonschema --schemafile "https://raw.githubusercontent.com/CycloneDX/specification/1.7/schema/bom-1.7.schema.json" pillow-*.cdx.json - sbom-publish: if: | github.event.repository.fork == false From 33a66fdf2a024f2516c71800d406eed5d77de588 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 17 Jun 2026 14:17:36 +0300 Subject: [PATCH 2/8] Move sbom job before others that need it --- .github/workflows/wheels.yml | 54 ++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 67390cac182..358b596c4a5 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -41,6 +41,33 @@ env: FORCE_COLOR: 1 jobs: + sbom: + if: github.event_name != 'schedule' || github.event.repository.fork == false + runs-on: ubuntu-latest + name: Generate SBOM + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: "3.x" + + - name: Generate CycloneDX SBOM + run: python3 .github/generate-sbom.py + + - name: Validate SBOM + run: | + python3 -m pip install -r .ci/requirements-sbom.txt + check-jsonschema --schemafile "https://raw.githubusercontent.com/CycloneDX/specification/1.7/schema/bom-1.7.schema.json" pillow-*.cdx.json + + - name: Upload SBOM as workflow artifact + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: sbom + path: "pillow-*.cdx.json" + build-native-wheels: needs: sbom if: github.event_name != 'schedule' || github.event.repository.fork == false @@ -320,33 +347,6 @@ jobs: artifacts_path: dist anaconda_nightly_upload_token: ${{ secrets.ANACONDA_ORG_UPLOAD_TOKEN }} - sbom: - if: github.event_name != 'schedule' || github.event.repository.fork == false - runs-on: ubuntu-latest - name: Generate SBOM - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - - - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: "3.x" - - - name: Generate CycloneDX SBOM - run: python3 .github/generate-sbom.py - - - name: Validate SBOM - run: | - python3 -m pip install -r .ci/requirements-sbom.txt - check-jsonschema --schemafile "https://raw.githubusercontent.com/CycloneDX/specification/1.7/schema/bom-1.7.schema.json" pillow-*.cdx.json - - - name: Upload SBOM as workflow artifact - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 - with: - name: sbom - path: "pillow-*.cdx.json" - sbom-publish: if: | github.event.repository.fork == false From 662d5f1e8874782b6fc8bc7e2212d721893645ae Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Sat, 20 Jun 2026 16:28:23 +1000 Subject: [PATCH 3/8] Read SBOM inside embed() --- .github/embed-sbom.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/embed-sbom.py b/.github/embed-sbom.py index 6a999231ffa..2691196faa9 100644 --- a/.github/embed-sbom.py +++ b/.github/embed-sbom.py @@ -22,7 +22,7 @@ def record_entry(path: str, data: bytes) -> str: return f"{path},sha256={digest.rstrip(b'=').decode()},{len(data)}" -def embed(wheel: Path, sbom_name: str, sbom_bytes: bytes) -> None: +def embed(wheel: Path, sbom: Path) -> None: with zipfile.ZipFile(wheel) as zf: infos = zf.infolist() contents = {info.filename: zf.read(info.filename) for info in infos} @@ -33,7 +33,9 @@ def embed(wheel: Path, sbom_name: str, sbom_bytes: bytes) -> None: if name.endswith(".dist-info/RECORD") and name.count("/") == 1 ) dist_info = record_name.rsplit("/", 1)[0] - sbom_path = f"{dist_info}/sboms/{sbom_name}" + + sbom_bytes = sbom.read_bytes() + sbom_path = f"{dist_info}/sboms/{sbom.name}" # Append a matching RECORD line for the SBOM (RECORD's own line has no hash). lines = contents[record_name].decode("utf-8").splitlines() @@ -49,16 +51,18 @@ def embed(wheel: Path, sbom_name: str, sbom_bytes: bytes) -> None: zf.writestr(sbom_path, sbom_bytes) tmp.replace(wheel) + print(f"Embedded {sbom.name} in {wheel.name}") + -def load_sbom(path: Path) -> tuple[str, bytes]: - """Read the SBOM; `path` may be the file or a directory containing one.""" +def scan_dir(path: Path) -> Path: + """If `path` is a directory, return the path of the SBOM within.""" if path.is_dir(): candidates = list(path.glob("*.cdx.json")) if len(candidates) != 1: msg = f"expected exactly one *.cdx.json in {path}, found {len(candidates)}" raise SystemExit(msg) - path = candidates[0] - return path.name, path.read_bytes() + return candidates[0] + return path def main() -> None: @@ -75,7 +79,7 @@ def main() -> None: ) args = parser.parse_args() - sbom_name, sbom_bytes = load_sbom(args.sbom) + sbom = scan_dir(args.sbom) wheels = sorted(args.wheelhouse.glob("*.whl")) if not wheels: @@ -83,8 +87,7 @@ def main() -> None: raise SystemExit(1) for wheel in wheels: - embed(wheel, sbom_name, sbom_bytes) - print(f"Embedded {sbom_name} in {wheel.name}") + embed(wheel, sbom) if __name__ == "__main__": From 02c716ca76bba08e0ffacf83b125a96535a608c3 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Sat, 20 Jun 2026 16:37:13 +1000 Subject: [PATCH 4/8] Avoid decoding and re-encoding --- .github/embed-sbom.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/embed-sbom.py b/.github/embed-sbom.py index 2691196faa9..49dfa9f713b 100644 --- a/.github/embed-sbom.py +++ b/.github/embed-sbom.py @@ -16,10 +16,16 @@ from pathlib import Path -def record_entry(path: str, data: bytes) -> str: +def record_entry(path: str, data: bytes) -> bytes: """Build a RECORD line: `path,sha256=,`.""" digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()) - return f"{path},sha256={digest.rstrip(b'=').decode()},{len(data)}" + return ( + path.encode("utf-8") + + b",sha256=" + + digest.rstrip(b"=") + + b"," + + str(len(data)).encode() + ) def embed(wheel: Path, sbom: Path) -> None: @@ -38,9 +44,9 @@ def embed(wheel: Path, sbom: Path) -> None: sbom_path = f"{dist_info}/sboms/{sbom.name}" # Append a matching RECORD line for the SBOM (RECORD's own line has no hash). - lines = contents[record_name].decode("utf-8").splitlines() + lines = contents[record_name].splitlines() lines.append(record_entry(sbom_path, sbom_bytes)) - contents[record_name] = ("\n".join(lines) + "\n").encode("utf-8") + contents[record_name] = b"\n".join(lines) + b"\n" tmp = wheel.with_name(wheel.name + ".tmp") with zipfile.ZipFile(tmp, "w", zipfile.ZIP_DEFLATED) as zf: From 195e28f158378d7278cc2aefdd8f4f99a6d93096 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 24 Jun 2026 15:39:53 +0300 Subject: [PATCH 5/8] Replace print+SystemExit with parser.error Co-authored-by: Andrew Murray <3112309+radarhere@users.noreply.github.com> --- .github/embed-sbom.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/embed-sbom.py b/.github/embed-sbom.py index 49dfa9f713b..e6441f7ced1 100644 --- a/.github/embed-sbom.py +++ b/.github/embed-sbom.py @@ -89,8 +89,7 @@ def main() -> None: wheels = sorted(args.wheelhouse.glob("*.whl")) if not wheels: - print(f"error: no wheels found in {args.wheelhouse}", file=sys.stderr) - raise SystemExit(1) + parser.error(f"no wheels found in {args.wheelhouse}") for wheel in wheels: embed(wheel, sbom) From 287a17ef99f0094a53fe6867fb3a47ca363bc854 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 24 Jun 2026 12:40:27 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .github/embed-sbom.py | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/embed-sbom.py b/.github/embed-sbom.py index e6441f7ced1..0c22a40c4a6 100644 --- a/.github/embed-sbom.py +++ b/.github/embed-sbom.py @@ -11,7 +11,6 @@ import argparse import base64 import hashlib -import sys import zipfile from pathlib import Path From b2c4126217beb8dfc378fd96c5403399a6f65926 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sat, 27 Jun 2026 12:25:11 +0300 Subject: [PATCH 7/8] Run with `python3` not `python` Co-authored-by: Andrew Murray <3112309+radarhere@users.noreply.github.com> --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 358b596c4a5..f3c44f1f517 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -253,7 +253,7 @@ jobs: path: sbom - name: Embed SBOM in wheels - run: python .github/embed-sbom.py wheelhouse sbom + run: python3 .github/embed-sbom.py wheelhouse sbom - name: Upload wheels uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 From a5d4f3d48ea10ece48905fe882068a48003b2db8 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sat, 27 Jun 2026 20:25:07 +0300 Subject: [PATCH 8/8] Apply suggestions from code review Co-authored-by: Andrew Murray <3112309+radarhere@users.noreply.github.com> --- .github/embed-sbom.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/.github/embed-sbom.py b/.github/embed-sbom.py index 0c22a40c4a6..64c4b4b124d 100644 --- a/.github/embed-sbom.py +++ b/.github/embed-sbom.py @@ -37,7 +37,7 @@ def embed(wheel: Path, sbom: Path) -> None: for name in contents if name.endswith(".dist-info/RECORD") and name.count("/") == 1 ) - dist_info = record_name.rsplit("/", 1)[0] + dist_info = record_name.split("/", 1)[0] sbom_bytes = sbom.read_bytes() sbom_path = f"{dist_info}/sboms/{sbom.name}" @@ -47,14 +47,12 @@ def embed(wheel: Path, sbom: Path) -> None: lines.append(record_entry(sbom_path, sbom_bytes)) contents[record_name] = b"\n".join(lines) + b"\n" - tmp = wheel.with_name(wheel.name + ".tmp") - with zipfile.ZipFile(tmp, "w", zipfile.ZIP_DEFLATED) as zf: - # Re-use each original ZipInfo to preserve timestamps, mode bits and - # compression; only RECORD's contents change. - for info in infos: - zf.writestr(info, contents[info.filename]) - zf.writestr(sbom_path, sbom_bytes) - tmp.replace(wheel) + with zipfile.ZipFile(wheel, "w", zipfile.ZIP_DEFLATED) as zf: + # Re-use each original ZipInfo to preserve timestamps, mode bits and + # compression; only RECORD's contents change + for info in infos: + zf.writestr(info, contents[info.filename]) + zf.writestr(sbom_path, sbom_bytes) print(f"Embedded {sbom.name} in {wheel.name}")