diff --git a/scanner/badges.py b/scanner/badges.py index 06bd189..7dcdcc7 100644 --- a/scanner/badges.py +++ b/scanner/badges.py @@ -1,27 +1,54 @@ """SVG and shields.io-endpoint JSON badge generators. -The JSON shape matches shields.io's ``endpoint`` badge contract so a consumer -can embed ``https://img.shields.io/endpoint?url=`` directly in -a README. The SVG endpoint generates a flat-style badge inline (no shields.io -dependency, no network hop) for sites that want the badge served from the -same origin as the rest of the report. - -Stability: ``schemaVersion``, ``label``, ``message``, ``color``, and -``cacheSeconds`` are part of the v1 contract. The SVG layout (two-rect flat -badge, 11px Verdana) is the contract for ``.svg`` consumers. +JSON endpoints (``/badge/.json``) match shields.io's ``endpoint`` +contract; field names are part of the v1 stability promise. + +SVG endpoints (``/badge/.svg``) serve shields.io's ``for-the-badge`` +style fetched at publish time. The inline two-rect flat renderer is kept +as a fallback for when shields.io is unreachable. """ from __future__ import annotations +import logging +import urllib.parse +import urllib.request from typing import Any from xml.sax.saxutils import escape as _xml_escape +log = logging.getLogger(__name__) + SHIELDS_SCHEMA_VERSION = 1 # Cache hint for shields.io: 5 minutes lines up with the registry-server # proxy's cache TTL and the catalogue refresh cadence. DEFAULT_CACHE_SECONDS = 300 +# Canonical badge style. Any shields.io built-in works here. +DEFAULT_BADGE_STYLE = "for-the-badge" + +# ``static/v1`` takes label/message/color as query params so we don't have +# to URL-encode dashes the way the path-style endpoint requires. +_SHIELDS_STATIC_ENDPOINT = "https://img.shields.io/static/v1" + +_SHIELDS_FETCH_TIMEOUT_SECONDS = 10 + +# shields.io 403s the default ``Python-urllib`` UA, so identify ourselves. +_SHIELDS_USER_AGENT = ( + "coder-skill-scanner/1 (+https://scanner.registry.coder.com)" +) + +# Set to True after the first shields.io failure inside a run, so the rest +# of the run skips straight to the fallback. Process exits between runs. +_shields_disabled_for_run: bool = False + + +def reset_shields_circuit_breaker() -> None: + """Clear ``_shields_disabled_for_run``. Used by tests.""" + global _shields_disabled_for_run + _shields_disabled_for_run = False + + # Shields.io color names. _VERDICT_COLORS: dict[str, str] = { "clean": "brightgreen", @@ -151,13 +178,90 @@ def _flat_badge_svg(label: str, message: str, color_hex: str) -> str: ) -def status_badge_svg(verdict: str) -> str: - """Render the status badge (categorical scan outcome) as inline SVG.""" - color = _NAMED_HEX[_VERDICT_COLORS.get(verdict, "lightgrey")] - return _flat_badge_svg("skill scan", verdict, color) +def fetch_shields_io_svg( + label: str, + message: str, + color: str, + *, + style: str = DEFAULT_BADGE_STYLE, + timeout: float = _SHIELDS_FETCH_TIMEOUT_SECONDS, +) -> str | None: + """Fetch a rendered SVG from shields.io, or ``None`` on any failure. + First failure inside a run flips ``_shields_disabled_for_run`` so the + rest of the run short-circuits to the fallback renderer. + """ + global _shields_disabled_for_run + if _shields_disabled_for_run: + return None -def score_badge_svg(risk_score: int) -> str: - """Render the score badge (numeric risk score) as inline SVG.""" - color = _NAMED_HEX[_risk_color(risk_score)] - return _flat_badge_svg("risk score", f"{risk_score}/100", color) + query = urllib.parse.urlencode( + {"label": label, "message": message, "color": color, "style": style} + ) + url = f"{_SHIELDS_STATIC_ENDPOINT}?{query}" + # shields.io rejects Python's default ``Python-urllib/x.y`` UA with a 403, + # so we identify the scanner explicitly. Same User-Agent value we would + # use to scrape the catalogue. + req = urllib.request.Request( + url, + headers={"User-Agent": _SHIELDS_USER_AGENT, "Accept": "image/svg+xml"}, + ) + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + if resp.status != 200: + log.warning( + "shields.io HTTP %s for %s; disabling fetch this run", + resp.status, + url, + ) + _shields_disabled_for_run = True + return None + body = resp.read().decode("utf-8") + except Exception as exc: + log.warning( + "shields.io fetch failed for %s: %s; disabling fetch this run", + url, + exc, + ) + _shields_disabled_for_run = True + return None + if not body.lstrip().startswith(" str: + """Render the status badge (categorical scan outcome) as an SVG string. + + Tries shields.io first in the requested ``style`` so the registry and + third-party READMEs see byte-identical, canonical-style badges; falls + back to a self-contained ``flat`` rendering if shields.io is unreachable. + """ + color_name = _VERDICT_COLORS.get(verdict, "lightgrey") + shielded = fetch_shields_io_svg( + "skill scan", verdict, color_name, style=style + ) + if shielded is not None: + return shielded + return _flat_badge_svg("skill scan", verdict, _NAMED_HEX[color_name]) + + +def score_badge_svg(risk_score: int, *, style: str = DEFAULT_BADGE_STYLE) -> str: + """Render the score badge (numeric risk score) as an SVG string. + + Same shields-first / inline-fallback behaviour as + :func:`status_badge_svg`. + """ + color_name = _risk_color(risk_score) + message = f"{risk_score}/100" + shielded = fetch_shields_io_svg( + "risk score", message, color_name, style=style + ) + if shielded is not None: + return shielded + return _flat_badge_svg("risk score", message, _NAMED_HEX[color_name]) diff --git a/tests/test_api.py b/tests/test_api.py index 420805f..f67c14a 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -146,11 +146,13 @@ def test_write_api_v1_writes_full_tree(tmp_path: Path): setup_detail = json.loads((tmp_path / "skills" / "coder" / "setup.json").read_text()) assert setup_detail["verdict"] == "malicious" - # Badge files exist and the SVGs are well-formed strings. + # Badge files exist and the SVGs are well-formed strings. for-the-badge + # uppercases the message, so accept either case here - this test is about + # write_api_v1 wiring, not the badge renderer's exact output. verdict_svg = (tmp_path / "skills" / "coder" / "setup" / "badge" / "status.svg").read_text() assert verdict_svg.startswith("") - assert "malicious" in verdict_svg + assert "malicious" in verdict_svg.lower() risk_json = json.loads( (tmp_path / "skills" / "coder" / "setup" / "badge" / "score.json").read_text() diff --git a/tests/test_badges.py b/tests/test_badges.py index 81735b4..eb90aac 100644 --- a/tests/test_badges.py +++ b/tests/test_badges.py @@ -4,9 +4,19 @@ import re +import pytest + from scanner import badges +@pytest.fixture(autouse=True) +def _reset_shields_circuit_breaker(): + """Otherwise a fail-path test would disable shields for everything after it.""" + badges.reset_shields_circuit_breaker() + yield + badges.reset_shields_circuit_breaker() + + def test_status_badge_json_colors_by_state(): assert badges.status_badge_json("clean")["color"] == "brightgreen" assert badges.status_badge_json("suspicious")["color"] == "yellow" @@ -43,7 +53,11 @@ def test_score_badge_json_carries_shields_contract(): assert payload["message"] == "42/100" -def test_status_badge_svg_is_well_formed(): +def test_status_badge_svg_fallback_is_well_formed(monkeypatch): + """When shields.io is unreachable, status_badge_svg returns the inline + flat-style fallback. Verify that fallback path still produces a usable + badge so the publish job never ships an empty file.""" + monkeypatch.setattr(badges, "fetch_shields_io_svg", lambda *a, **k: None) svg = badges.status_badge_svg("clean") assert svg.startswith("") @@ -55,7 +69,9 @@ def test_status_badge_svg_is_well_formed(): assert "#4c1" in svg, "clean verdict should use the brightgreen hex" -def test_score_badge_svg_color_threshold(): +def test_score_badge_svg_color_threshold_fallback(monkeypatch): + """Fallback path: bands map to the right shields hex.""" + monkeypatch.setattr(badges, "fetch_shields_io_svg", lambda *a, **k: None) high = badges.score_badge_svg(95) low = badges.score_badge_svg(5) # Red vs brightgreen hex. @@ -66,13 +82,16 @@ def test_score_badge_svg_color_threshold(): assert "5/100" in low -def test_svg_width_grows_with_message_length(): - """Width estimation has to widen for longer text or the badge clips.""" +def test_svg_width_grows_with_message_length(monkeypatch): + """Width estimation in the fallback renderer has to widen for longer + text or the badge clips. shields.io's renderer makes the same guarantee + but we test the local one because we control its layout.""" + monkeypatch.setattr(badges, "fetch_shields_io_svg", lambda *a, **k: None) short = badges.status_badge_svg("clean") long_ = badges.status_badge_svg("suspicious") # Pull the width attribute from the opening tag. - sw = int(re.search(r'width="(\d+)"', short).group(1)) - lw = int(re.search(r'width="(\d+)"', long_).group(1)) + sw = int(re.search(r'width="([\d.]+)"', short).group(1).split(".")[0]) + lw = int(re.search(r'width="([\d.]+)"', long_).group(1).split(".")[0]) assert lw > sw @@ -89,3 +108,138 @@ def test_svg_escapes_markup_in_label_and_message(): # And the unescaped sequences must not appear anywhere in the output. for needle in ('"b', '&: '): assert needle not in raw + + +def test_fetch_shields_io_svg_constructs_canonical_url(monkeypatch): + """The fetcher must hit shields.io's static/v1 endpoint with our + label/message/color/style as query params, and identify itself with our + User-Agent so shields.io doesn't 403 us like it does the urllib default.""" + captured: dict[str, object] = {} + + class _FakeResp: + status = 200 + + def read(self): + return b'' + + def __enter__(self): + return self + + def __exit__(self, *_a): + return False + + def _fake_urlopen(req, timeout): + captured["url"] = req.full_url + captured["ua"] = req.get_header("User-agent") + captured["timeout"] = timeout + return _FakeResp() + + monkeypatch.setattr(badges.urllib.request, "urlopen", _fake_urlopen) + svg = badges.fetch_shields_io_svg( + "skill scan", "clean", "brightgreen", style="for-the-badge" + ) + + assert svg is not None and svg.startswith("/100`` and the banded colour name into + the shields call, not the local hex (shields wants colour names).""" + captured: dict[str, object] = {} + + def _capture(label, message, color, *, style): + captured["label"] = label + captured["message"] = message + captured["color"] = color + captured["style"] = style + return "" + + monkeypatch.setattr(badges, "fetch_shields_io_svg", _capture) + badges.score_badge_svg(95) + assert captured == { + "label": "risk score", + "message": "95/100", + "color": "red", + "style": badges.DEFAULT_BADGE_STYLE, + } + + +def test_shields_circuit_breaker_short_circuits_after_first_failure(monkeypatch): + """After the first failure, every subsequent call short-circuits without + touching the network. Bounds worst-case publish stall to one timeout.""" + call_count = {"n": 0} + + def _boom(*_a, **_k): + call_count["n"] += 1 + raise TimeoutError("network down") + + monkeypatch.setattr(badges.urllib.request, "urlopen", _boom) + + first = badges.fetch_shields_io_svg("skill scan", "clean", "brightgreen") + second = badges.fetch_shields_io_svg("skill scan", "suspicious", "yellow") + third = badges.fetch_shields_io_svg("risk score", "99/100", "red") + + assert first is None + assert second is None + assert third is None + assert call_count["n"] == 1, "network must be touched exactly once" + + +def test_shields_circuit_breaker_resets_for_next_run(): + badges._shields_disabled_for_run = True + badges.reset_shields_circuit_breaker() + assert badges._shields_disabled_for_run is False