Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 121 additions & 17 deletions scanner/badges.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,54 @@
"""SVG and shields.io-endpoint JSON badge generators.

The JSON shape matches shields.io's ``endpoint`` badge contract so a consumer
can embed ``https://img.shields.io/endpoint?url=<our-json-url>`` directly in
a README. The SVG endpoint generates a flat-style badge inline (no shields.io
dependency, no network hop) for sites that want the badge served from the
same origin as the rest of the report.

Stability: ``schemaVersion``, ``label``, ``message``, ``color``, and
``cacheSeconds`` are part of the v1 contract. The SVG layout (two-rect flat
badge, 11px Verdana) is the contract for ``.svg`` consumers.
JSON endpoints (``/badge/<name>.json``) match shields.io's ``endpoint``
contract; field names are part of the v1 stability promise.

SVG endpoints (``/badge/<name>.svg``) serve shields.io's ``for-the-badge``
style fetched at publish time. The inline two-rect flat renderer is kept
as a fallback for when shields.io is unreachable.
"""

from __future__ import annotations

import logging
import urllib.parse
import urllib.request
Comment thread
DevelopmentCats marked this conversation as resolved.
from typing import Any
from xml.sax.saxutils import escape as _xml_escape

log = logging.getLogger(__name__)

SHIELDS_SCHEMA_VERSION = 1

# Cache hint for shields.io: 5 minutes lines up with the registry-server
# proxy's cache TTL and the catalogue refresh cadence.
DEFAULT_CACHE_SECONDS = 300

# Canonical badge style. Any shields.io built-in works here.
DEFAULT_BADGE_STYLE = "for-the-badge"

# ``static/v1`` takes label/message/color as query params, so we don't have
# to escape dashes (``-`` -> ``--``) the way the path-style endpoint requires.
_SHIELDS_STATIC_ENDPOINT = "https://img.shields.io/static/v1"

_SHIELDS_FETCH_TIMEOUT_SECONDS = 10

# shields.io 403s the default ``Python-urllib`` UA, so identify ourselves.
_SHIELDS_USER_AGENT = (
"coder-skill-scanner/1 (+https://scanner.registry.coder.com)"
)

# Set to True after the first shields.io failure inside a run, so the rest
# of the run skips straight to the fallback. Only tests ever clear it: each
# run is its own process, so the flag starts out False again next time.
_shields_disabled_for_run: bool = False


def reset_shields_circuit_breaker() -> None:
    """Clear ``_shields_disabled_for_run``. Used by tests.

    After this call the module-level flag is ``False`` again, so the next
    ``fetch_shields_io_svg`` call is allowed to attempt a network fetch
    instead of returning ``None`` immediately.
    """
    # ``global`` is required because the assignment would otherwise create
    # a function-local name and leave the module flag untouched.
    global _shields_disabled_for_run
    _shields_disabled_for_run = False


# Shields.io color names.
_VERDICT_COLORS: dict[str, str] = {
"clean": "brightgreen",
Expand Down Expand Up @@ -151,13 +178,90 @@ def _flat_badge_svg(label: str, message: str, color_hex: str) -> str:
)


def status_badge_svg(verdict: str) -> str:
"""Render the status badge (categorical scan outcome) as inline SVG."""
color = _NAMED_HEX[_VERDICT_COLORS.get(verdict, "lightgrey")]
return _flat_badge_svg("skill scan", verdict, color)
def fetch_shields_io_svg(
    label: str,
    message: str,
    color: str,
    *,
    style: str = DEFAULT_BADGE_STYLE,
    timeout: float = _SHIELDS_FETCH_TIMEOUT_SECONDS,
) -> str | None:
    """Ask shields.io to render a badge and return the SVG text.

    ``label``, ``message``, ``color`` and ``style`` are sent as query
    parameters to ``_SHIELDS_STATIC_ENDPOINT``; ``timeout`` is handed to
    ``urlopen`` unchanged.

    Returns the UTF-8 decoded response body when shields.io answers with
    status 200 and the body (ignoring leading whitespace) starts with
    ``<svg``. Every other outcome -- a non-200 status, any exception raised
    while fetching or decoding, or a body that is not SVG -- is logged as a
    warning and reported as ``None``; nothing is raised to the caller.

    The first such failure sets ``_shields_disabled_for_run``, after which
    every later call returns ``None`` straight away without touching the
    network.
    """
    global _shields_disabled_for_run
    if _shields_disabled_for_run:
        return None

    params = {"label": label, "message": message, "color": color, "style": style}
    url = f"{_SHIELDS_STATIC_ENDPOINT}?{urllib.parse.urlencode(params)}"
    # The stock ``Python-urllib/x.y`` User-Agent gets a 403 from shields.io,
    # so send the scanner's own identifier instead.
    request = urllib.request.Request(
        url,
        headers={"User-Agent": _SHIELDS_USER_AGENT, "Accept": "image/svg+xml"},
    )

    # ``svg_text`` stays ``None`` on every failure path; the single check at
    # the bottom turns that into "trip the breaker and report None".
    svg_text = None
    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            if response.status == 200:
                svg_text = response.read().decode("utf-8")
            else:
                log.warning(
                    "shields.io HTTP %s for %s; disabling fetch this run",
                    response.status,
                    url,
                )
    except Exception as exc:
        # Deliberately broad: a badge fetch must never abort the run.
        log.warning(
            "shields.io fetch failed for %s: %s; disabling fetch this run",
            url,
            exc,
        )
        svg_text = None
    else:
        if svg_text is not None and not svg_text.lstrip().startswith("<svg"):
            log.warning(
                "shields.io non-SVG body for %s; disabling fetch this run",
                url,
            )
            svg_text = None

    if svg_text is None:
        _shields_disabled_for_run = True
    return svg_text


def status_badge_svg(verdict: str, *, style: str = DEFAULT_BADGE_STYLE) -> str:
    """Return the status badge (categorical scan outcome) as an SVG string.

    The verdict is mapped to a shields.io colour name through
    ``_VERDICT_COLORS`` (``"lightgrey"`` for verdicts not in the table) and
    the badge is requested from shields.io in the given ``style``. When that
    fetch yields ``None`` the badge is rendered locally by
    ``_flat_badge_svg`` using the matching ``_NAMED_HEX`` colour.
    """
    badge_label = "skill scan"
    badge_color = _VERDICT_COLORS.get(verdict, "lightgrey")

    remote_svg = fetch_shields_io_svg(
        badge_label, verdict, badge_color, style=style
    )
    if remote_svg is None:
        # The hex lookup is only needed by the local fallback renderer.
        return _flat_badge_svg(badge_label, verdict, _NAMED_HEX[badge_color])
    return remote_svg


def score_badge_svg(risk_score: int, *, style: str = DEFAULT_BADGE_STYLE) -> str:
    """Return the score badge (numeric risk score) as an SVG string.

    The badge text is ``"<risk_score>/100"`` and its colour name comes from
    ``_risk_color``. The badge is requested from shields.io in the given
    ``style``; when that fetch yields ``None`` it is rendered locally by
    ``_flat_badge_svg`` using the matching ``_NAMED_HEX`` colour.
    """
    badge_label = "risk score"
    band_color = _risk_color(risk_score)
    score_text = f"{risk_score}/100"

    remote_svg = fetch_shields_io_svg(
        badge_label, score_text, band_color, style=style
    )
    if remote_svg is None:
        # The hex lookup is only needed by the local fallback renderer.
        return _flat_badge_svg(badge_label, score_text, _NAMED_HEX[band_color])
    return remote_svg
6 changes: 4 additions & 2 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,13 @@ def test_write_api_v1_writes_full_tree(tmp_path: Path):
setup_detail = json.loads((tmp_path / "skills" / "coder" / "setup.json").read_text())
assert setup_detail["verdict"] == "malicious"

# Badge files exist and the SVGs are well-formed strings.
# Badge files exist and the SVGs are well-formed strings. for-the-badge
# uppercases the message, so accept either case here - this test is about
# write_api_v1 wiring, not the badge renderer's exact output.
verdict_svg = (tmp_path / "skills" / "coder" / "setup" / "badge" / "status.svg").read_text()
assert verdict_svg.startswith("<svg")
assert verdict_svg.rstrip().endswith("</svg>")
assert "malicious" in verdict_svg
assert "malicious" in verdict_svg.lower()

risk_json = json.loads(
(tmp_path / "skills" / "coder" / "setup" / "badge" / "score.json").read_text()
Expand Down
166 changes: 160 additions & 6 deletions tests/test_badges.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,19 @@

import re

import pytest

from scanner import badges


@pytest.fixture(autouse=True)
def _reset_shields_circuit_breaker():
    """Reset the shields.io circuit breaker before and after every test.

    Otherwise a fail-path test would disable shields for everything after it.
    """
    # Before the test: start from an un-tripped breaker.
    badges.reset_shields_circuit_breaker()
    yield
    # After the test: do not leak a tripped breaker into the next one.
    badges.reset_shields_circuit_breaker()


def test_status_badge_json_colors_by_state():
assert badges.status_badge_json("clean")["color"] == "brightgreen"
assert badges.status_badge_json("suspicious")["color"] == "yellow"
Expand Down Expand Up @@ -43,7 +53,11 @@ def test_score_badge_json_carries_shields_contract():
assert payload["message"] == "42/100"


def test_status_badge_svg_is_well_formed():
def test_status_badge_svg_fallback_is_well_formed(monkeypatch):
"""When shields.io is unreachable, status_badge_svg returns the inline
flat-style fallback. Verify that fallback path still produces a usable
badge so the publish job never ships an empty file."""
monkeypatch.setattr(badges, "fetch_shields_io_svg", lambda *a, **k: None)
svg = badges.status_badge_svg("clean")
assert svg.startswith("<svg ")
assert svg.rstrip().endswith("</svg>")
Expand All @@ -55,7 +69,9 @@ def test_status_badge_svg_is_well_formed():
assert "#4c1" in svg, "clean verdict should use the brightgreen hex"


def test_score_badge_svg_color_threshold():
def test_score_badge_svg_color_threshold_fallback(monkeypatch):
"""Fallback path: bands map to the right shields hex."""
monkeypatch.setattr(badges, "fetch_shields_io_svg", lambda *a, **k: None)
high = badges.score_badge_svg(95)
low = badges.score_badge_svg(5)
# Red vs brightgreen hex.
Expand All @@ -66,13 +82,16 @@ def test_score_badge_svg_color_threshold():
assert "5/100" in low


def test_svg_width_grows_with_message_length():
"""Width estimation has to widen for longer text or the badge clips."""
def test_svg_width_grows_with_message_length(monkeypatch):
"""Width estimation in the fallback renderer has to widen for longer
text or the badge clips. shields.io's renderer makes the same guarantee
but we test the local one because we control its layout."""
monkeypatch.setattr(badges, "fetch_shields_io_svg", lambda *a, **k: None)
short = badges.status_badge_svg("clean")
long_ = badges.status_badge_svg("suspicious")
# Pull the width attribute from the opening tag.
sw = int(re.search(r'width="(\d+)"', short).group(1))
lw = int(re.search(r'width="(\d+)"', long_).group(1))
sw = int(re.search(r'width="([\d.]+)"', short).group(1).split(".")[0])
lw = int(re.search(r'width="([\d.]+)"', long_).group(1).split(".")[0])
assert lw > sw


Expand All @@ -89,3 +108,138 @@ def test_svg_escapes_markup_in_label_and_message():
# And the unescaped sequences must not appear anywhere in the output.
for needle in ('<script', '>"b', '&: '):
assert needle not in raw


def test_fetch_shields_io_svg_constructs_canonical_url(monkeypatch):
    """The fetcher targets shields.io's ``static/v1`` endpoint, carries
    label/message/color/style as query parameters, and sends the scanner's
    own User-Agent rather than the urllib default that shields.io 403s."""
    calls = []

    class _OkResponse:
        status = 200

        def __enter__(self):
            return self

        def __exit__(self, *_exc_info):
            return False

        def read(self):
            return b'<svg xmlns="http://www.w3.org/2000/svg"/>'

    def _record_and_respond(req, timeout):
        calls.append((req.full_url, req.get_header("User-agent"), timeout))
        return _OkResponse()

    monkeypatch.setattr(badges.urllib.request, "urlopen", _record_and_respond)
    result = badges.fetch_shields_io_svg(
        "skill scan", "clean", "brightgreen", style="for-the-badge"
    )

    assert result is not None and result.startswith("<svg")
    requested_url, user_agent, _timeout = calls[0]
    assert requested_url.startswith("https://img.shields.io/static/v1?")
    expected_fragments = (
        "label=skill+scan",
        "message=clean",
        "color=brightgreen",
        "style=for-the-badge",
    )
    for fragment in expected_fragments:
        assert fragment in requested_url
    assert "coder-skill-scanner" in user_agent


def test_fetch_shields_io_svg_returns_none_on_http_error(monkeypatch):
    """A non-200 response (here a 503) must come back as ``None`` so the
    caller switches to the inline renderer instead of a broken image."""

    class _UnavailableResponse:
        status = 503

        def __enter__(self):
            return self

        def __exit__(self, *_exc_info):
            return False

        def read(self):
            return b"oops"

    def _always_unavailable(*_args, **_kwargs):
        return _UnavailableResponse()

    monkeypatch.setattr(badges.urllib.request, "urlopen", _always_unavailable)

    outcome = badges.fetch_shields_io_svg("skill scan", "clean", "brightgreen")
    assert outcome is None


def test_fetch_shields_io_svg_returns_none_on_exception(monkeypatch):
    """An exception raised by ``urlopen`` must be swallowed and reported as
    ``None``; if it propagated, the publish-pages job would crash the first
    time shields.io has an outage."""

    def _network_down(*_args, **_kwargs):
        raise TimeoutError("network down")

    monkeypatch.setattr(badges.urllib.request, "urlopen", _network_down)

    outcome = badges.fetch_shields_io_svg("a", "b", "red")
    assert outcome is None


def test_status_badge_svg_uses_shields_when_available(monkeypatch):
    """Happy path: whatever the shields.io fetch returns is handed back
    unchanged by ``status_badge_svg``."""
    sentinel = '<svg xmlns="http://www.w3.org/2000/svg">SHIELDS</svg>'

    def _fetch_succeeds(*_args, **_kwargs):
        return sentinel

    monkeypatch.setattr(badges, "fetch_shields_io_svg", _fetch_succeeds)

    rendered = badges.status_badge_svg("clean")
    assert rendered == sentinel


def test_score_badge_svg_passes_message_and_color_to_shields(monkeypatch):
    """The score badge must hand ``<n>/100`` and the banded colour *name*
    (not the local hex value) to the shields fetcher, along with the
    default style."""
    seen: dict[str, object] = {}

    def _record(label, message, color, *, style):
        seen.update(label=label, message=message, color=color, style=style)
        return "<svg/>"

    monkeypatch.setattr(badges, "fetch_shields_io_svg", _record)
    badges.score_badge_svg(95)

    expected = {
        "label": "risk score",
        "message": "95/100",
        "color": "red",
        "style": badges.DEFAULT_BADGE_STYLE,
    }
    assert seen == expected


def test_shields_circuit_breaker_short_circuits_after_first_failure(monkeypatch):
    """Once one fetch has failed, later fetches return ``None`` without
    calling ``urlopen`` again, which bounds the worst-case publish stall to
    a single timeout."""
    attempts = []

    def _network_down(*_args, **_kwargs):
        attempts.append(1)
        raise TimeoutError("network down")

    monkeypatch.setattr(badges.urllib.request, "urlopen", _network_down)

    badge_requests = (
        ("skill scan", "clean", "brightgreen"),
        ("skill scan", "suspicious", "yellow"),
        ("risk score", "99/100", "red"),
    )
    outcomes = [badges.fetch_shields_io_svg(*args) for args in badge_requests]

    assert all(outcome is None for outcome in outcomes)
    assert len(attempts) == 1, "network must be touched exactly once"


def test_shields_circuit_breaker_resets_for_next_run():
    """``reset_shields_circuit_breaker`` clears a tripped breaker flag."""
    # Trip the breaker by hand; no network failure is needed for this check.
    badges._shields_disabled_for_run = True
    badges.reset_shields_circuit_breaker()
    assert badges._shields_disabled_for_run is False