From 8bcc659c788da9828163683281e83ddba0dc3414 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 6 May 2026 04:09:39 +0000
Subject: [PATCH 1/3] docs(benchmarks-website): fold planning/ into
 code-adjacent docs

Replace the nine-file `benchmarks-website/planning/` directory with
project-level READMEs and module-level `//!` docs anchored to the code
they describe.

- New `benchmarks-website/README.md` is the v3 entry point: an explanation
  of the side-by-side v2/v3 setup, architecture summary, route table,
  local-dev and deployment recipes, and the cutover plan.
- New `benchmarks-website/AGENTS.md` carries just the still-load-bearing
  agent norms (don't touch v2, don't add a server-side classifier, the
  tooltip / slider / wheel-pan footguns).
- `server/src/lib.rs` gains a crate-level module map + request-flow
  walkthrough (was: components/server.md).
- `server/src/schema.rs` gains the full design-principles + per-table
  contract walkthrough (was: 01-schema.md).
- `server/src/records.rs` and `server/src/ingest.rs` gain the wire
  envelope + HTTP-matrix docs (was: 02-contracts.md).
- `vortex-bench/src/v3.rs` gains the producer mapping + per-binary
  inventory + per-suite query dim values table (was: benchmark-mapping.md
  and components/emitter.md).
- The TODO breadcrumb on `api::charts::collect_group_charts` is rewritten
  to match the way the user already tracks the N+1 follow-up; the
  removed `planning/README.md` cross-reference goes away.
- Decisions from `decisions.md` are folded into the relevant module
  doc where they still drive current behavior; everything else (alpha
  history, deferred phase-2 items) is dropped.

Signed-off-by: Claude <noreply@anthropic.com>
---
 benchmarks-website/AGENTS.md                  |  76 ++++++
 benchmarks-website/README.md                  | 108 +++++++++
 benchmarks-website/planning/00-overview.md    | 104 --------
 benchmarks-website/planning/01-schema.md      | 228 ------------------
 benchmarks-website/planning/02-contracts.md   | 227 -----------------
 benchmarks-website/planning/AGENTS.md         | 172 -------------
 benchmarks-website/planning/README.md         | 188 ---------------
 .../planning/benchmark-mapping.md             | 147 -----------
 .../planning/components/emitter.md            |  86 -------
 .../planning/components/server.md             |  70 ------
 .../planning/components/web-ui.md             |  62 -----
 benchmarks-website/planning/decisions.md      |  95 --------
 benchmarks-website/planning/deferred.md       | 118 ---------
 benchmarks-website/server/src/api/charts.rs   |   9 +-
 benchmarks-website/server/src/api/dto.rs      |   8 +-
 benchmarks-website/server/src/api/mod.rs      |   7 +-
 benchmarks-website/server/src/ingest.rs       |  30 ++-
 benchmarks-website/server/src/lib.rs          |  63 ++++-
 benchmarks-website/server/src/records.rs      |  44 +++-
 benchmarks-website/server/src/schema.rs       | 100 +++++++-
 vortex-bench/src/datasets/mod.rs              |   7 +-
 vortex-bench/src/v3.rs                        |  86 ++++++-
 22 files changed, 501 insertions(+), 1534 deletions(-)
 create mode 100644 benchmarks-website/AGENTS.md
 create mode 100644 benchmarks-website/README.md
 delete mode 100644 benchmarks-website/planning/00-overview.md
 delete mode 100644 benchmarks-website/planning/01-schema.md
 delete mode 100644 benchmarks-website/planning/02-contracts.md
 delete mode 100644 benchmarks-website/planning/AGENTS.md
 delete mode 100644 benchmarks-website/planning/README.md
 delete mode 100644 benchmarks-website/planning/benchmark-mapping.md
 delete mode 100644 benchmarks-website/planning/components/emitter.md
 delete mode 100644 benchmarks-website/planning/components/server.md
 delete mode 100644 benchmarks-website/planning/components/web-ui.md
 delete mode 100644 benchmarks-website/planning/decisions.md
 delete mode 100644 benchmarks-website/planning/deferred.md

diff --git a/benchmarks-website/AGENTS.md b/benchmarks-website/AGENTS.md
new file mode 100644
index 00000000000..bf00e48b855
--- /dev/null
+++ b/benchmarks-website/AGENTS.md
@@ -0,0 +1,76 @@
+<!--
+SPDX-License-Identifier: Apache-2.0
+SPDX-FileCopyrightText: Copyright the Vortex contributors
+-->
+
+# AGENTS.md — `benchmarks-website/`
+
+Read [`README.md`](README.md) first for the architecture and the v2/v3
+side-by-side situation. Then this file. The root [`CLAUDE.md`](../CLAUDE.md)
+covers Rust style, test layout, and commit conventions.
+
+## Don't touch the v2 site
+
+Until the cutover PR lands, the top-level v2 files
+(`server.js`, `src/`, `index.html`, `vite.config.js`, `package.json`,
+`package-lock.json`, `public/`, the top-level `Dockerfile`,
+`docker-compose.yml`, `ec2-init.txt`), the `benchmarks-website` service
+in `docker-compose.yml`, and the `publish-benchmarks-website.yml` workflow
+are production. Don't edit them as part of unrelated work.
+
+## v3 specifics
+
+- **Wire shapes are a coordinated change.** [`server/src/records.rs`](server/src/records.rs),
+  [`vortex-bench/src/v3.rs`](../vortex-bench/src/v3.rs), and (until cutover)
+  [`migrate/src/classifier.rs`](migrate/src/classifier.rs) must agree.
+  Bumping a shape means changing all three plus the snapshot fixtures in
+  one commit.
+- **`measurement_id` is server-internal.** Never put it on the wire. It is
+  a deterministic hash over `commit_sha` plus the dim tuple, computed in
+  [`server/src/db.rs`](server/src/db.rs) and reused by the migrator via
+  the same crate.
+- **Don't write a server-side classifier for live ingest.** The emitter
+  produces v3-shape records directly; the migrator's classifier only
+  exists to translate v2 records once and goes away after cutover.
+- **Don't reach for WASM.** SSR + a thin hydration script in
+  [`server/static/chart-init.js`](server/static/chart-init.js) is the
+  whole client.
+- **Don't re-introduce a server-side commit cap.** `?n=all` is the default
+  for HTML routes; visual downsampling happens client-side via LTTB on the
+  visible commit range only.
+- **Don't refetch on every scope change.** The chart fetches its full
+  history once. Pan, zoom, slider, and the range strip rebuild in place
+  via the in-memory LTTB pass on the cached payload. The single exception
+  is the inline-payload zoom-out path: when the user zooms past the first
+  group's inlined `LANDING_INLINE_N` window for the first time,
+  `chart-init.js` lazy-fetches `?n=all` once and replaces the payload.
+
+## Footguns we have already hit
+
+- **Reverse predecessor walk in the tooltip.** `payload.commits[]` is
+  sorted oldest-first by SQL — `commits[0]` is the oldest, `commits[N-1]`
+  is the newest. For per-row delta the predecessor of `commits[idx]` is
+  at `idx - 1`. We caught a regression where a "fix" flipped this to
+  `idx + 1`; the original walk-backward direction is right.
+- **`pointer-events: auto` on the tooltip host.** The tooltip is
+  positioned at the cursor; making it pointer-interactive causes a
+  flicker loop. Keep it `pointer-events: none` and offset via
+  `transform: translate(12px, 12px)`.
+- **`change` events on the slider.** Use `input` events with a small
+  throttle; `change` only fires on release and feels broken.
+
+## Local dev
+
+```bash
+INGEST_BEARER_TOKEN=dev cargo run -p vortex-bench-server
+cargo nextest run -p vortex-bench-server -p vortex-bench-migrate
+INSTA_UPDATE=auto cargo nextest run -p vortex-bench-server   # update snapshots
+```
+
+For the migrator end-to-end against the real S3 dump:
+
+```bash
+cargo run -p vortex-bench-migrate -- run --output ./bench.duckdb
+VORTEX_BENCH_DB=./bench.duckdb INGEST_BEARER_TOKEN=dev \
+  cargo run -p vortex-bench-server
+```
diff --git a/benchmarks-website/README.md b/benchmarks-website/README.md
new file mode 100644
index 00000000000..6460b6c07b3
--- /dev/null
+++ b/benchmarks-website/README.md
@@ -0,0 +1,108 @@
+<!--
+SPDX-License-Identifier: Apache-2.0
+SPDX-FileCopyrightText: Copyright the Vortex contributors
+-->
+
+# bench.vortex.dev
+
+The website behind `bench.vortex.dev`. The directory currently houses **two
+implementations side by side**, run together until the v3 cutover lands:
+
+- **v2** (top-level files: `server.js`, `src/`, `index.html`, `vite.config.js`,
+  `package.json`, `Dockerfile`, `docker-compose.yml`, `ec2-init.txt`,
+  `public/`). The Node + React stack that has shipped to production for the
+  life of the site. Built and published by
+  [`.github/workflows/publish-benchmarks-website.yml`](../.github/workflows/publish-benchmarks-website.yml).
+- **v3** (`server/` + `migrate/`). A single Rust binary —
+  [`vortex-bench-server`](server/) — that owns a DuckDB file on local disk,
+  serves the API, and renders the HTML. It compiles all static assets
+  (`chart.umd.js`, `chart-init.js`, `style.css`) into the binary so deploys
+  are one file plus a database. Container image at
+  `ghcr.io/vortex-data/vortex/vortex-bench-server:latest`.
+  [`migrate/`](migrate/) is a one-shot tool that loads v2's S3 dataset into a
+  v3 DuckDB; it is throwaway and goes away after cutover.
+
+Live results are produced by
+[`.github/workflows/bench.yml`](../.github/workflows/bench.yml) and
+[`.github/workflows/sql-benchmarks.yml`](../.github/workflows/sql-benchmarks.yml),
+which CI runs after every push to `develop`. Until cutover the same payload is
+emitted to both stacks (v2 via the legacy `--gh-json` path appended to a public
+S3 bucket; v3 via `--gh-json-v3` POSTed to `/api/ingest`).
+
+## v3 architecture in one paragraph
+
+`axum` (HTTP) + `maud` (compile-time HTML) + embedded `duckdb-rs` over a single
+local DB file. Five fact tables (`query_measurements`, `compression_times`,
+`compression_sizes`, `random_access_times`, `vector_search_runs`) plus a
+`commits` dim table — see [`server/src/schema.rs`](server/src/schema.rs) for
+the column contracts. Three HTML routes (`/`, `/chart/{slug}`,
+`/group/{slug}`) and four JSON routes (`GET /api/groups`,
+`GET /api/chart/{slug}`, `GET /api/group/{slug}`, `GET /health`), plus a
+bearer-gated `POST /api/ingest`. Charts render inline on the landing page via
+SSR + lazy hydration; visual downsampling (LTTB to at most
+`MAX_VISIBLE_POINTS = 500` points) happens client-side in
+[`server/static/chart-init.js`](server/static/chart-init.js).
+
+For the per-module crate map and the request-flow walkthrough, see the
+`//!` doc on [`server/src/lib.rs`](server/src/lib.rs). The producer side of
+the ingest contract lives in
+[`vortex-bench/src/v3.rs`](../vortex-bench/src/v3.rs); the historical-data
+side in [`migrate/src/classifier.rs`](migrate/src/classifier.rs).
+
+## Local dev
+
+```bash
+# v3 server (DuckDB lives at ./bench.duckdb by default).
+INGEST_BEARER_TOKEN=dev cargo run -p vortex-bench-server
+# server logs: "bench server listening addr=127.0.0.1:3000 db=bench.duckdb"
+
+# v3 historical migrator (writes a fully populated DuckDB the server can open).
+cargo run -p vortex-bench-migrate -- run --output ./bench.duckdb
+```
+
+Ingest fixture data via the snapshot tests' envelopes (see
+[`server/tests/common/mod.rs`](server/tests/common/mod.rs)) or by hand-rolling
+a JSONL file and POSTing through `scripts/post-ingest.py`.
+
+```bash
+cargo nextest run -p vortex-bench-server -p vortex-bench-migrate
+INSTA_UPDATE=auto cargo nextest run -p vortex-bench-server   # update snapshots
+```
+
+For the v2 stack:
+
+```bash
+cd benchmarks-website
+npm install
+npm run dev
+```
+
+## Deployment
+
+`docker-compose.yml` runs both stacks side by side: v2 on `:80` and v3 on
+`:3001`. `watchtower` polls GHCR every 60s so a fresh image push lands
+automatically. v3 reads `INGEST_BEARER_TOKEN` from
+`/etc/vortex-bench/secrets.env`, persists DuckDB to
+`/opt/benchmarks-website/data/bench.duckdb`, and binds `0.0.0.0:3000` inside
+the container so that the host's `:3001` port mapping forwards through.
+
+The v3 server is throwaway-friendly: every request runs against the local
+DuckDB file, and a fresh boot reapplies the schema DDL idempotently. The
+migrator deletes the target file (and its `.wal`) before populating it, so
+re-running `vortex-bench-migrate run --output ...` is safe.
+
+## Cutover plan (in flight)
+
+The work to flip `bench.vortex.dev` from v2 to v3 is tracked outside this
+repo. The relevant code-side bits:
+
+- v3 deploys today on a separate EC2 host and is exercised by CI's dual-write
+  step against a test bearer token.
+- v2 keeps shipping unchanged until DNS flips. **Do not touch the top-level
+  v2 files unless you are doing the cleanup PR opened post-flip.**
+- The v2 cleanup PR removes everything top-level under `benchmarks-website/`
+  that belongs to v2 (`server.js`, `src/`, `index.html`, `vite.config.js`,
+  `package.json`, `package-lock.json`, `public/`, the top-level `Dockerfile`,
+  `docker-compose.yml`, `ec2-init.txt`, and the
+  `publish-benchmarks-website.yml` workflow). The v3 tree under `server/` and
+  `migrate/` is untouched.
diff --git a/benchmarks-website/planning/00-overview.md b/benchmarks-website/planning/00-overview.md
deleted file mode 100644
index c6c3e05c57f..00000000000
--- a/benchmarks-website/planning/00-overview.md
+++ /dev/null
@@ -1,104 +0,0 @@
-<!--
-SPDX-License-Identifier: Apache-2.0
-SPDX-FileCopyrightText: Copyright the Vortex contributors
--->
-
-# 00 - Overview
-
-## What we're building
-
-A replacement for the current `bench.vortex.dev` site. The new
-stack is a **single Rust binary** (axum + maud + duckdb-rs) that
-owns a **DuckDB database** on local disk and serves the website
-plus an `/api/ingest` route. CI eventually POSTs new benchmark
-results there. There is no separate ingester service, no S3
-coordination layer for writes, no client-side WASM.
-
-The server crate is `vortex-bench-server` at
-`benchmarks-website/server/`.
-
-## Phasing
-
-We build this in two phases. **Plan only the first.**
-
-### Alpha (this plan)
-
-The smallest end-to-end loop that proves the design:
-
-1. **Schema** locked enough to ingest one benchmark result.
-2. **Server**: open DuckDB, accept a bearer-token-authenticated POST,
-   serve a couple of read routes.
-3. **Emitter**: `vortex-bench --gh-json-v3` + a tiny POST script.
-4. **Web UI**: one landing page + one chart page rendered against a
-   fixture DB.
-
-That's it. No production deploy, no historical data import, no CI
-workflow integration, no admin tooling, no schema migration
-framework, no auth beyond the shared bearer token. All of those
-live in [`deferred.md`](./deferred.md).
-
-The alpha runs on a developer machine. v2 keeps running in
-production unchanged. There is no cutover in alpha.
-
-### Phase 2 and beyond
-
-Once the alpha loop is green, we layer in production deploy,
-historical migration, CI dual-write, and the rest of the v2-parity
-work. Stubs are in [`deferred.md`](./deferred.md).
-
-## Architecture (alpha)
-
-One process, one DB file. The server is the API and the website.
-The emitter writes JSONL of bare records; a small POST script
-wraps and uploads them. CI isn't wired up yet; ingest happens
-manually during alpha.
-
-## Components
-
-Three components for alpha. Each is one workstream, one branch, one
-PR.
-
-| Component | Plan | Owns |
-|---|---|---|
-| Server | [components/server.md](./components/server.md) | DuckDB open + schema, bearer-auth ingest, read routes, HTML routes mounted from web-ui |
-| Emitter | [components/emitter.md](./components/emitter.md) | `vortex-bench --gh-json-v3` + the post-ingest script |
-| Web UI | [components/web-ui.md](./components/web-ui.md) | Landing page + chart page, against a fixture DuckDB |
-
-### Dependencies
-
-The schema feeds all three components. The contracts feed the
-server and the emitter. With both stable, **all three components
-can be worked on in parallel**.
-
-## Goals
-
-In priority order:
-
-1. **End-to-end alpha loop works.** Emit → POST → store → render.
-2. **Schema is the right shape.** Five fact tables (one per
-   measurement family) plus a `commits` dim. See
-   [`01-schema.md`](./01-schema.md).
-3. **Each component is small enough that one agent can finish it
-   in one PR.** No mega-PRs.
-
-Cutover, parity, and "faster than v2" are explicit non-goals at
-alpha; they come back in phase 2.
-
-## Shared docs
-
-- [`00-overview.md`](./00-overview.md) (this file)
-- [`01-schema.md`](./01-schema.md) - the five fact tables + `commits`
-- [`02-contracts.md`](./02-contracts.md) - wire shapes + HTTP error
-  matrix + auth header
-- [`benchmark-mapping.md`](./benchmark-mapping.md) - existing
-  benchmarks → fact tables
-- [`decisions.md`](./decisions.md) - resolved decisions
-- [`deferred.md`](./deferred.md) - phase-2 stubs
-
-## Status of v2 during alpha
-
-v2 stays in production untouched. Do not edit
-`benchmarks-website/server.js`, `benchmarks-website/src/`, or any
-other v2 files at `benchmarks-website/` top level. v3 lives in the
-sibling subdirectory at `benchmarks-website/server/`
-(`vortex-bench-server` crate).
diff --git a/benchmarks-website/planning/01-schema.md b/benchmarks-website/planning/01-schema.md
deleted file mode 100644
index dfc6b05ba27..00000000000
--- a/benchmarks-website/planning/01-schema.md
+++ /dev/null
@@ -1,228 +0,0 @@
-<!--
-SPDX-License-Identifier: Apache-2.0
-SPDX-FileCopyrightText: Copyright the Vortex contributors
--->
-
-# 01 - DuckDB schema (alpha)
-
-The persistent data model. **One `commits` dim table plus five fact
-tables, one per measurement family.** No lookup tables, no views, no
-migration framework; those are deferred (see
-[`deferred.md`](./deferred.md)).
-
-## Design principles
-
-1. **One fact table per (dim shape, value shape).** A row in any
-   fact table has every value column populated; NULLs only appear
-   in genuinely optional dimensions.
-2. **No discriminator columns spanning families.** No `metric_kind`
-   enum forcing five shapes into one row.
-3. **No JSON escape hatch.** New benchmark parameters become real
-   columns. Adding a nullable column is cheap; the readability win
-   is worth it.
-4. **Hashed primary key per table.** Each fact table has a
-   `measurement_id` that is a deterministic 64-bit hash of
-   `commit_sha` plus that table's dimensional tuple. Including
-   `commit_sha` makes every (commit, dim) pair a distinct row -
-   that's what the chart pages render as a time series.
-   Server-internal; not on the wire.
-5. **`commits` is the only dim table.** Engine, format, dataset,
-   etc. stay as inline strings; DuckDB's dictionary encoding makes
-   a lookup table pointless.
-6. **Ratios are not stored.** Computed at query time from
-   `compression_sizes`.
-
-## Why five fact tables, not one
-
-The five families have genuinely different shapes:
-
-| Table | Shape sketch |
-|---|---|
-| `query_measurements` | dataset + query_idx + engine + format + storage → timing **and** memory |
-| `compression_times` | dataset + format + op∈{encode,decode} → timing |
-| `compression_sizes` | dataset + format → bytes |
-| `random_access_times` | dataset + format → timing (different dataset namespace) |
-| `vector_search_runs` | dataset + layout + flavor + threshold → timing + counters |
-
-Forcing them into one table either bloats every row with columns
-that are NULL for ~99% of rows (`layout`, `flavor`, `threshold`,
-`matches`, `rows_scanned`, `bytes_scanned`) or splits scan results
-across multiple rows that have to be re-joined to render one chart.
-
-## Group / chart / series fit
-
-The render-time view used by `/api/groups` and `/api/chart/:slug`
-is mechanically derivable per table:
-
-| Table | Group key | Chart key | Series key |
-|---|---|---|---|
-| `query_measurements` | `(dataset, dataset_variant, scale_factor, storage)` | `(dataset, query_idx)` | `(engine, format)` |
-| `compression_times` | constant `"Compression"` | `(dataset, dataset_variant)` | `(format, op)` |
-| `compression_sizes` | constant `"Compression Size"` | `(dataset, dataset_variant)` | `format` |
-| `random_access_times` | constant `"Random Access"` | `dataset` | `format` |
-| `vector_search_runs` | `(dataset, layout)` | `(dataset, layout, threshold)` | `flavor` |
-
-The classifier logic in v2's `v2-classifier.js` mostly disappears -
-each table already knows what suite it represents.
-
-## Tables
-
-DDL is the server's call. Below is the column contract: name, type
-family, and whether it's NOT NULL. The server agent picks exact
-DuckDB types, indexes, and constraint syntax.
-
-### `commits` (dim)
-
-| Column | Type | Required? | Notes |
-|---|---|---|---|
-| `commit_sha` | string | yes (PK) | 40-hex lowercase |
-| `timestamp` | timestamptz | yes | |
-| `message` | string | optional | first line only |
-| `author_name` | string | optional | |
-| `author_email` | string | optional | |
-| `committer_name` | string | optional | |
-| `committer_email` | string | optional | |
-| `tree_sha` | string | yes | |
-| `url` | string | yes | |
-
-Populated from the envelope on every `/api/ingest` call.
-
-### `query_measurements`
-
-SQL query suites: TPC-H, TPC-DS, ClickBench, StatPopGen,
-PolarSignals, Fineweb, GhArchive, Public-BI. Memory columns are
-populated when the run was instrumented for memory; NULL otherwise.
-Timing and memory share the row because they're produced together
-for the same query execution.
-
-| Column | Type | Required? | Notes |
-|---|---|---|---|
-| `measurement_id` | int64 | yes (PK) | hash of dim tuple |
-| `commit_sha` | string | yes | FK to `commits` |
-| `dataset` | string | yes | `tpch`, `tpcds`, `clickbench`, ... |
-| `dataset_variant` | string | optional | ClickBench flavor, Public-BI name |
-| `scale_factor` | string | optional | TPC SF; n_rows for StatPopGen / PolarSignals |
-| `query_idx` | int32 | yes | 1-based |
-| `storage` | string | yes | `nvme` or `s3` |
-| `engine` | string | yes | `datafusion`, `duckdb`, `vortex`, `arrow` |
-| `format` | string | yes | `vortex-file-compressed`, `parquet`, `lance`, ... |
-| `value_ns` | int64 | yes | median timing, ns |
-| `all_runtimes_ns` | list&lt;int64&gt; | yes | per-iteration timings |
-| `peak_physical` | int64 | optional | bytes |
-| `peak_virtual` | int64 | optional | bytes |
-| `physical_delta` | int64 | optional | bytes |
-| `virtual_delta` | int64 | optional | bytes |
-| `env_triple` | string | optional | e.g. `x86_64-linux-gnu` |
-
-### `compression_times`
-
-Encode/decode timings from `compress-bench`.
-
-| Column | Type | Required? | Notes |
-|---|---|---|---|
-| `measurement_id` | int64 | yes (PK) | |
-| `commit_sha` | string | yes | FK |
-| `dataset` | string | yes | |
-| `dataset_variant` | string | optional | |
-| `format` | string | yes | |
-| `op` | string | yes | `encode` or `decode` |
-| `value_ns` | int64 | yes | |
-| `all_runtimes_ns` | list&lt;int64&gt; | yes | |
-| `env_triple` | string | optional | |
-
-### `compression_sizes`
-
-On-disk sizes from `compress-bench`. One-shot, no per-iteration data.
-Compression ratios in v2 (`vortex:parquet-zstd ratio/...`) are a
-SELECT over this table joined to itself; they're not stored.
-
-| Column | Type | Required? | Notes |
-|---|---|---|---|
-| `measurement_id` | int64 | yes (PK) | |
-| `commit_sha` | string | yes | FK |
-| `dataset` | string | yes | |
-| `dataset_variant` | string | optional | |
-| `format` | string | yes | |
-| `value_bytes` | int64 | yes | |
-
-### `random_access_times`
-
-Take-time timings from `random-access-bench`. Different dataset
-namespace from `compression_times` - kept in its own table so
-dataset filters never have to disambiguate which suite a row
-belongs to.
-
-| Column | Type | Required? | Notes |
-|---|---|---|---|
-| `measurement_id` | int64 | yes (PK) | |
-| `commit_sha` | string | yes | FK |
-| `dataset` | string | yes | |
-| `format` | string | yes | |
-| `value_ns` | int64 | yes | |
-| `all_runtimes_ns` | list&lt;int64&gt; | yes | |
-| `env_triple` | string | optional | |
-
-### `vector_search_runs`
-
-Cosine-similarity scans from `vector-search-bench`. The only family
-that emits a timing **plus side counters** for the same scan;
-keeping them in one row avoids a 1:N split that has to be re-joined
-on read.
-
-| Column | Type | Required? | Notes |
-|---|---|---|---|
-| `measurement_id` | int64 | yes (PK) | |
-| `commit_sha` | string | yes | FK |
-| `dataset` | string | yes | e.g. `cohere-large-10m` |
-| `layout` | string | yes | `TrainLayout`, e.g. `partitioned` |
-| `flavor` | string | yes | `VectorFlavor`, e.g. `vortex-turboquant` |
-| `threshold` | double | yes | cosine threshold |
-| `value_ns` | int64 | yes | per-scan wall time |
-| `all_runtimes_ns` | list&lt;int64&gt; | yes | |
-| `matches` | int64 | yes | |
-| `rows_scanned` | int64 | yes | |
-| `bytes_scanned` | int64 | yes | |
-| `iterations` | int32 | yes | not part of the dim hash |
-| `env_triple` | string | optional | |
-
-## `measurement_id` hash
-
-Per-table xxhash64 over `commit_sha` plus that table's dimensional
-tuple. Including `commit_sha` makes every (commit, dim) pair a
-distinct row, which is what the chart pages render as a time
-series. The hash is **server-internal** - the wire never carries
-it. The server's INSERT path computes it before each
-`INSERT ... ON CONFLICT DO UPDATE`, which gives idempotent upsert
-on re-emission of the same (commit, dim) pair. Encoding details
-(input order, NULL handling, byte layout) are the server's call,
-since the value never crosses a process boundary.
-
-When the historical migrator lands (deferred), it reuses the
-server's hash function via a shared crate.
-
-## Storage values
-
-`storage` is `'nvme'` or `'s3'`. Legacy `gcs` is dropped. Only
-`query_measurements` carries `storage` - the other families don't
-fan out by storage backend.
-
-## Schema changes during alpha
-
-There is no migration framework. If you change the schema:
-
-1. Update this doc.
-2. Update the server's DDL.
-3. Delete any local `bench.duckdb` and re-run.
-
-A real forward-only migration framework lands post-alpha. See
-[`deferred.md`](./deferred.md).
-
-## What's intentionally NOT here (deferred)
-
-- `schema_meta` and migration framework.
-- `known_engines` / `known_formats` / `known_datasets` lookup
-  tables and seed SQL.
-- Views (`v_compression_ratios`, `v_latest_per_group`, etc.).
-- Pre-downsampled aliases.
-- A `microbench_runs` table - reserved as the next family to add
-  when microbench results start landing.
diff --git a/benchmarks-website/planning/02-contracts.md b/benchmarks-website/planning/02-contracts.md
deleted file mode 100644
index 8f32072123d..00000000000
--- a/benchmarks-website/planning/02-contracts.md
+++ /dev/null
@@ -1,227 +0,0 @@
-<!--
-SPDX-License-Identifier: Apache-2.0
-SPDX-FileCopyrightText: Copyright the Vortex contributors
--->
-
-# 02 - Wire contracts (alpha)
-
-The cross-component glue between the emitter, the POST script, and
-the server. Wire-format only - implementations are local to each
-component.
-
-If two components disagree about a shape, **this file is right**
-and both update.
-
-## Records are discriminated by `kind`
-
-Each record on the wire carries a `kind` field that picks one of
-the [five fact tables](./01-schema.md#tables). The emitter never
-decides "what column" - it decides "what kind", and the rest of the
-row is that kind's flat field set.
-
-| `kind` | Destination table |
-|---|---|
-| `query_measurement` | `query_measurements` |
-| `compression_time` | `compression_times` |
-| `compression_size` | `compression_sizes` |
-| `random_access_time` | `random_access_times` |
-| `vector_search_run` | `vector_search_runs` |
-
-**Unknown `kind` values cause a 400.** Unknown fields within a known
-`kind` also cause a 400. Version skew should fail loudly.
-
-## Per-kind record shapes
-
-All shared metadata first; per-kind fields after.
-
-### `query_measurement`
-
-| Field | Type | Required? | Notes |
-|---|---|---|---|
-| `kind` | `"query_measurement"` | yes | discriminator |
-| `commit_sha` | string | yes | 40-hex lowercase |
-| `dataset` | string | yes | `tpch`, `tpcds`, `clickbench`, ... |
-| `dataset_variant` | string | optional | ClickBench flavor, Public-BI name |
-| `scale_factor` | string | optional | TPC SF; n_rows for StatPopGen / PolarSignals |
-| `query_idx` | integer | yes | 1-based |
-| `storage` | enum string | yes | `nvme` or `s3` |
-| `engine` | string | yes | `datafusion`, `duckdb`, `vortex`, `arrow` |
-| `format` | string | yes | `vortex-file-compressed`, `parquet`, `lance`, ... |
-| `value_ns` | integer | yes | median timing, ns |
-| `all_runtimes_ns` | array&lt;integer&gt; | yes | per-iteration timings (may be empty) |
-| `peak_physical` | integer | optional | bytes |
-| `peak_virtual` | integer | optional | bytes |
-| `physical_delta` | integer | optional | bytes |
-| `virtual_delta` | integer | optional | bytes |
-| `env_triple` | string | optional | e.g. `x86_64-linux-gnu` |
-
-The four memory fields are populated together (all four or none).
-
-### `compression_time`
-
-| Field | Type | Required? | Notes |
-|---|---|---|---|
-| `kind` | `"compression_time"` | yes | |
-| `commit_sha` | string | yes | |
-| `dataset` | string | yes | |
-| `dataset_variant` | string | optional | |
-| `format` | string | yes | |
-| `op` | enum string | yes | `encode` or `decode` |
-| `value_ns` | integer | yes | |
-| `all_runtimes_ns` | array&lt;integer&gt; | yes | |
-| `env_triple` | string | optional | |
-
-### `compression_size`
-
-| Field | Type | Required? | Notes |
-|---|---|---|---|
-| `kind` | `"compression_size"` | yes | |
-| `commit_sha` | string | yes | |
-| `dataset` | string | yes | |
-| `dataset_variant` | string | optional | |
-| `format` | string | yes | |
-| `value_bytes` | integer | yes | |
-
-### `random_access_time`
-
-| Field | Type | Required? | Notes |
-|---|---|---|---|
-| `kind` | `"random_access_time"` | yes | |
-| `commit_sha` | string | yes | |
-| `dataset` | string | yes | random-access dataset name (e.g. `chimp`, `taxi`) |
-| `format` | string | yes | |
-| `value_ns` | integer | yes | |
-| `all_runtimes_ns` | array&lt;integer&gt; | yes | |
-| `env_triple` | string | optional | |
-
-### `vector_search_run`
-
-| Field | Type | Required? | Notes |
-|---|---|---|---|
-| `kind` | `"vector_search_run"` | yes | |
-| `commit_sha` | string | yes | |
-| `dataset` | string | yes | e.g. `cohere-large-10m` |
-| `layout` | string | yes | `TrainLayout`, e.g. `partitioned` |
-| `flavor` | string | yes | `VectorFlavor`, e.g. `vortex-turboquant` |
-| `threshold` | number | yes | cosine threshold |
-| `value_ns` | integer | yes | per-scan wall time (median of iterations) |
-| `all_runtimes_ns` | array&lt;integer&gt; | yes | |
-| `matches` | integer | yes | |
-| `rows_scanned` | integer | yes | |
-| `bytes_scanned` | integer | yes | |
-| `iterations` | integer | yes | |
-| `env_triple` | string | optional | |
-
-## Ingest envelope
-
-`/api/ingest` accepts one envelope per POST. The envelope wraps a
-heterogeneous batch of records (any mix of `kind`s). Required
-top-level fields:
-
-- `run_meta`: object with `benchmark_id` (string), `schema_version`
-  (integer; `1` at alpha), `started_at` (RFC 3339 timestamp).
-- `commit`: object with the columns of the [`commits`
-  table](./01-schema.md#commits-dim), keyed by their column names
-  with `commit_sha` renamed to `sha`. The server upserts this row
-  before applying records.
-- `records`: array of per-`kind` records as defined above.
-
-`vortex-bench --gh-json-v3 <path>` writes JSONL of bare records
-only. The envelope (`run_meta` + `commit`) is added by the
-post-ingest script before POSTing - this keeps the Rust emitter
-dependency-light.
-
-The post-ingest script is responsible for filling the `commit`
-fields. CI has the SHA from `${{ github.sha }}`; the rest comes
-from `git show` or equivalent. See
-[`components/emitter.md`](./components/emitter.md).
-
-## HTTP matrix for `POST /api/ingest`
-
-| Condition | Status |
-|---|---|
-| Happy path | 200 with `{ "inserted": N, "updated": M }` |
-| Malformed JSON | 400 |
-| Unknown `kind`, unknown field, or per-record validation failure | 400 with the offending record index |
-| Missing/invalid bearer token | 401 |
-| Schema version newer than server expects | 409 |
-| Other server error | 500 |
-
-All-or-nothing per POST: a single failed record fails the whole
-batch. The reported `inserted` and `updated` counts are aggregated
-across all five tables.
-
-## Authentication header
-
-```text
-Authorization: Bearer <token>
-```
-
-Compared with constant-time equality on the server. Token comes from
-the `INGEST_BEARER_TOKEN` env var.
-
-## Slug grammar (server ↔ web-ui)
-
-The web-ui receives slugs from `/api/groups` and feeds them back
-into `/api/chart/:slug`. Slugs are **opaque strings** as far as the
-web-ui is concerned: it never parses or constructs them itself,
-only echoes what the API returned. The server is free to choose any
-slug format, change it without breaking the web-ui, or make it
-debuggable (e.g. `qm-tpch-q01-nvme-sf1`) - the only contract is
-"`/api/chart/:slug` accepts any slug `/api/groups` returned."
-
-## Read API
-
-Four JSON routes today. Field shapes are not binding; refine during
-implementation.
-
-### `GET /api/groups`
-
-A flat list of distinct group keys derivable from the data, with
-just enough metadata to link to a chart. The server walks each fact
-table to produce the group keys defined in
-[`01-schema.md`](./01-schema.md#group--chart--series-fit). Every
-chart entry includes a `slug` that round-trips through
-`/api/chart/:slug`, and every group has its own `slug` that
-round-trips through `/api/group/:slug`.
-
-### `GET /api/chart/:slug`
-
-Returns the data for one chart: a `display_name`, a `unit_kind`, an
-ordered `commits` list (sha + timestamp + first-line message + url),
-and a `series` map keyed by series name where each value is an
-array aligned to `commits` (with `null` for missing data points).
-Accepts `?n=&y=&mode=&hidden=` to scope the commit window and
-configure the rendered view.
-
-`unit_kind` is a small structured taxonomy that tells the client
-*what* the values are. Wire values stay in the kind's base unit; the
-client picks a display unit (e.g. `ms` for `time_ns` values around
-1e6) so the rendered axis stays readable. Worked example:
-`12,000,000,000` ns on the wire → `12 s` on the y-axis.
-
-| `unit_kind`         | Base unit on the wire   | Client display picker         |
-|---------------------|-------------------------|-------------------------------|
-| `time_ns`           | nanoseconds             | `ns | µs | ms | s` by magnitude |
-| `bytes`             | bytes                   | `B | KiB | MiB | GiB | TiB` (binary) |
-| `ratio`             | dimensionless ratio     | identity (no suffix)          |
-| `count`             | dimensionless count     | identity (no suffix)          |
-| `throughput_mb_s`   | megabytes per second    | identity, `MB/s` suffix       |
-
-Adding a variant is a wire-compat change: bump the emitter, the
-migrator, and the client unit picker in `chart-init.js` together.
-
-### `GET /api/group/:slug`
-
-Returns every chart in a group as a single batch payload, in render
-order. Used by the `/group/{slug}` HTML page and (today) by the
-landing page hydration path. Same query parameters as
-`/api/chart/:slug`.
-
-### `GET /health`
-
-Returns `{ status, db_path, schema_version, latest_commit_timestamp,
-row_counts }`. Cheap; suitable for load-balancer health checks.
-
-Per-commit page, range queries, and the rest of the read API are
-deferred. See [`deferred.md`](./deferred.md).
diff --git a/benchmarks-website/planning/AGENTS.md b/benchmarks-website/planning/AGENTS.md
deleted file mode 100644
index 719c47b4c22..00000000000
--- a/benchmarks-website/planning/AGENTS.md
+++ /dev/null
@@ -1,172 +0,0 @@
-<!--
-SPDX-License-Identifier: Apache-2.0
-SPDX-FileCopyrightText: Copyright the Vortex contributors
--->
-
-# AGENTS.md - benchmarks-website v3
-
-Brief for coding agents working on the v3 rewrite of `bench.vortex.dev`. Keep this file short.
-Detail belongs in component plans.
-
-## Status
-
-Alpha is shipped. The v3 server, migrator, and inline-charts UI are all merged to
-`ct/benchmarks-v3`. The current focus is **production readiness**: secrets, CI ingestion wiring,
-smoke-testing on a real host, the DNS flip, and v2 cleanup. See [`README.md`](./README.md) for the
-live punch list.
-
-The v2 site (top-level files in `benchmarks-website/`: `server.js`, `src/`, `package.json`,
-`index.html`, `Dockerfile`, `docker-compose.yml`, `ec2-init.txt`, etc.) is still in production on
-`bench.vortex.dev` and **stays running unchanged** until the DNS flip. The v3 server lives alongside
-it as `vortex-bench-server` at `benchmarks-website/server/`.
-
-## Architecture in 10 bullets
-
-- Single Rust binary: `axum` (HTTP) + `maud` (SSR HTML) + embedded `duckdb-rs`. All static assets
-  (`chart.umd.js`, `chart-init.js`, `style.css`) are `include_bytes!`'d into the binary. No CDN.
-  A `tower-http` `CompressionLayer` wraps every response (gzip/brotli).
-- One DuckDB file on local disk holds five fact tables (compression time, query measurement, vector
-  search, RAG, random access) plus a `commits` dim table. Schema in
-  [`01-schema.md`](./01-schema.md).
-- One ingest endpoint: `POST /api/ingest`, gated by a static bearer token from the
-  `INGEST_BEARER_TOKEN` env var. Wire shapes in [`02-contracts.md`](./02-contracts.md).
-- Three HTML routes — `/`, `/chart/{slug}`, `/group/{slug}` — and four JSON routes —
-  `GET /api/groups`, `GET /api/chart/{slug}`, `GET /api/group/{slug}`, `GET /health` — all served
-  from the same binary.
-- `ChartKey` and `GroupKey` enums round-trip through URLs as `<prefix>.<base64url(serde_json(...))>`
-  slugs. No DB lookup required to decode a URL.
-- Charts render inline on the landing page. Each `<canvas>` is paired with a
-  `<script id="chart-data-N">` JSON payload that `chart-init.js` hydrates lazily via
-  `IntersectionObserver`.
-- Per-chart toolbar with zoom-as-scope. Each chart fetches its full raw history once
-  (`?n=all`); visual downsampling is **client-side LTTB** in `chart-init.js`
-  (`MAX_VISIBLE_POINTS = 500`, applied only to the currently visible commit range — zoomed-in
-  views render raw). Drag-pan, drag-rectangle-zoom, wheel-pan, the toolbar slider, and a
-  horizontal range-scrollbar strip below each chart all drive the same `rebuildVisibleAndUpdate`
-  so LTTB and the strip stay in lockstep. A "downsampled · K / N" badge surfaces when LTTB is
-  active.
-- Group ordering is hard-coded to match v2's `origin/ct/vfvb:benchmarks-website/index.html` order.
-  Every group is wrapped in a `<details>`, all collapsed by default. The first group's chart
-  payloads are still inlined (capped at `LANDING_INLINE_N = 100` commits) so opening it skips a
-  fetch round-trip; `chart-init.js` lazy-fetches `?n=all` once when the user zooms past the
-  inlined window.
-- A sticky filter bar at the top of every page exposes engine/format chips that drive series
-  visibility across every chart at once. Clicking a data point opens that commit's PR (parsed
-  from `(#NNNN)` in the message; falls back to the commit URL). URL params `?engine=&format=&n=`
-  survive permalink shares and refreshes; per-chart toolbar state (Y axis, slider) is
-  intentionally local-only.
-- `vortex-bench-migrate` reads v2 records, runs each through a classifier in
-  `migrate/src/classifier.rs`, and either routes the record into one of the five fact tables or
-  marks it `Skip(reason)` with a typed reason. The run **fails if more than 5% of records come back
-  as `Unknown`** — silent data loss is not allowed.
-
-## Code map
-
-| Path                                             | What lives here                                                                                                                                                                                                            |
-| ------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `benchmarks-website/server/src/main.rs`          | Binary entrypoint. Reads `INGEST_BEARER_TOKEN`, `VORTEX_BENCH_BIND` (default `127.0.0.1:3000`), `VORTEX_BENCH_DB` (default `./bench.duckdb`), `VORTEX_BENCH_LOG`.                                                          |
-| `benchmarks-website/server/src/app.rs`           | `AppState` (DB handle + bearer + path) and the `Router` composition. `CompressionLayer` wraps every response.                                                                                                              |
-| `benchmarks-website/server/src/api/`             | Read API. `mod.rs` holds the axum handlers and re-exports the public surface. Submodules: `dto.rs` (wire-shape structs + `GROUP_ORDER`), `window.rs` (`CommitWindow` + `ChartQuery`), `groups.rs` (`collect_groups` discovery passes), `summary.rs` (v2-compatible rollups), `charts.rs` (`chart_payload`, `collect_group_charts`, `SeriesAccumulator`, per-fact-table collectors), `filter.rs` (`collect_filter_universe`). Known N+1 in `collect_group_charts` (charts.rs) — flagged with a TODO. |
-| `benchmarks-website/server/src/html/`            | HTML routes. `mod.rs` holds `router()`, `UiQuery`, `FilterState`, the three async page handlers, and `collect_landing_groups`. Submodules: `render.rs` (page chrome, `escape_json_for_script`), `landing.rs` (landing body + chart cards, `LandingGroup`), `chart.rs` (chart and group page bodies), `summary.rs` (group summary cards), `filter.rs` (filter dropdown markup), `toolbar.rs` (per-chart toolbar + range strip), `static_assets.rs` (`include_bytes!`'d JS/CSS/SVG + `STATIC_ASSET_VERSION`). `LANDING_INLINE_N: u32 = 100` caps the first group's inlined chart JSON; HTML routes default to `CommitWindow::All`. |
-| `benchmarks-website/server/src/slug.rs`          | `ChartKey` / `GroupKey` enums and `to_slug` / `from_slug` round-trip.                                                                                                                                                      |
-| `benchmarks-website/server/static/chart-init.js` | Hydration, `IntersectionObserver`, lazy-fetch on `<details>` toggle, `rebuildVisibleAndUpdate` (client-side LTTB on the visible range, `MAX_VISIBLE_POINTS = 500`), custom external tooltip + delta rows + click-to-PR, range-scrollbar strip, global filter chips, inline crosshair plugin. The canvas state contract (`canvas.__bench_*` fields) and per-card DOM contract (`data-role` selectors) are documented at the top of the file. |
-| `benchmarks-website/server/static/style.css`     | `.chart-tooltip-host` is `position: absolute; pointer-events: none;` (do not change — fixes the flicker). `.chart-card` is `position: relative`. `.chart-range-strip*` and `.filter-*` selectors back the range scrollbar and global filter chips. |
-| `benchmarks-website/server/tests/`               | `insta` snapshot tests + integration tests, seeded by POSTing to `/api/ingest`. No external fixtures. Tests are split topically (`landing.rs`, `chart_api.rs`, `group_api.rs`, `permalinks.rs`, `static_assets.rs`, `ingest.rs`) sharing fixtures via `common/mod.rs`. |
-| `benchmarks-website/migrate/src/migrate/`        | Migration orchestrator (`mod.rs` — `MigrationSummary`, `run`, `apply_v2_record`, `migrate_data_jsonl`, `migrate_file_sizes`, `flush_all`, `open_target_db`) plus per-fact-table accumulators (`accum.rs` — `QueryAccum`, `CompressionTimeAccum`, `RandomAccessAccum`, `CompressionSizeAccum`, `build_*_batch`). |
-| `benchmarks-website/migrate/src/classifier.rs`   | `classify_outcome` routes records into a fact table, `Skip(reason)`, or `Unknown`. >5% Unknown gates the run.                                                                                                              |
-| `benchmarks-website/migrate/src/verify.rs`       | Structural diff between a migrated DuckDB and v2's live `/api/metadata`. Exits non-zero if any v2 group is missing in v3 — gates a CI step.                                                                                |
-
-## Local dev / smoke test
-
-Build narrow:
-
-```bash
-cargo build -p vortex-bench-server
-```
-
-Run:
-
-```bash
-INGEST_BEARER_TOKEN="dev" cargo run -p vortex-bench-server
-# server logs: "bench server listening addr=127.0.0.1:3000 db=bench.duckdb"
-```
-
-Seed test data via the ingest endpoint (the snapshot tests do this in-process — see
-`server/tests/web_ui.rs` for the envelope shapes).
-
-Run snapshot tests:
-
-```bash
-cargo test -p vortex-bench-server
-INSTA_UPDATE=auto cargo test -p vortex-bench-server   # to update
-```
-
-For an end-to-end smoke test against migrated data, point `VORTEX_BENCH_DB` at the output of
-`vortex-bench-migrate`.
-
-## Repository conventions
-
-See the root [`CLAUDE.md`](/CLAUDE.md) for Rust style, test layout, and CI norms. Project-specific:
-
-- The v3 server crate lives at `benchmarks-website/server/` and is registered in the root
-  `Cargo.toml` `members` list.
-- All commits need a `Signed-off-by:` trailer.
-- Run `cargo +nightly fmt --all` and narrow clippy on what you changed.
-- Public-API changes need `./scripts/public-api.sh`.
-- Every new public item needs a doc comment.
-- Tests return `VortexResult<()>` and use `?`. No `unwrap`.
-- Branch from `ct/benchmarks-v3`, not `develop`. PR back to `ct/benchmarks-v3`.
-- **Never auto-merge**. Open the PR, post the URL, stop. The user reviews and merges.
-
-## Things to avoid
-
-- **Don't widen scope past your task.** If a feature feels missing, check
-  [`deferred.md`](./deferred.md) and the "Deferred UI follow-ups" section of
-  [`README.md`](./README.md) first — it is almost certainly already deferred.
-- **Don't write a server-side classifier for live ingest.** The emitter is responsible for v3-shape
-  records. The migrator's classifier exists only to translate v2 records once.
-- **Don't rebuild a global page-level toolbar with chart-state controls.** Per-chart controls
-  (slider, Y-axis, scope) stay per-chart. The sticky filter bar at the top of every page is the
-  exception — it drives series *visibility* across every chart at once, which is what users want
-  for the engine/format dimension. Don't extend it with per-chart settings.
-- **Don't bind a slider's reactive logic to `change` events.** Use `input` events with a small
-  throttle + debounce, otherwise the slider only updates on release and feels broken.
-- **Don't refetch every time the scope changes.** The chart fetches its full history once; scope
-  buttons, slider, drag-pan, wheel-pan, and the range strip all rebuild via the in-memory LTTB
-  pass on the cached payload. The one exception is the inline-payload zoom-out path: when the user
-  zooms past the first group's inlined `LANDING_INLINE_N` window for the first time,
-  `chart-init.js` lazy-fetches `?n=all` once and replaces the payload.
-- **Don't re-introduce a server-side commit cap.** `?n=all` is the default for HTML routes and the
-  upper bound is unbounded everywhere. Visual downsampling lives client-side in `chart-init.js`,
-  not on the wire.
-- **Don't reverse the predecessor walk in the tooltip.** The chart payload's `commits[]` is sorted
-  oldest-first by SQL — `commits[0]` is the oldest commit, `commits[N-1]` is the newest. For
-  per-row delta the chronological predecessor of `commits[idx]` lives at `idx - 1`. We caught a
-  regression where a "fix" flipped this to `idx + 1`; the original walk-backward direction was
-  right.
-- **Don't re-introduce `pointer-events: auto` on the tooltip host.** The tooltip is positioned at
-  the cursor; making it pointer-interactive causes a flicker loop. Keep it `pointer-events: none`
-  and offset via `transform: translate(12px, 12px)`.
-- **Don't drift from contracts.** Wire-shape changes are a coordinated PR across emitter, migrator,
-  and server.
-- **Don't touch the v2 React/Node app.** It stays in production unchanged until the DNS flip. The v2
-  cleanup is its own PR, post- flip.
-- **Don't reach for WASM.**
-
-## Working branches
-
-| Branch                         | Purpose                                                                                             |
-| ------------------------------ | --------------------------------------------------------------------------------------------------- |
-| `develop`                      | Live v2 site. Don't break.                                                                          |
-| `ct/benchmarks-v3`             | Integration branch carrying the planning commit + landed component PRs. All v3 branches start here. |
-| `claude/benchmarks-v3-<topic>` | Per-task feature branches, branched from `ct/benchmarks-v3` and PR'd back to it.                    |
-
-## How to update this file
-
-Keep it short. If you've learned something a future agent will need:
-
-- Cross-component contract → [`02-contracts.md`](./02-contracts.md)
-- Local detail → your component plan
-- Decided → [`decisions.md`](./decisions.md)
-- Not designing yet → [`deferred.md`](./deferred.md)
-- Cross-cutting agent norm → here
diff --git a/benchmarks-website/planning/README.md b/benchmarks-website/planning/README.md
deleted file mode 100644
index d1cb922ff56..00000000000
--- a/benchmarks-website/planning/README.md
+++ /dev/null
@@ -1,188 +0,0 @@
-<!--
-SPDX-License-Identifier: Apache-2.0
-SPDX-FileCopyrightText: Copyright the Vortex contributors
--->
-
-# Benchmarks website v3 - Planning
-
-Planning docs for `bench.vortex.dev` v3: a single Rust binary (axum + maud + duckdb-rs) replacing
-the v2 Node/React stack.
-
-## Status
-
-- **Alpha shipped** to `ct/benchmarks-v3`. Server, migrator, full-history UI (client-side LTTB,
-  range scrollbar, global filter chips, click-to-PR tooltip), response compression, and the
-  `LANDING_INLINE_N` cold-load trim are all merged.
-- **In production-readiness phase.** v2 is still serving `bench.vortex.dev`. v3 runs on a
-  throwaway EC2 host for smoke-testing; not deployed publicly yet.
-- **UI follow-ups** are owned by the user, not by agents (see "Deferred UI follow-ups" below).
-
-A 10-bullet architecture summary lives at the top of [`AGENTS.md`](./AGENTS.md). Use that for
-handoffs and external sharing.
-
-## Production readiness checklist
-
-In rough order. Each item is a separate task; do not bundle.
-
-### 1. Repo secrets — done
-
-`INGEST_BEARER_TOKEN` and `V3_INGEST_URL` are set as repo-level secrets on `vortex-data/vortex`.
-They're fine at this scope for the test phase. Move to an Environment-scoped secret (gated to
-`ct/benchmarks-v3` / protected branches) before prod. Rotate `INGEST_BEARER_TOKEN` if the test
-value was ever shared in a comment / Slack / PR review.
-
-### 2. CI ingestion wiring — partial
-
-The dual-write step is wired into `bench.yml` and `sql-benchmarks.yml` via commit `f7fd270`. Still
-to do: an end-to-end run that triggers the workflow on a feature branch, POSTs to the EC2 box, and
-confirms the envelope lands in DuckDB intact. Outbox-style retry on failed POSTs is a follow-up;
-not built until we observe a failure.
-
-### 3. Test deployment
-
-Currently a manual EC2 box for smoke-testing. Latest test host:
-
-- DNS: `ec2-18-219-54-101.us-east-2.compute.amazonaws.com` (changes on stop/start unless an Elastic
-  IP is associated)
-- Port: `3000` (open to `0.0.0.0/0` in the security group)
-- Bind: `VORTEX_BENCH_BIND=0.0.0.0:3000` (default `127.0.0.1` does not work for external access)
-- HTTP only, no TLS. Throwaway bearer token only — don't reuse for prod.
-
-Build path: build narrow on the box itself (it's a `c6a.4xlarge` to avoid local-vs-EC2 arch
-mismatches). The v2 migration source is fetched directly from the public S3 bucket; no AWS creds
-needed.
-
-Smoke test from a laptop:
-
-```bash
-curl -i http://<host>:3000/
-```
-
-Should return HTTP 200 with the landing HTML.
-
-### 4. Smoke test with migrated data — in progress
-
-Run `vortex-bench-migrate` against the v2 source, point `VORTEX_BENCH_DB` at the result, walk every
-group's charts in a browser. Done so far: Random Access (caught and fixed a missing-chart
-regression — see `1228e530`); LTTB downsampling, range scrollbar, filter chips, and click-to-PR
-all behave on real data. Still to walk: every other group at least once.
-
-### 5. Operational hygiene (not yet done)
-
-- Structured logging review (we already use `tracing`; verify fields are useful for prod
-  debugging).
-- Rate limiting on `/api/ingest` — the bearer token is the only gate today.
-- TLS termination strategy: front with a load balancer / nginx / Caddy, or terminate in-process?
-  Decide before DNS flip.
-- DB schema-version tracking, so future migrations are coordinated rather than ad-hoc. The server
-  already exposes the constant via `/health`; what's missing is on-disk persistence and a check on
-  boot.
-- Backup story. Open question: is "copy the file" enough, or do we want a WAL-based /
-  point-in-time approach? Investigate DuckDB options.
-
-### 6. Deployment platform decision
-
-v2 ran on EC2 (see top-level `ec2-init.txt`, `docker-compose.yml`). v3 is a self-contained binary +
-DuckDB file + env var, so the v2 setup isn't reusable verbatim. Decide:
-
-- Reuse the v2 EC2 host (cutover-style)?
-- Stand up a new EC2 box?
-- Containerize and run somewhere managed?
-
-The simplest first cut is a new EC2 instance with a systemd unit and an Elastic IP.
-
-### 7. DNS flip
-
-Point `bench.vortex.dev` at the v3 host. After this:
-
-- v2 is no longer serving production traffic.
-- The v2 cleanup PR (item 8) becomes safe to merge.
-- Production secrets are now load-bearing — rotate `INGEST_BEARER_TOKEN` if the test value was
-  ever shared.
-
-### 8. v2 cleanup PR
-
-A separate PR, opened post-flip. Deletes everything top-level under `benchmarks-website/` that
-belongs to v2:
-
-- `server.js`, `src/`, `index.html`, `vite.config.js`, `package.json`, `package-lock.json`,
-  `public/`
-- Top-level `Dockerfile`, `docker-compose.yml`, `ec2-init.txt`
-- Any GitHub Actions workflows that only target the v2 deploy
-
-The v3 tree under `benchmarks-website/server/` and `benchmarks-website/migrate/` is untouched.
-
-## Open product decisions
-
-These are user/owner decisions, not agent decisions.
-
-- **What migrated data do we keep vs drop?** The classifier currently silently drops every record
-  routed to a `Skip` variant (e.g. `Skip::HistoricalMemory`, legacy random-access shapes). Some of
-  those `Skip`s are real "we don't want this" cases; some are "we'd want this if we extended a
-  fact table." Once v2 is gone the source records are gone with it, so this needs an explicit
-  pass through `Skip` variants before flip.
-- **Group naming.** Server emits names like `tpch sf=1 [nvme]`; v2's published names are
-  `TPC-H (NVMe) (SF=1)`. Either rename the server-emitted names to v2 form, or add a sort-key +
-  display-name map. Cosmetic but visible.
-- **Deferred UI follow-ups.** The user is handling these directly; agents should not pre-empt
-  them:
-  - `collect_group_charts` N+1 refactor in `api/charts.rs::collect_group_charts`.
-  - Mobile legend resize handler. The position is picked once at chart construction via
-    `matchMedia("(max-width: 768px)")`; it doesn't update if the viewport crosses the breakpoint.
-  - Zoom-sync within a group.
-  - Swap the inline crosshair plugin for `chartjs-plugin-crosshair`.
-
-## Reading order (alpha-era reference)
-
-Still useful for context on why the schema and contracts look the way they do. Not all of this is
-current.
-
-| File                                             | Read when                                                                                  |
-| ------------------------------------------------ | ------------------------------------------------------------------------------------------ |
-| [`AGENTS.md`](./AGENTS.md)                       | Always. Status, architecture, code map, conventions, common mistakes.                      |
-| [`00-overview.md`](./00-overview.md)             | The original alpha pitch and dependency map.                                               |
-| [`01-schema.md`](./01-schema.md)                 | The five DuckDB fact tables + `commits` dim. Live contract.                                |
-| [`02-contracts.md`](./02-contracts.md)           | Wire shapes (one `kind` per fact table), HTTP error matrix, auth header. Live contract.    |
-| [`benchmark-mapping.md`](./benchmark-mapping.md) | Existing benchmarks → fact tables. Live reference, especially when extending the migrator. |
-| [`decisions.md`](./decisions.md)                 | What was pinned for alpha.                                                                 |
-| [`deferred.md`](./deferred.md)                   | What was punted from alpha. Cross-reference with the "Deferred UI follow-ups" list above.  |
-| `components/<name>.md`                           | Original per-workstream plans. All three are merged; treat these as historical.            |
-
-## Components (merged)
-
-Three workstreams shipped for alpha. All three are merged to `ct/benchmarks-v3`. Plans kept for
-reference.
-
-| Component | Plan                                             | Status                              |
-| --------- | ------------------------------------------------ | ----------------------------------- |
-| Server    | [components/server.md](./components/server.md)   | Merged.                             |
-| Emitter   | [components/emitter.md](./components/emitter.md) | Merged.                             |
-| Web UI    | [components/web-ui.md](./components/web-ui.md)   | Merged (plus per-chart UX rebuild). |
-
-## Working branches
-
-- `develop` — the v2 site, in production. **Do not touch** until after DNS flip.
-- `ct/benchmarks-v3` — the integration branch. All v3 work lands here. Feature branches branch
-  from it and PR back to it.
-- `claude/benchmarks-v3-<topic>` — per-task feature branches.
-
-PRs are reported by URL but **never auto-merged**. The user reviews and merges.
-
-## What this plan is not
-
-- Not a parity-with-v2 plan. v3 ships the existing benchmark groups, not v2's exact UX.
-- Not a phase-2 design doc. Phase 2 is the prod-readiness checklist above; further-out work lives
-  in [`deferred.md`](./deferred.md) and the "Deferred UI follow-ups" list.
-
-## Updating these docs
-
-If you find a gap, prefer to:
-
-1. Update [`02-contracts.md`](./02-contracts.md) when the gap is at a component boundary.
-2. Update [`AGENTS.md`](./AGENTS.md) when the gap is a new agent norm or a new "thing to avoid."
-3. Update this file when the gap is the prod-readiness punch list or an open product decision.
-4. Update [`decisions.md`](./decisions.md) when the gap is "we just haven't decided yet, but we
-   need to."
-5. Update [`deferred.md`](./deferred.md) when the gap is "real work but not now."
-
-Don't add a new top-level numbered doc.
diff --git a/benchmarks-website/planning/benchmark-mapping.md b/benchmarks-website/planning/benchmark-mapping.md
deleted file mode 100644
index 7b809350862..00000000000
--- a/benchmarks-website/planning/benchmark-mapping.md
+++ /dev/null
@@ -1,147 +0,0 @@
-<!--
-SPDX-License-Identifier: Apache-2.0
-SPDX-FileCopyrightText: Copyright the Vortex contributors
--->
-
-# Existing benchmarks → fact-table mapping
-
-A cross-reference from today's benchmark code to the v3 fact tables
-in [`01-schema.md`](./01-schema.md). Use this when implementing
-emitter `to_v3_json` (component plan in
-[`components/emitter.md`](./components/emitter.md)) or when sanity-
-checking that the schema is expressive enough.
-
-If a benchmark in this repo is not listed here, it is either
-deferred to phase 2 or out of scope for the bench website.
-
-## Source measurement type → target table
-
-The canonical mapping. The Rust types live in
-`vortex-bench/src/measurements.rs` (and per-benchmark crates).
-
-| Source type | Wire `kind` | Target table | Notes |
-|---|---|---|---|
-| `QueryMeasurement` (paired with `MemoryMeasurement`) | `query_measurement` | `query_measurements` | The two structs collapse into **one** v3 record. Memory fields are omitted if `--track-memory` was off. |
-| `TimingMeasurement` (only the random-access variant uses this today) | `random_access_time` | `random_access_times` | |
-| `CompressionTimingMeasurement` | `compression_time` (with `op ∈ {encode, decode}`) | `compression_times` | The `op` is decided by which side of `compress-bench`'s timing loop produced it. |
-| `CustomUnitMeasurement` with byte unit (sizes) | `compression_size` | `compression_sizes` | A new `CompressionSizeMeasurement` extraction lives in `vortex-bench/src/compress/mod.rs`; the emitter no longer rides on `CustomUnitMeasurement`. |
-| `CustomUnitMeasurement` with `ratio` unit | **dropped** | none | Computed at read time from `compression_sizes`. |
-| `ScanTiming` (vector-search) | `vector_search_run` | `vector_search_runs` | Carries timing **plus** the three counters in the same row. |
-
-## Per-binary inventory
-
-Every benchmark binary in this repo, the measurement structs it
-produces today, and the v3 tables those measurements land in.
-
-### `benchmarks/datafusion-bench`
-
-Runs the SQL query suites with `engine = datafusion`, parameterized
-over a `Format` (parquet, vortex-file-compressed, vortex-compact,
-arrow, lance via the lance-bench wrapper).
-
-- Produces `QueryMeasurement` (+ `MemoryMeasurement` when
-  `--track-memory`) → **`query_measurements`**.
-- One row per `(commit, dataset, dataset_variant, scale_factor,
-  query_idx, storage, engine = "datafusion", format)`.
-
-### `benchmarks/duckdb-bench`
-
-Same as `datafusion-bench` but with `engine = duckdb`.
-
-- Produces `QueryMeasurement` (+ `MemoryMeasurement` when tracking)
-  → **`query_measurements`**, with `engine = "duckdb"`.
-
-### `benchmarks/lance-bench`
-
-Three things in one crate:
-
-1. **Query runner** (`src/main.rs`): `engine = datafusion`,
-   `format = lance` only. Produces `QueryMeasurement` (+
-   `MemoryMeasurement`) → **`query_measurements`**.
-2. **Compression runner** (`src/compress.rs`): produces
-   `CompressionTimingMeasurement` + size `CustomUnitMeasurement` →
-   **`compression_times`** (with `op ∈ {encode, decode}`,
-   `format = lance`) and **`compression_sizes`**
-   (`format = lance`).
-3. **Random-access runner** (`src/random_access.rs`): produces
-   `TimingMeasurement` → **`random_access_times`** with
-   `format = lance`.
-
-### `benchmarks/compress-bench`
-
-The compression suite. Per dataset, runs encode + decode against
-each enabled `Format` and records the resulting on-disk size.
-
-- `CompressionTimingMeasurement` for encode → **`compression_times`**
-  with `op = "encode"`.
-- `CompressionTimingMeasurement` for decode → **`compression_times`**
-  with `op = "decode"`.
-- Byte-unit `CustomUnitMeasurement` (the size entries) →
-  **`compression_sizes`**.
-- Ratio-unit `CustomUnitMeasurement` (the `vortex:parquet-zstd
-  ratio/...` entries) → **dropped**. The reader recomputes ratios
-  from `compression_sizes`.
-
-### `benchmarks/random-access-bench`
-
-The random-access "take" timing suite. Datasets here (chimp, taxi,
-etc.) are a different namespace from the SQL query suites.
-
-- `TimingMeasurement` → **`random_access_times`**.
-- `format` is one of `vortex-file-compressed`, `vortex-compact`,
-  `parquet`, `lance`.
-
-### `benchmarks/vector-search-bench`
-
-Cosine-similarity scan over a vector dataset. Each dataset/layout/
-flavor combination produces a single `ScanTiming` per scan
-configuration.
-
-- `ScanTiming` → **`vector_search_runs`**.
-- `dataset` from `VectorDataset` (e.g. `cohere-large-10m`).
-- `layout` from `TrainLayout`.
-- `flavor` from `VectorFlavor` (compression flavor; the vector-
-  search analogue of `format`).
-- `threshold`, `iterations` are real columns.
-- `query_seed` is **not** stored - it's a deterministic seed for
-  the query sampler and not a measurement dimension.
-
-## Per-suite dim values
-
-For SQL query suites (everything that flows through
-`query_measurements`), the dim columns are populated as follows:
-
-| `BenchmarkArg` | `dataset` | `dataset_variant` | `scale_factor` | Notes |
-|---|---|---|---|---|
-| `TpcH` | `tpch` | NULL | TPC SF as string (`"1"`, `"10"`, `"100"`, `"1000"`) | |
-| `TpcDS` | `tpcds` | NULL | TPC SF as string | |
-| `ClickBench` | `clickbench` | NULL | NULL | The migrate path does not encode the `partitioned` / `single` flavor in `dataset_variant`, so the live emitter also leaves it `NULL` to keep historical and live rows in one group. The active flavor is fixed per CI matrix entry. |
-| `StatPopGen` | `statpopgen` | NULL | NULL | The migrate path (v2 → v3 backfill) does not carry a per-record scale factor for this suite, so the live emitter also leaves it `NULL` to keep historical and live rows in one group. |
-| `PolarSignals` | `polarsignals` | NULL | NULL | Same as StatPopGen. |
-| `Fineweb` | `fineweb` | NULL | NULL | |
-| `GhArchive` | `gharchive` | NULL | NULL | |
-| `PublicBi` | `public-bi` | dataset name (e.g. `cms-provider`) | NULL | The Public-BI sub-dataset name lives in `dataset_variant`. |
-
-For non-query suites:
-
-- `compress-bench`: `dataset` is the compression dataset name; if
-  the suite later grows variants, `dataset_variant` is available.
-- `random-access-bench`: `dataset` is the random-access dataset
-  name. No variant column on this table.
-- `vector-search-bench`: see the [vector_search_runs
-  table](./01-schema.md#vector_search_runs).
-
-## What this implies for the emitter
-
-The mapping above is the contract `vortex-bench --gh-json-v3`
-implements. Any v3 record an emitter writes today must land in
-exactly one of the five tables; if a future measurement type
-doesn't fit, that's the signal to add a sixth table (and a sixth
-`kind`) rather than overload one of these.
-
-The **historical migrator** will use the same mapping when it lands
-(it's deferred - see [`deferred.md`](./deferred.md#historical-data-migration)).
-The v2 classifier on `develop` at `benchmarks-website/server.js`
-becomes useful then, because the v2 S3 dump pre-dates the
-discriminator and we'll have to recover `kind` from name strings.
-For new ingest at alpha, no classifier is needed.
diff --git a/benchmarks-website/planning/components/emitter.md b/benchmarks-website/planning/components/emitter.md
deleted file mode 100644
index e462a9804c8..00000000000
--- a/benchmarks-website/planning/components/emitter.md
+++ /dev/null
@@ -1,86 +0,0 @@
-<!--
-SPDX-License-Identifier: Apache-2.0
-SPDX-FileCopyrightText: Copyright the Vortex contributors
--->
-
-# Component: Emitter (alpha)
-
-## Required reading
-
-- [`../00-overview.md`](../00-overview.md)
-- [`../02-contracts.md`](../02-contracts.md)
-- [`../benchmark-mapping.md`](../benchmark-mapping.md) - the
-  source-type → target-table mapping.
-
-## Goal
-
-Extend `vortex-bench` so it emits v3-shape JSON. Plus a small POST
-script that wraps the JSONL in an envelope and sends it to a
-running alpha server.
-
-This is **purely additive** to v2's emission path. Nothing in v2 is
-touched. CI workflow integration, dual-write, the orchestrator
-update, and the outbox safety net all wait until after the alpha
-loop works end-to-end (see [`../deferred.md`](../deferred.md)).
-
-## In scope
-
-### Rust emitter
-
-- Add a `--gh-json-v3 <path>` CLI flag that writes JSONL of bare
-  v3 records (no envelope). The legacy `-d gh-json -o ...` form is
-  untouched - both work at alpha.
-- Emit a record with the appropriate `kind` for every measurement
-  type produced today. The mapping from existing measurement
-  structs to wire `kind`s is the table in
-  [`../benchmark-mapping.md`](../benchmark-mapping.md).
-- Two non-obvious points (everything else is mechanical):
-  - `QueryMeasurement` and the paired `MemoryMeasurement` collapse
-    into **one** `query_measurement` record with both `value_ns`
-    and the four memory fields. If memory wasn't tracked, omit the
-    memory fields.
-  - Vector-search's `ScanTiming` doesn't carry its own dataset /
-    layout / threshold (those live in the binary's `Args`). The
-    emitter has to plumb them through to the record.
-- `CustomUnitMeasurement` cross-format ratios are **not emitted** -
-  ratios are computed in the read path.
-- Snapshot tests per `kind` (any framework), scrubbing `commit_sha`
-  and `env_triple`.
-
-### Post-ingest script
-
-A small Python script (path of the agent's choosing, e.g. under
-`scripts/`) that:
-
-- Reads JSONL of records.
-- Fills the `commit` envelope fields by shelling out to `git show`
-  (or equivalent) for the SHA passed as an argument.
-- Wraps the records in the envelope from
-  [`../02-contracts.md`](../02-contracts.md).
-- POSTs to `<server>/api/ingest` with the bearer token.
-- Exits non-zero on 4xx / 5xx. **No retries, no spool, no S3
-  outbox at alpha** - those land when CI starts using this.
-
-## Out of scope (deferred)
-
-- Replacing the v2 `-d`/`-o` CLI form. Both forms coexist at alpha.
-- Removing the v2 `gh-json` emission path.
-- Updating `bench-orchestrator` or any GitHub Actions workflows.
-  Alpha runs are manual.
-- Retry / spool / outbox-drain on POST failures.
-
-See [`../deferred.md`](../deferred.md) for the post-alpha plan.
-
-## Acceptance criteria
-
-- `cargo test -p vortex-bench` passes; one snapshot per `kind`.
-- Running a benchmark with `--gh-json-v3 <path>` writes valid JSONL
-  matching the wire shape from
-  [`../02-contracts.md`](../02-contracts.md).
-- The post-ingest script round-trips a fixture file through a
-  running alpha server (200 with non-zero `inserted` on first run,
-  200 with non-zero `updated` on second run).
-
-## Branch
-
-`claude/benchmarks-v3-emitter`
diff --git a/benchmarks-website/planning/components/server.md b/benchmarks-website/planning/components/server.md
deleted file mode 100644
index 8bed8485e70..00000000000
--- a/benchmarks-website/planning/components/server.md
+++ /dev/null
@@ -1,70 +0,0 @@
-<!--
-SPDX-License-Identifier: Apache-2.0
-SPDX-FileCopyrightText: Copyright the Vortex contributors
--->
-
-# Component: Server (alpha)
-
-## Required reading
-
-- [`../00-overview.md`](../00-overview.md)
-- [`../01-schema.md`](../01-schema.md)
-- [`../02-contracts.md`](../02-contracts.md)
-
-## Goal
-
-A single Rust binary: an HTTP server that owns a DuckDB file on
-local disk, accepts authenticated `/api/ingest` POSTs, and serves
-enough of a read API to render one chart page.
-
-This is the **alpha** version. It runs locally or on a dev box; no
-production deploy. Production deploy, backups, admin tooling, and
-historical data import are deferred (see
-[`../deferred.md`](../deferred.md)).
-
-The server crate is `vortex-bench-server`, living at
-`benchmarks-website/server/`, registered as a workspace member.
-
-## In scope
-
-- Open the DuckDB file and apply the schema DDL on boot. No
-  migration framework yet - if the schema changes during alpha,
-  delete the file and re-run.
-- Bearer-token middleware on `/api/ingest`. Token from
-  `INGEST_BEARER_TOKEN` env var, constant-time compared.
-- `POST /api/ingest`: parse the envelope from
-  [`../02-contracts.md`](../02-contracts.md), upsert the commit,
-  dispatch each record to its destination fact table by `kind`,
-  enforce all-or-nothing per POST. Compute each row's
-  `measurement_id` server-side as part of the INSERT. Return
-  `{ inserted, updated }` aggregated across tables.
-- `GET /api/groups` and `GET /api/chart/:slug`: enough to render
-  one chart page. Slugs round-trip; the agent picks the format.
-- `GET /health`: enough to confirm the DB is open and ingest is
-  working (path, latest commit timestamp, per-table row counts -
-  exact shape is the agent's call).
-- Mount whatever HTML routes the web-ui component contributes.
-
-Framework, templating engine (`maud` or `askama`), DuckDB driver
-version, module layout, and DB-access concurrency model are the
-agent's call. Pin the DuckDB crate version in `Cargo.toml`.
-
-## Out of scope (deferred)
-
-Schema migrations, lookup tables, pre-built views, multi-page read
-API, admin endpoints, containerization, EBS mount, backups. See
-[`../deferred.md`](../deferred.md).
-
-## Acceptance criteria
-
-- `cargo build` succeeds for the server crate.
-- Integration test: POST a fixture envelope with a valid bearer →
-  200; POST again → 200 with `updated > 0, inserted = 0`; POST
-  with no/wrong bearer → 401; POST with an unknown `kind` → 400.
-- `GET /health` returns a coherent shape after an ingest.
-- `cargo run` for the server, pointed at a fresh DuckDB file,
-  serves both read routes locally.
-
-## Branch
-
-`claude/benchmarks-v3-server`
diff --git a/benchmarks-website/planning/components/web-ui.md b/benchmarks-website/planning/components/web-ui.md
deleted file mode 100644
index abf9e9de4a0..00000000000
--- a/benchmarks-website/planning/components/web-ui.md
+++ /dev/null
@@ -1,62 +0,0 @@
-<!--
-SPDX-License-Identifier: Apache-2.0
-SPDX-FileCopyrightText: Copyright the Vortex contributors
--->
-
-# Component: Web UI (alpha)
-
-## Required reading
-
-- [`../00-overview.md`](../00-overview.md)
-- [`../01-schema.md`](../01-schema.md)
-- [`../02-contracts.md`](../02-contracts.md) - the JSON shapes you
-  render against.
-
-## Goal
-
-Get something on screen. **One landing page** that lists groups and
-**one chart page** that renders a single chart. SSR HTML + a thin
-Chart.js hydration. That's it for alpha.
-
-This component develops in parallel against a fixture-populated
-DuckDB - no dependency on the live ingest path.
-
-## In scope
-
-- A fixture: a small DuckDB file (or a builder that produces one
-  from a JSONL fixture) covering all five fact tables with a
-  handful of records each. Used for dev and tests.
-- Landing page (`GET /`): list of groups with links into chart
-  pages, derived from `/api/groups`.
-- Chart page (`GET /chart/:slug`): one Chart.js line chart, data
-  embedded inline as a JSON `<script>` tag (no client-side
-  round-trip after page load).
-- Plain CSS. No client-side framework.
-
-Templating engine, exact module layout, fixture format, and any
-helper crates are the agent's call. If the server crate already
-chose `maud` vs `askama`, follow it.
-
-## Out of scope (deferred)
-
-- Per-commit page, full group landing with filters / modal /
-  zoom-pan, ad-hoc SQL page, mobile redesign.
-- Engine + category filters, search, full-screen modal, deep links.
-- LTTB downsampling.
-- Lookup-table-driven engine names and color palettes (use the raw
-  `engine:format` strings and a small fallback palette).
-- Summary cards (geomean ratios, rankings).
-
-See [`../deferred.md`](../deferred.md).
-
-## Acceptance criteria
-
-- Both routes render against the fixture DB.
-- The chart hydrates without a network round-trip after page load.
-- Snapshot test of the rendered HTML for both pages, against the
-  fixture.
-- Manually verified in a real browser; recorded in PR description.
-
-## Branch
-
-`claude/benchmarks-v3-web-ui`
diff --git a/benchmarks-website/planning/decisions.md b/benchmarks-website/planning/decisions.md
deleted file mode 100644
index f9fef1bb7b7..00000000000
--- a/benchmarks-website/planning/decisions.md
+++ /dev/null
@@ -1,95 +0,0 @@
-<!--
-SPDX-License-Identifier: Apache-2.0
-SPDX-FileCopyrightText: Copyright the Vortex contributors
--->
-
-# Decisions
-
-A log of the decisions actually pinned for the alpha. Phase-2
-decisions deliberately stay open until we get there - see
-[`deferred.md`](./deferred.md).
-
-## Resolved (alpha)
-
-- **Storage backend**: DuckDB on local disk.
-- **Single binary**: server is one Rust process - HTTP API + HTML
-  routes + DuckDB owner. No separate ingester service, no S3
-  coordination layer for writes, no client-side WASM.
-- **Server crate**: `vortex-bench-server` at `benchmarks-website/server/`,
-  registered as a workspace member.
-- **Server-side classifier**: there isn't one. The emitter writes
-  v3-shape records directly.
-- **Fact-table layout**: one fact table per measurement family
-  (`query_measurements`, `compression_times`, `compression_sizes`,
-  `random_access_times`, `vector_search_runs`) plus a `commits` dim
-  table. **No single wide fact table** with a discriminator column.
-  Rationale: the families have genuinely different dim and value
-  shapes; merging them either bloats every row with NULLs or splits
-  scan results across rows that have to be re-joined. See
-  [`01-schema.md`](./01-schema.md).
-- **Wire-format discrimination**: each ingest record carries a
-  `kind` field that names its destination table. See
-  [`02-contracts.md`](./02-contracts.md).
-- **`measurement_id` is server-internal**: each fact table has a
-  primary key that is a deterministic hash of its dim tuple, used
-  for idempotent upsert. The hash is **not on the wire**; the
-  emitter never computes it. Algorithm and encoding are the
-  server's call.
-- **Compression encode vs decode**: a single `compression_times`
-  table with an `op ∈ {encode, decode}` column.
-- **Compression sizes vs times**: separate tables. Different value
-  type (bytes vs ns) and different cardinality (one-shot vs
-  iterated).
-- **Storage of ratios**: not stored as rows. Computed at read time
-  from `compression_sizes`.
-- **Auth at alpha**: shared bearer token in an env var,
-  constant-time compared. Upgrade paths are deferred.
-- **Initial render**: SSR HTML with chart data embedded inline as
-  JSON. Client-side hydration runs Chart.js against that data.
-- **API backwards compat**: none. v3 designs fresh JSON shapes.
-- **CLI shape for emitters**: a new `--gh-json-v3 <path>` flag
-  alongside the existing `-d`/`-o` form. Both coexist during alpha;
-  consolidating the CLI is deferred.
-- **`--gh-json-v3` on-disk format**: JSONL of bare records, one
-  per line. The ingest envelope (`run_meta` + `commit`) is added
-  by the post-ingest script, not by the Rust emitter.
-- **`storage` values**: `nvme` or `s3`. Legacy `gcs` is removed.
-  Only `query_measurements` carries `storage`.
-- **`scale_factor`**: column on `query_measurements` only,
-  nullable. Populated for TPC-H/TPC-DS/StatPopGen/PolarSignals;
-  NULL for ClickBench/Fineweb/GhArchive/Public-BI. Categorical
-  variants (ClickBench flavor, Public-BI dataset name) go in a
-  separate `dataset_variant` column.
-- **No JSON escape hatch**: new benchmark parameters become real
-  columns.
-- **Commit metadata**: included in every `/api/ingest` payload. The
-  server never reaches out to GitHub.
-- **All-or-nothing transactions in `/api/ingest`**: yes; the
-  reported `inserted`/`updated` counts are aggregated across all
-  five tables.
-- **Per-iteration runtimes**: stored in-row as a list column.
-- **Slugs are opaque**: the web-ui treats slugs returned by
-  `/api/groups` as opaque strings and feeds them back unmodified
-  into `/api/chart/:slug`. The server picks the slug format.
-
-## In use (locked in by the server PR)
-
-These were "recommended" before the server PR landed; they are now
-the actual stack in `benchmarks-website/server/Cargo.toml`. The
-web-ui agent inherits them by working in the same crate.
-
-- HTTP framework: `axum`.
-- Compile-time HTML templates: `maud`.
-- DuckDB driver: `duckdb-rs`, version pinned in the server crate's
-  `Cargo.toml`.
-- Snapshot tests: `insta` (workspace dep).
-- Logging: `tracing` (workspace dep).
-
-## Open
-
-Specific column choices may still tighten as the emitter and server
-land - the **shape** (five tables, the listed dimensions per table)
-is the resolved decision. Phase-2 work is in
-[`deferred.md`](./deferred.md): deploy strategy, schema migration
-framework, admin auth, CI integration, downsampling, PR comparison
-post-cutover, EBS RPO. None of these block the alpha.
diff --git a/benchmarks-website/planning/deferred.md b/benchmarks-website/planning/deferred.md
deleted file mode 100644
index 4c1e578c612..00000000000
--- a/benchmarks-website/planning/deferred.md
+++ /dev/null
@@ -1,118 +0,0 @@
-<!--
-SPDX-License-Identifier: Apache-2.0
-SPDX-FileCopyrightText: Copyright the Vortex contributors
--->
-
-# Deferred work (phase 2+)
-
-Things we know we need eventually, but **not in alpha**. Each item
-gets a one-paragraph stub here so we don't lose the thinking. None
-of these is being designed in detail right now: the path forward is
-clearer once the alpha loop (server + emitter + web-ui) is running
-end-to-end.
-
-The order below is roughly the expected pickup order, but nothing
-is binding.
-
-## Historical data migration
-
-A one-shot binary that reads `s3://vortex-ci-benchmark-results/data.json.gz`
-+ `commits.json` + `file-sizes-*.json.gz` and writes a fully
-populated v3 DuckDB. Carries a bug-for-bug port of v2's
-`server.js::getGroup` classifier (the only place a classifier
-exists in the codebase). Verifies against `bench.vortex.dev`'s
-`/api/metadata` before dual-write opens; the binary and its
-classifier are deleted post-cutover. The v2 classifier and lookup
-tables to port from live on `develop` at
-`benchmarks-website/server.js` and `benchmarks-website/src/config.js`.
-
-## Production deploy
-
-Dockerfile + docker-compose + EC2 init + EBS mount + nightly DuckDB
-`.backup` to S3 + watchtower polling ghcr.io. Single-EC2 deploy,
-matching v2's footprint. RPO is bounded by snapshot cadence; if
-that's too loose, streaming WAL backup is a follow-up.
-
-## CI workflow integration
-
-Update `bench-orchestrator/runner/executor.py` to pass the new
-`--gh-json-v3` flag. Add a dual-write step in `bench.yml` and
-`sql-benchmarks.yml` that POSTs to the v3 server alongside the
-existing `cat-s3.sh` append. Add a `commit-metadata` step that POSTs
-`records: []` for every push to `develop`. Old `--gh-json` emission
-stays alive through cutover.
-
-## Outbox safety net
-
-When CI POSTs start landing in real volume, failed POSTs need
-somewhere to go. Plan: `post-ingest.py` falls back to dumping the
-payload to `s3://vortex-ci-benchmark-results/outbox/<run_id>/...`,
-and a `drain-ingest-outbox.yml` cron re-POSTs every 10 minutes,
-deleting on success. Not built until we observe a failure that needs
-it.
-
-## Schema migration framework
-
-A `schema_meta` table + forward-only `migrations/NNN_*.sql` files
-applied in lex order on boot. Tested by replaying against a recent
-prod backup before merge. Not needed at alpha (the DB is rebuilt on
-schema change); becomes essential once real data lives in the DB.
-
-## Lookup tables and seed SQL
-
-`known_engines` / `known_formats` / `known_datasets`: display names
-and color hex per row, populated from a seed SQL file applied on
-every boot. Replaces v2's `ENGINE_RENAMES` / `SERIES_COLOR_MAP`
-constants in `config.js`. Until this lands, the web-ui falls back to
-raw `engine:format` strings and a small palette.
-
-## Derived views
-
-`v_compression_ratios`, `v_latest_per_group`, etc. Replaces v2's
-stored `vortex:parquet ratio compress time/...` rows with on-the-fly
-SQL. Until the views land, handlers compute the same thing inline.
-
-## Multi-page UI
-
-The full v2 page inventory: per-group landing with engine/category
-filters, full-screen modal, zoom/pan, deep links, per-commit
-snapshot page, summary cards (geomean ratios, random-access
-rankings), ad-hoc SQL page, mobile-friendly redesign.
-
-## Admin tooling
-
-A `benchmarks-admin` CLI that talks to the running server over a
-unix-domain socket, file-permission-gated to the bench user. SSH
-access to the host = admin access. First commands: `health`,
-`reload-seed`, `backup-now`. Unix socket listener in the server is
-the integration point. No HTTP `/admin/*` surface.
-
-## Auth upgrades
-
-The shared bearer token in a GH Actions secret + EC2 env var is
-fine for alpha and likely for launch. Upgrade paths (post-launch):
-AWS ALB + Cognito + GitHub OIDC, Cloudflare Tunnel + Access, or
-server-side GitHub JWKS validation. Pick when an admin feature or
-a multi-tenant scenario actually needs it.
-
-## Cutover + cleanup
-
-DNS flip from v2 to v3. Subsequent cleanup PR removes:
-- The v2 React app + Node `server.js`.
-- The legacy `vortex-bench --gh-json` emission path.
-- `scripts/cat-s3.sh` and `scripts/commit-json.sh`.
-- The migrator crate and its bug-for-bug classifier.
-
-## CI PR comparison post-cutover
-
-`sql-benchmarks.yml` PR mode currently downloads `data.json.gz`
-from S3 to find a baseline. Post-cutover that file stops growing.
-Plan: point the comparator at the v3 server's
-`/api/chart/:slug?last=N`. Cleanup follows the cutover PR.
-
-## Downsampling
-
-LTTB-style downsampling at 1x/2x/4x/8x, like v2 does today. Not
-built until charts visibly suffer at full resolution. Implementation
-is a SQL window function at query time, memoized per
-`(slug, range, level)`. Doesn't change the schema either way.
diff --git a/benchmarks-website/server/src/api/charts.rs b/benchmarks-website/server/src/api/charts.rs
index 0db858cf712..b0edfb0380c 100644
--- a/benchmarks-website/server/src/api/charts.rs
+++ b/benchmarks-website/server/src/api/charts.rs
@@ -74,10 +74,11 @@ pub(crate) fn chart_payload(
 
 /// Collect every chart inside one group. Returns `None` if the group has no
 /// data at all (callers should render a 404).
-// TODO: this currently re-runs the entire `collect_groups` discovery pass
-// (api.rs) per call before fetching each chart, which makes the landing page
-// O(groups * charts_per_group) DB queries plus the discovery scan. Fine for
-// the current dataset; revisit when chart counts grow.
+///
+/// Known N+1: this re-runs the entire [`collect_groups`] discovery pass per
+/// call before fetching each chart, so the landing page is
+/// O(groups * charts_per_group) DB queries plus the discovery scan. Fine
+/// for the current dataset; the fix is tracked as a separate follow-up.
 pub(crate) fn collect_group_charts(
     conn: &Connection,
     key: &GroupKey,
diff --git a/benchmarks-website/server/src/api/dto.rs b/benchmarks-website/server/src/api/dto.rs
index 1b1569f3564..0ffe3752497 100644
--- a/benchmarks-website/server/src/api/dto.rs
+++ b/benchmarks-website/server/src/api/dto.rs
@@ -4,10 +4,10 @@
 //! Wire-shape data transfer objects for the read API.
 //!
 //! These structs are the JSON the server emits on `/api/groups`,
-//! `/api/group/{slug}`, `/api/chart/{slug}`, and `/health`. The shapes match
-//! the contracts documented in `planning/02-contracts.md`; renaming or
-//! reordering fields is a wire-compat break and must be coordinated with
-//! the emitter and migrator.
+//! `/api/group/{slug}`, `/api/chart/{slug}`, and `/health`. Renaming or
+//! reordering fields is a wire-compat break — coordinate with
+//! `chart-init.js` (and the emitter / migrator if the change is on the
+//! ingest side, see [`crate::records`]).
 
 use std::collections::BTreeMap;
 
diff --git a/benchmarks-website/server/src/api/mod.rs b/benchmarks-website/server/src/api/mod.rs
index e4a4c18389a..7a3058f691d 100644
--- a/benchmarks-website/server/src/api/mod.rs
+++ b/benchmarks-website/server/src/api/mod.rs
@@ -4,9 +4,10 @@
 //! Read-side API: `/api/groups`, `/api/chart/{slug}`, `/api/group/{slug}`,
 //! `/health`.
 //!
-//! Group / chart / series fit follows
-//! `benchmarks-website/planning/01-schema.md`. Slugs round-trip through
-//! [`crate::slug::ChartKey`] / [`crate::slug::GroupKey`].
+//! Group / chart / series fit follows the layout in [`crate::schema`]:
+//! one fact table per measurement family, each with a known group / chart /
+//! series tuple. Slugs round-trip through [`crate::slug::ChartKey`] and
+//! [`crate::slug::GroupKey`].
 //!
 //! Submodules:
 //! - [`mod@dto`]          — every wire-shape struct (`Group`, `ChartResponse`, …).
diff --git a/benchmarks-website/server/src/ingest.rs b/benchmarks-website/server/src/ingest.rs
index 1edf3acf126..2ada60dadf0 100644
--- a/benchmarks-website/server/src/ingest.rs
+++ b/benchmarks-website/server/src/ingest.rs
@@ -3,9 +3,33 @@
 
 //! `POST /api/ingest` handler.
 //!
-//! All-or-nothing per envelope: every record is upserted in a single DuckDB
-//! transaction or none of them are. The reported `inserted`/`updated` counts
-//! aggregate across all five fact tables.
+//! Accepts a single [`crate::records::Envelope`] per POST and applies every
+//! record inside one DuckDB transaction. Bearer auth is enforced one layer
+//! up by [`crate::auth::require_bearer`] before the body is read.
+//!
+//! ## HTTP matrix
+//!
+//! | Condition                                                           | Status                                            |
+//! |---------------------------------------------------------------------|---------------------------------------------------|
+//! | Happy path                                                          | 200 with `{ "inserted": N, "updated": M }`        |
+//! | Malformed JSON or unknown field at the envelope level               | 400                                               |
+//! | Unknown `kind`, unknown record field, or per-record validation fail | 400 with the offending record's index             |
+//! | Missing or invalid bearer token                                     | 401 (raised by [`crate::auth::require_bearer`])   |
+//! | Schema version newer than this server expects                       | 409                                               |
+//! | Other server error                                                  | 500                                               |
+//!
+//! All-or-nothing semantics: a single failed record fails the whole batch
+//! and the transaction is rolled back. The reported `inserted` and `updated`
+//! counts aggregate across all five fact tables.
+//!
+//! ## measurement_id is server-internal
+//!
+//! Every fact-table row's primary key is computed by
+//! [`crate::db::measurement_id_query`] et al. just before the INSERT and
+//! never crosses a process boundary; emitters do not (and should not) send
+//! one. Re-ingesting the same `(commit_sha, dim tuple)` pair is the upsert
+//! case — `ON CONFLICT (measurement_id) DO UPDATE` overwrites the value
+//! columns and bumps the `updated` counter instead of `inserted`.
 
 use anyhow::Context as _;
 use anyhow::Result;
diff --git a/benchmarks-website/server/src/lib.rs b/benchmarks-website/server/src/lib.rs
index ae0a19b6cb9..9794f887476 100644
--- a/benchmarks-website/server/src/lib.rs
+++ b/benchmarks-website/server/src/lib.rs
@@ -1,11 +1,66 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-//! Vortex benchmarks website v3 (alpha) server.
+//! `vortex-bench-server` — the v3 [`bench.vortex.dev`](https://bench.vortex.dev)
+//! server.
 //!
-//! This crate is a leaf binary that owns a DuckDB file on local disk,
-//! accepts authenticated `/api/ingest` POSTs, and serves a small read API
-//! plus the HTML pages contributed by the web-ui component.
+//! A single Rust binary that owns one DuckDB file on local disk, accepts
+//! bearer-authenticated `/api/ingest` POSTs, and serves the JSON read API
+//! plus every HTML page. All static assets (Chart.js + the zoom plugin +
+//! `chart-init.js` + `style.css` + the two logos) are baked into the
+//! binary via `include_bytes!` so a deploy is one binary plus a database
+//! file. A `tower_http::compression::CompressionLayer` wraps every
+//! response — the landing page HTML alone is several hundred KB
+//! uncompressed, so this is the single biggest cold-load win.
+//!
+//! ## Routes
+//!
+//! - `GET /` — landing page, every group rendered as a collapsed
+//!   `<details>`. The first group's chart payloads are inlined into the
+//!   HTML; the rest are shells fetched on first toggle.
+//! - `GET /chart/{slug}` — single-chart permalink.
+//! - `GET /group/{slug}` — every chart in one group on a single page.
+//! - `GET /static/...` — the bundled JS / CSS / PNGs.
+//! - `GET /api/groups` — flat list of every group with chart-link metadata.
+//! - `GET /api/chart/{slug}` — one chart's payload (`commits`, `series`,
+//!   `unit_kind`, ...).
+//! - `GET /api/group/{slug}` — every chart in one group, payload-inlined.
+//! - `GET /health` — liveness probe + per-table row counts.
+//! - `POST /api/ingest` — bearer-gated ingest. See [`ingest`] for the HTTP
+//!   matrix and [`auth`] for the bearer middleware.
+//!
+//! ## Module map
+//!
+//! | Module        | Role                                                                                        |
+//! |---------------|---------------------------------------------------------------------------------------------|
+//! | [`app`]       | [`app::AppState`] (DB handle + bearer + path) and the Axum router composition.              |
+//! | [`auth`]      | Bearer-token middleware for `/api/ingest`.                                                  |
+//! | [`db`]        | [`db::DbHandle`] connection wrapper + the per-fact-table `measurement_id_*` hash functions. |
+//! | [`schema`]    | DuckDB DDL ([`schema::SCHEMA_DDL`]) and the wire schema version.                            |
+//! | [`records`]   | Wire shapes for `POST /api/ingest`.                                                         |
+//! | [`ingest`]    | `POST /api/ingest` handler — envelope validation, transaction, upsert dispatch.             |
+//! | [`error`]     | [`error::IngestError`] and [`error::ApiError`] with their HTTP-status mapping.              |
+//! | [`slug`]      | [`slug::ChartKey`] / [`slug::GroupKey`] enums + base64url round-trip.                       |
+//! | [`api`]       | Read API. `mod.rs` mounts the handlers; submodules are listed on its module doc.            |
+//! | [`html`]      | HTML pages — `mod.rs` mounts the routes; submodules render the body.                        |
+//!
+//! ## Request flow
+//!
+//! 1. Axum receives the request and routes by method + path.
+//! 2. `/api/ingest` first passes through [`auth::require_bearer`]; other
+//!    routes skip auth.
+//! 3. The handler parses body / path / query into typed inputs (e.g.
+//!    [`slug::ChartKey::from_slug`]).
+//! 4. The handler hands a closure to [`db::run_blocking`], which acquires
+//!    the connection mutex and runs the synchronous DuckDB call on
+//!    `tokio::task::spawn_blocking` so the runtime stays free.
+//! 5. The closure returns `Result<T, anyhow::Error>`. Errors are mapped
+//!    into [`error::IngestError`] / [`error::ApiError`] with the right
+//!    HTTP status.
+//! 6. The response is rendered (JSON via [`axum::Json`], HTML via the
+//!    `maud` templates in [`html`]).
+//! 7. Every response passes through [`tower_http::compression::CompressionLayer`]
+//!    on the way out.
 
 pub mod api;
 pub mod app;
diff --git a/benchmarks-website/server/src/records.rs b/benchmarks-website/server/src/records.rs
index 0217675cdea..85ee1be8d61 100644
--- a/benchmarks-website/server/src/records.rs
+++ b/benchmarks-website/server/src/records.rs
@@ -3,10 +3,46 @@
 
 //! Wire shapes for `POST /api/ingest`.
 //!
-//! These types deserialize the ingest envelope defined in
-//! `benchmarks-website/planning/02-contracts.md`. Each variant of [`Record`]
-//! is gated by `#[serde(deny_unknown_fields)]`, so unknown fields produce
-//! a 400 with the offending record's index.
+//! Each [`Record`] variant deserializes one row destined for one of the five
+//! fact tables in [`crate::schema`]. The producer side of the contract lives
+//! in `vortex-bench/src/v3.rs` (the `--gh-json-v3` emitter); when changing a
+//! shape here, change both sides in the same commit.
+//!
+//! ## Records are discriminated by `kind`
+//!
+//! Every record carries a `kind` field that picks one of the five fact
+//! tables; serde drives this with `#[serde(tag = "kind", rename_all =
+//! "snake_case")]`.
+//!
+//! | `kind`               | Destination table       |
+//! |----------------------|-------------------------|
+//! | `query_measurement`  | `query_measurements`    |
+//! | `compression_time`   | `compression_times`     |
+//! | `compression_size`   | `compression_sizes`     |
+//! | `random_access_time` | `random_access_times`   |
+//! | `vector_search_run`  | `vector_search_runs`    |
+//!
+//! Every record struct carries `#[serde(deny_unknown_fields)]`, so unknown
+//! fields surface as a `400` with the offending record's index — version
+//! skew is supposed to fail loudly. Unknown `kind` values produce the same
+//! `400` from the outer enum's tag check.
+//!
+//! ## Ingest envelope
+//!
+//! `POST /api/ingest` accepts one [`Envelope`] per request. The envelope
+//! wraps a heterogeneous batch of records (any mix of `kind`s):
+//!
+//! - `run_meta` — [`RunMeta`] with `benchmark_id`, `schema_version`
+//!   (must equal [`crate::schema::SCHEMA_VERSION`]), and `started_at`.
+//! - `commit` — [`CommitInfo`] with the columns of the `commits` dim table,
+//!   keyed by their column names with `commit_sha` renamed to `sha`. The
+//!   server upserts this row before applying any record.
+//! - `records` — array of per-`kind` records.
+//!
+//! `vortex-bench --gh-json-v3 <path>` writes JSONL of bare records only —
+//! the envelope (`run_meta` + `commit`) is added by the post-ingest script
+//! before POSTing, which keeps the Rust emitter dependency-light and lets
+//! CI fill the commit fields from `${{ github.sha }}` plus `git show`.
 
 use serde::Deserialize;
 
diff --git a/benchmarks-website/server/src/schema.rs b/benchmarks-website/server/src/schema.rs
index be00f86eac6..70727ab7fab 100644
--- a/benchmarks-website/server/src/schema.rs
+++ b/benchmarks-website/server/src/schema.rs
@@ -1,11 +1,103 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-//! DuckDB schema DDL applied on server boot.
+//! DuckDB schema applied on server boot — one `commits` dim plus five fact
+//! tables, one per measurement family.
 //!
-//! See `benchmarks-website/planning/01-schema.md` for the column contracts.
-//! There is no migration framework at alpha: if the schema changes, delete
-//! the DuckDB file and restart.
+//! ## Design principles
+//!
+//! 1. **One fact table per (dim shape, value shape).** A row in any fact
+//!    table has every value column populated; NULLs only appear in genuinely
+//!    optional dimensions. The five families have different dim shapes, so
+//!    forcing them into one wide table either bloats every row with NULL
+//!    columns or splits a single scan's results across multiple rows that
+//!    have to be re-joined to render one chart.
+//! 2. **No discriminator columns spanning families.** No `metric_kind` enum
+//!    forcing the five shapes into one row.
+//! 3. **No JSON escape hatch.** New benchmark parameters become real columns.
+//!    Adding a nullable column is cheap; the readability win is worth it.
+//! 4. **Hashed primary key per fact table.** Every fact table's
+//!    `measurement_id` is a deterministic 64-bit hash of `commit_sha` plus
+//!    that table's dimensional tuple, computed in
+//!    [`crate::db::measurement_id_query`] et al. Including `commit_sha`
+//!    makes every (commit, dim) pair a distinct row — that is exactly what
+//!    the chart pages render as a time series. Re-emission of the same
+//!    (commit, dim) pair is the upsert case. The hash is **server-internal**
+//!    and never crosses a process boundary; the wire never carries it.
+//! 5. **`commits` is the only dim table.** Engine, format, dataset, etc.
+//!    stay as inline strings; DuckDB's dictionary encoding makes a lookup
+//!    table pointless.
+//! 6. **Ratios are not stored.** Computed at query time from
+//!    `compression_sizes`.
+//!
+//! ## Tables
+//!
+//! - **`commits`** — dim table. `commit_sha` is the PK. `timestamp`,
+//!   `tree_sha`, and `url` are required (the server cannot render a chart
+//!   without them); `message` and the author/committer name + email pair are
+//!   nullable so v2-imported rows that lacked them survive. Populated on
+//!   every `/api/ingest` from the envelope's `commit` block, and on every
+//!   migrator run from `commits.json`.
+//! - **`query_measurements`** — SQL query suite measurements (TPC-H, TPC-DS,
+//!   ClickBench, StatPopGen, PolarSignals, Fineweb, GhArchive, Public-BI).
+//!   Natural key: `(commit_sha, dataset, dataset_variant, scale_factor,
+//!   query_idx, storage, engine, format)`. Memory columns
+//!   (`peak_physical`, `peak_virtual`, `physical_delta`, `virtual_delta`)
+//!   are populated together when the run was instrumented for memory and
+//!   are NULL otherwise; the ingest path enforces "all four or none".
+//!   `dataset_variant` carries a categorical sub-name (the Public-BI dataset
+//!   today; ClickBench flavor stays NULL); `scale_factor` is the TPC SF as a string.
+//! - **`compression_times`** — encode/decode timings from `compress-bench`.
+//!   Natural key: `(commit_sha, dataset, dataset_variant, format, op)`,
+//!   where `op ∈ {encode, decode}`. Encode and decode share a table because
+//!   they share dim and value shape; keeping them together makes the
+//!   per-format chart a single SQL query.
+//! - **`compression_sizes`** — on-disk sizes from `compress-bench`. One-shot
+//!   (no per-iteration data, no `all_runtimes_ns`). Natural key:
+//!   `(commit_sha, dataset, dataset_variant, format)`. Compression ratios
+//!   (e.g. `vortex:parquet-zstd`) are NOT stored — they are a SELECT over
+//!   this table joined to itself, computed in `api/summary.rs`.
+//! - **`random_access_times`** — take-time timings from
+//!   `random-access-bench`. Different dataset namespace from
+//!   `compression_times` (chimp, taxi, etc.) — kept in its own table so
+//!   dataset filters never have to disambiguate which suite a row belongs
+//!   to. Natural key: `(commit_sha, dataset, format)`.
+//! - **`vector_search_runs`** — cosine-similarity scans from
+//!   `vector-search-bench`. The only family that emits a timing **plus**
+//!   side counters (`matches`, `rows_scanned`, `bytes_scanned`) for the
+//!   same scan; keeping them in one row avoids a 1:N split that has to be
+//!   re-joined on read. Natural key: `(commit_sha, dataset, layout,
+//!   flavor, threshold)`. `iterations` is not part of the dim hash — it is
+//!   a side count, like `matches`.
+//!
+//! ## Column conventions
+//!
+//! - `commit_sha` is `TEXT NOT NULL` on every fact table and references the
+//!   `commits.commit_sha` PK. There is no FK constraint declared at alpha;
+//!   the ingest path upserts the commit before the records.
+//! - `value_ns` is the median per-iteration nanosecond timing for timing
+//!   tables. `value_bytes` is the on-disk byte count for `compression_sizes`.
+//! - `all_runtimes_ns BIGINT[]` carries the per-iteration timings inline.
+//!   DuckDB's list type avoids a child table; chart code only ever reads
+//!   `value_ns`, so the list is effectively cold storage today, kept for
+//!   future variance or distribution charts.
+//! - `storage` (only on `query_measurements`) is `nvme` or `s3`. Legacy `gcs`
+//!   was dropped during the v3 design pass.
+//! - `env_triple` is the `arch-os-env` host triple captured at run time
+//!   (e.g. `x86_64-linux-gnu`). Optional everywhere; useful for slicing
+//!   results by host class once the data set has more than one host class.
+//!
+//! ## Schema changes
+//!
+//! There is no migration framework. If you change the schema:
+//!
+//! 1. Update [`SCHEMA_DDL`] and the matching [`crate::records`] struct.
+//! 2. Update or delete any local `bench.duckdb` (the migrator's
+//!    `open_target_db` already deletes-and-recreates).
+//! 3. Bump [`SCHEMA_VERSION`] if the wire envelope's
+//!    `run_meta.schema_version` semantics change.
+//!
+//! A real forward-only migration framework is post-cutover work.
 
 /// DDL for the `commits` dim plus the five fact tables.
 pub const SCHEMA_DDL: &str = r#"
diff --git a/vortex-bench/src/datasets/mod.rs b/vortex-bench/src/datasets/mod.rs
index 7a499ff6998..d89f99d36f4 100644
--- a/vortex-bench/src/datasets/mod.rs
+++ b/vortex-bench/src/datasets/mod.rs
@@ -37,13 +37,14 @@ pub(crate) fn normalize_benchmark_runner_id(benchmark_runner: &str) -> String {
 pub trait Dataset {
     fn name(&self) -> &str;
 
-    /// Map this dataset to the v3 `(dataset, dataset_variant)` pair.
+    /// Map this dataset to the v3 `(dataset, dataset_variant)` pair emitted
+    /// in `compression_*` records.
     ///
     /// Default: `(name(), None)`. Override for suites that have a parent
     /// namespace and a sub-dataset (e.g. Public-BI emits
     /// `dataset = "public-bi"`, `dataset_variant = "<sub-dataset name>"`).
-    /// The convention matches the SQL query path; see the per-suite dim
-    /// values table in `benchmarks-website/planning/benchmark-mapping.md`.
+    /// The query-side equivalent is documented on
+    /// [`crate::v3::benchmark_dataset_dims`].
     fn v3_dataset_dims(&self) -> (&str, Option<&str>) {
         (self.name(), None)
     }
diff --git a/vortex-bench/src/v3.rs b/vortex-bench/src/v3.rs
index 1e42a872214..c2dabca1ff9 100644
--- a/vortex-bench/src/v3.rs
+++ b/vortex-bench/src/v3.rs
@@ -3,13 +3,71 @@
 
 //! v3 wire-format records emitted by `--gh-json-v3`.
 //!
-//! See `benchmarks-website/planning/02-contracts.md` for the discriminated record
-//! format and `benchmarks-website/planning/01-schema.md` for the destination
-//! tables. The records emitted here are bare: the post-ingest envelope
-//! (`run_meta` + `commit`) is added by `scripts/post-ingest.py` before POSTing
-//! to `/api/ingest`.
+//! Each record on the wire is one of five `kind`-discriminated shapes that
+//! map 1:1 to the v3 fact tables. The records here are **bare**: the
+//! ingest envelope (`run_meta` + `commit`) is added by
+//! `scripts/post-ingest.py` before POSTing to
+//! `bench.vortex.dev/api/ingest`, which keeps the Rust emitter dependency-light
+//! and lets CI fill the commit fields from `${{ github.sha }}` plus
+//! `git show`.
 //!
-//! This module is purely additive to the existing `gh-json` emission path.
+//! Wire-shape source of truth: [`vortex_bench_server::records`]. When
+//! changing a shape, change both sides in the same commit and run the
+//! server's snapshot tests.
+//!
+//! ## Producer mapping
+//!
+//! Every emitter / measurement type in `vortex-bench` maps to exactly one
+//! `kind`:
+//!
+//! | Source measurement                                                       | Wire `kind`           | Notes                                                                                                                  |
+//! |--------------------------------------------------------------------------|-----------------------|------------------------------------------------------------------------------------------------------------------------|
+//! | [`crate::measurements::QueryMeasurement`] (+ paired `MemoryMeasurement`) | `query_measurement`   | Two structs collapse into **one** record; memory fields omitted if `--track-memory` was off.                           |
+//! | [`crate::measurements::TimingMeasurement`] (random-access only)          | `random_access_time`  |                                                                                                                        |
+//! | [`crate::measurements::CompressionTimingMeasurement`]                    | `compression_time`    | `op` is decided by which side of `compress-bench`'s timing loop produced it.                                           |
+//! | `CompressionSizeMeasurement` (in `vortex-bench/src/compress/mod.rs`)     | `compression_size`    | Was previously a `CustomUnitMeasurement` with a byte unit; now extracted explicitly.                                   |
+//! | Cross-format ratio `CustomUnitMeasurement` rows                          | **dropped**           | Computed on read from `compression_sizes`.                                                                             |
+//! | `ScanTiming` (vector-search)                                             | `vector_search_run`   | Carries timing **and** the three side counters in the same row.                                                        |
+//!
+//! ## Per-binary inventory
+//!
+//! - `benchmarks/datafusion-bench` and `benchmarks/duckdb-bench` produce
+//!   `QueryMeasurement` (+ `MemoryMeasurement` when `--track-memory`) →
+//!   `query_measurements` with `engine = "datafusion"` or `"duckdb"`.
+//! - `benchmarks/lance-bench` is three things in one crate: a query runner
+//!   (`format = lance`) → `query_measurements`; a compression runner →
+//!   `compression_times` + `compression_sizes` with `format = lance`; a
+//!   random-access runner → `random_access_times` with `format = lance`.
+//! - `benchmarks/compress-bench` produces encode + decode
+//!   `CompressionTimingMeasurement` → `compression_times` (with
+//!   `op ∈ {encode, decode}`) and on-disk-size measurements →
+//!   `compression_sizes`. Ratio `CustomUnitMeasurement` rows are dropped;
+//!   the reader recomputes ratios.
+//! - `benchmarks/random-access-bench` produces `TimingMeasurement` →
+//!   `random_access_times`. Datasets here (chimp, taxi, ...) are a
+//!   different namespace from the SQL query suites.
+//! - `benchmarks/vector-search-bench` produces `ScanTiming` →
+//!   `vector_search_runs`. `dataset`, `layout`, `flavor`, and `threshold`
+//!   live on the binary's `Args`; the emitter plumbs them through to the
+//!   record, since the timing struct itself does not carry them.
+//!
+//! ## Per-suite query dim values
+//!
+//! For SQL query suites (everything that flows through `query_measurements`),
+//! the dim columns are populated as documented on
+//! [`benchmark_dataset_dims`].
+//!
+//! ## Historical-data side
+//!
+//! [`vortex_bench_migrate::classifier`] is the bug-for-bug port of v2's
+//! `getGroup` that recovers the same `(kind, dim tuple)` pair from the
+//! v2 S3 dump. It exists only for the one-shot migration; once cutover
+//! lands and the historical archive is loaded, both the migrator and its
+//! classifier go away. For new ingest, no classifier is needed — the
+//! emitter writes v3-shape records directly.
+//!
+//! [`vortex_bench_server::records`]: ../../../benchmarks-website/server/src/records.rs
+//! [`vortex_bench_migrate::classifier`]: ../../../benchmarks-website/migrate/src/classifier.rs
 
 use std::io::Write;
 use std::sync::LazyLock;
@@ -218,8 +276,20 @@ fn canonical_tpc_scale_factor(scale_factor: &str) -> String {
 /// Map a [`BenchmarkDataset`] to the `(dataset, dataset_variant, scale_factor)`
 /// triple emitted in `query_measurement` records.
 ///
-/// Mirrors the `Per-suite dim values` table in
-/// `benchmarks-website/planning/benchmark-mapping.md`.
+/// The mapping is fixed because v3's chart grouping reads the dim columns
+/// directly. Live records must use the same shape the v2 → v3 migrator
+/// produces so the two streams collapse onto one chart group.
+///
+/// | `BenchmarkDataset` | `dataset` | `dataset_variant` | `scale_factor` | Notes |
+/// |---|---|---|---|---|
+/// | `TpcH { scale_factor }`     | `tpch`         | `None`              | TPC SF as string (`"1"`, `"10"`, `"100"`, `"1000"`) | Run through [`canonical_tpc_scale_factor`] so `"1.0"` and `"1"` collapse. |
+/// | `TpcDS { scale_factor }`    | `tpcds`        | `None`              | TPC SF as string                                    | Same canonicalization as TPC-H. |
+/// | `ClickBench { flavor: _ }`  | `clickbench`   | `None`              | `None`                                              | Migrate path drops flavor; live emitter matches so historical and live merge. |
+/// | `StatPopGen { n_rows: _ }`  | `statpopgen`   | `None`              | `None`                                              | Migrate path carries no SF for this suite; live drops it for the same reason. |
+/// | `PolarSignals { n_rows: _ }`| `polarsignals` | `None`              | `None`                                              | Same as StatPopGen. |
+/// | `Fineweb`                   | `fineweb`      | `None`              | `None`                                              | |
+/// | `GhArchive`                 | `gharchive`    | `None`              | `None`                                              | |
+/// | `PublicBi { name }`         | `public-bi`    | dataset name (e.g. `cms-provider`) | `None`               | Sub-dataset name lives in `dataset_variant`. |
 pub fn benchmark_dataset_dims(d: &BenchmarkDataset) -> (String, Option<String>, Option<String>) {
     match d {
         BenchmarkDataset::TpcH { scale_factor } => (

From e491b1d87fc57875d27a7280d10038d65f5e6429 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 6 May 2026 04:40:15 +0000
Subject: [PATCH 2/3] cleanup(benchmarks-website): trim dead code, stale
 comments, missing docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-cutover quality pass on `server/` and `migrate/`:

- `server/tests/common/mod.rs`: drop the 18 `unreachable_pub` warnings
  per integration-test binary by switching every shared helper to
  `pub(crate)`. The `#![allow(dead_code)]` stays — different test
  binaries still use different subsets.
- `server/src/db.rs`: hoist the six function-scoped `use std::hash::Hasher`
  statements to the module top, and add doc comments on `finish` and
  `hasher_for` explaining why each exists.
- `server/src/{records, app, ingest}.rs` and `migrate/src/{classifier,
  commits, v2, verify}.rs`: every previously-undocumented `pub` field
  on the wire / state structs now carries a single-line `///` doc.
  Field semantics (units, encoding, role) that were only implicit in
  the field name are now explicit.
- `server/src/main.rs`: document the four environment variables
  (`INGEST_BEARER_TOKEN`, `VORTEX_BENCH_DB`, `VORTEX_BENCH_BIND`,
  `VORTEX_BENCH_LOG`) on the binary's `//!` doc.
- `server/src/{slug, error}.rs`: remove the last `planning/*.md`
  cross-references, replacing them with the [`crate::*`] links that
  point at the live source of truth.
- `static/chart-init.js`: fix the stale `server/src/html.rs` references
  in two comment blocks (the file is now `server/src/html/mod.rs`),
  rebuild the file map at the top to match the actual section
  ordering, and add the two canvas state contract entries
  (`__bench_display_unit`, `__bench_y_user_set`) that the orientation
  comment had drifted from.

Verification:

- `cargo clippy -p vortex-bench-server -p vortex-bench-migrate --all-targets
  --all-features -- -W dead_code -W unreachable_pub -W unused_imports`
  → 0 warnings.
- `cargo test -p vortex-bench-server -p vortex-bench-migrate` → all 140+
  tests pass.
- `cargo test --doc -p vortex-bench-server -p vortex-bench-migrate` → 0
  failures.
- No snapshot regenerated.
- v2 site files (`server.js`, `src/`, `package.json`, etc.) untouched.

Signed-off-by: Claude <noreply@anthropic.com>
---
 benchmarks-website/migrate/src/classifier.rs  | 20 ++++-
 benchmarks-website/migrate/src/commits.rs     |  2 +
 benchmarks-website/migrate/src/v2.rs          | 32 +++++++
 benchmarks-website/migrate/src/verify.rs      |  8 ++
 benchmarks-website/server/src/app.rs          |  3 +
 benchmarks-website/server/src/db.rs           | 19 ++--
 benchmarks-website/server/src/error.rs        |  5 +-
 benchmarks-website/server/src/ingest.rs       |  2 +
 benchmarks-website/server/src/main.rs         | 14 ++-
 benchmarks-website/server/src/records.rs      | 89 +++++++++++++++++--
 benchmarks-website/server/src/slug.rs         | 22 ++---
 .../server/static/chart-init.js               | 49 ++++++----
 benchmarks-website/server/tests/chart_api.rs  |  4 +-
 benchmarks-website/server/tests/common/mod.rs | 44 +++++----
 benchmarks-website/server/tests/ingest.rs     |  5 +-
 15 files changed, 244 insertions(+), 74 deletions(-)

diff --git a/benchmarks-website/migrate/src/classifier.rs b/benchmarks-website/migrate/src/classifier.rs
index 8e1c1e2a110..e473c2a6a02 100644
--- a/benchmarks-website/migrate/src/classifier.rs
+++ b/benchmarks-website/migrate/src/classifier.rs
@@ -103,14 +103,22 @@ const ENGINE_RENAMES: &[(&str, &str)] = &[
     ("lance", "lance"),
 ];
 
-/// One entry of `QUERY_SUITES`.
+/// One entry of [`QUERY_SUITES`].
 #[derive(Debug, Clone, Copy)]
 pub struct QuerySuite {
+    /// Lowercase suite prefix used to match v2 record names (e.g. `tpch`).
     pub prefix: &'static str,
+    /// Human-readable suite name as v2 served it from `/api/metadata`.
     pub display_name: &'static str,
+    /// Uppercase prefix v2's `formatQuery` produced (e.g. `TPC-H`).
     pub query_prefix: &'static str,
+    /// Override for the dataset key v2 records use inside their `dataset`
+    /// object. Falls back to `prefix` when `None`.
     pub dataset_key: Option<&'static str>,
+    /// True if the suite's group name fans out by `(storage, scale_factor)`
+    /// (e.g. `TPC-H (NVMe) (SF=1)`); false collapses to a single group.
     pub fan_out: bool,
+    /// True if v2 deliberately ignored this suite (no live group is rendered).
     pub skip: bool,
 }
 
@@ -300,8 +308,12 @@ pub fn get_group(record: &V2Record) -> Option<V2Group> {
 /// `(group, chartName, seriesName)` triple after rename / skip rules.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct V2Classification {
+    /// Group the v2 server would place this record in.
     pub group: V2Group,
+    /// Chart name v2 displayed for this record (uppercase, separators
+    /// normalized).
     pub chart: String,
+    /// Series name after v2's `ENGINE_RENAMES` was applied.
     pub series: String,
 }
 
@@ -751,8 +763,10 @@ fn bin_query(cls: &V2Classification, record: &V2Record) -> Option<V3Bin> {
         _ => "nvme".to_string(),
     };
 
-    // ClickBench's "flavor" lives in dataset_variant per benchmark-mapping.md
-    // - we don't have it from a v2 name string, so we leave it None.
+    // ClickBench's "flavor" lives in `dataset_variant`, but v2 record names
+    // never encoded it — leave it `None` so historical and live rows merge
+    // (the live emitter does the same; see `vortex-bench/src/v3.rs`'s
+    // `benchmark_dataset_dims` for the matching shape).
     Some(V3Bin::Query {
         dataset: suite.prefix.to_string(),
         dataset_variant: None,
diff --git a/benchmarks-website/migrate/src/commits.rs b/benchmarks-website/migrate/src/commits.rs
index 87c53caa41b..a9c5f056cd7 100644
--- a/benchmarks-website/migrate/src/commits.rs
+++ b/benchmarks-website/migrate/src/commits.rs
@@ -91,5 +91,7 @@ fn optional_field(field: &Option<String>) -> Option<String> {
 /// Per-call warning bag returned to the caller for logging.
 #[derive(Debug, Default)]
 pub struct UpsertOutcome {
+    /// Human-readable warnings — typically one per missing required field on
+    /// the v2 commit (timestamp, tree_id, url).
     pub warnings: Vec<String>,
 }
diff --git a/benchmarks-website/migrate/src/v2.rs b/benchmarks-website/migrate/src/v2.rs
index dd8190346bb..79785ba1fd4 100644
--- a/benchmarks-website/migrate/src/v2.rs
+++ b/benchmarks-website/migrate/src/v2.rs
@@ -19,19 +19,32 @@ use serde::Deserialize;
 /// optional because different benches emit different subsets.
 #[derive(Debug, Clone, Deserialize)]
 pub struct V2Record {
+    /// Slash-separated benchmark identifier (e.g. `tpch_q01/datafusion:vortex-file-compressed`).
+    /// The classifier parses this string to recover dim values.
     pub name: String,
+    /// 40-hex commit SHA. Present on every well-formed v2 record.
     #[serde(default)]
     pub commit_id: Option<String>,
+    /// v2 unit string (`ns`, `bytes`, `ratio`, ...). Not used for routing —
+    /// the classifier picks the v3 fact table from the `name` prefix instead.
     #[serde(default)]
     pub unit: Option<String>,
+    /// Polymorphic value — emitters wrote both numbers and stringified
+    /// numbers. Use [`value_as_f64`] to normalize.
     #[serde(default)]
     pub value: Option<serde_json::Value>,
+    /// Storage backend the run targeted (`S3` or `NVMe`, mixed case in v2).
     #[serde(default)]
     pub storage: Option<String>,
+    /// Polymorphic dataset block — sometimes a string, sometimes an object
+    /// keyed by suite name with a `scale_factor` inside (use
+    /// [`dataset_scale_factor`]).
     #[serde(default)]
     pub dataset: Option<serde_json::Value>,
+    /// Per-iteration runtimes; same numeric polymorphism as `value`.
     #[serde(default)]
     pub all_runtimes: Option<Vec<serde_json::Value>>,
+    /// Host environment triple block.
     #[serde(default)]
     pub env_triple: Option<V2EnvTriple>,
 }
@@ -101,10 +114,13 @@ pub fn runtime_as_i64(value: &serde_json::Value) -> Option<i64> {
 /// stored it as an object; we serialize it back out as `arch-os-env`.
 #[derive(Debug, Clone, Deserialize)]
 pub struct V2EnvTriple {
+    /// Host CPU architecture (e.g. `x86_64`).
     #[serde(default)]
     pub architecture: Option<String>,
+    /// Operating system name (e.g. `linux`).
     #[serde(default)]
     pub operating_system: Option<String>,
+    /// Host environment label (e.g. `gnu`).
     #[serde(default)]
     pub environment: Option<String>,
 }
@@ -122,17 +138,25 @@ impl V2EnvTriple {
 /// One JSONL line of `commits.json`.
 #[derive(Debug, Clone, Deserialize)]
 pub struct V2Commit {
+    /// 40-hex commit SHA (the v2 schema named this `id`, not `commit_sha`).
     pub id: String,
+    /// RFC 3339 commit timestamp; required for the v3 row but tolerated as
+    /// missing in the source dump.
     #[serde(default)]
     pub timestamp: Option<String>,
+    /// Full commit message.
     #[serde(default)]
     pub message: Option<String>,
+    /// Author block.
     #[serde(default)]
     pub author: Option<V2Person>,
+    /// Committer block.
     #[serde(default)]
     pub committer: Option<V2Person>,
+    /// Git tree SHA.
     #[serde(default)]
     pub tree_id: Option<String>,
+    /// GitHub commit URL.
     #[serde(default)]
     pub url: Option<String>,
 }
@@ -140,8 +164,10 @@ pub struct V2Commit {
 /// Author or committer block on a v2 commit record.
 #[derive(Debug, Clone, Deserialize)]
 pub struct V2Person {
+    /// Display name.
     #[serde(default)]
     pub name: Option<String>,
+    /// Email address.
     #[serde(default)]
     pub email: Option<String>,
 }
@@ -150,12 +176,18 @@ pub struct V2Person {
 /// `scripts/capture-file-sizes.py`.
 #[derive(Debug, Clone, Deserialize)]
 pub struct V2FileSize {
+    /// 40-hex commit SHA.
     pub commit_id: String,
+    /// Compression dataset name (`benchmark` is the v2 field name).
     pub benchmark: String,
+    /// TPC SF as a string when relevant.
     #[serde(default)]
     pub scale_factor: Option<String>,
+    /// Format the file was produced in.
     pub format: String,
+    /// Path of the underlying file (e.g. `lineitem.parquet`); informational.
     pub file: String,
+    /// Size in bytes; summed across files in the same `(commit, dataset, format)`.
     pub size_bytes: i64,
 }
 
diff --git a/benchmarks-website/migrate/src/verify.rs b/benchmarks-website/migrate/src/verify.rs
index 743dff1e528..c855904a5a5 100644
--- a/benchmarks-website/migrate/src/verify.rs
+++ b/benchmarks-website/migrate/src/verify.rs
@@ -24,9 +24,14 @@ use crate::classifier::QUERY_SUITES;
 /// Result of one `verify` run.
 #[derive(Debug, Default)]
 pub struct VerifyReport {
+    /// Group display names present in both v2 and v3.
     pub matched_groups: Vec<String>,
+    /// Group display names that exist in v3 but not v2.
     pub only_in_v3: Vec<String>,
+    /// Group display names that exist in v2 but not v3 — these gate the CLI's
+    /// non-zero exit.
     pub only_in_v2: Vec<String>,
+    /// Per-group chart-count diffs for groups present on both sides.
     pub chart_diffs: Vec<ChartDiff>,
 }
 
@@ -34,8 +39,11 @@ pub struct VerifyReport {
 /// group is structurally present on both sides but the counts differ.
 #[derive(Debug, Clone)]
 pub struct ChartDiff {
+    /// Group display name.
     pub group: String,
+    /// Number of charts v2 reported for this group.
     pub v2_count: usize,
+    /// Number of charts the migrated v3 DuckDB has for this group.
     pub v3_count: usize,
 }
 
diff --git a/benchmarks-website/server/src/app.rs b/benchmarks-website/server/src/app.rs
index 98aab6e6c94..759205431b1 100644
--- a/benchmarks-website/server/src/app.rs
+++ b/benchmarks-website/server/src/app.rs
@@ -34,8 +34,11 @@ use crate::ingest;
 /// or a small `String`).
 #[derive(Clone)]
 pub struct AppState {
+    /// Mutex-guarded DuckDB connection. See [`crate::db`].
     pub db: DbHandle,
+    /// Bearer token expected on `/api/ingest`. Compared via constant-time eq.
     pub bearer_token: Arc<String>,
+    /// On-disk path of the DuckDB file. Surfaced on `/health`.
     pub db_path: Arc<PathBuf>,
 }
 
diff --git a/benchmarks-website/server/src/db.rs b/benchmarks-website/server/src/db.rs
index 1233098572c..ce503a701ae 100644
--- a/benchmarks-website/server/src/db.rs
+++ b/benchmarks-website/server/src/db.rs
@@ -14,6 +14,7 @@
 //! hash never crosses a process boundary, so the exact byte layout below
 //! is private to this server.
 
+use std::hash::Hasher as _;
 use std::path::Path;
 use std::sync::Arc;
 
@@ -59,22 +60,18 @@ where
     .context("DB task panicked")?
 }
 
-/// Hash a sequence of fields with a per-table tag to produce a 64-bit
-/// `measurement_id`. The bit-cast to `i64` is intentional: DuckDB's `BIGINT`
-/// is signed.
+/// Finalize the hash and bit-cast to `i64` because DuckDB's `BIGINT` is
+/// signed.
 fn finish(hasher: XxHash64) -> i64 {
-    use std::hash::Hasher as _;
     hasher.finish() as i64
 }
 
 fn write_str(hasher: &mut XxHash64, s: &str) {
-    use std::hash::Hasher as _;
     hasher.write_u64(s.len() as u64);
     hasher.write(s.as_bytes());
 }
 
 fn write_opt_str(hasher: &mut XxHash64, s: Option<&str>) {
-    use std::hash::Hasher as _;
     match s {
         Some(s) => {
             hasher.write_u8(1);
@@ -85,17 +82,17 @@ fn write_opt_str(hasher: &mut XxHash64, s: Option<&str>) {
 }
 
 fn write_i32(hasher: &mut XxHash64, v: i32) {
-    use std::hash::Hasher as _;
     hasher.write_i32(v);
 }
 
 fn write_f64(hasher: &mut XxHash64, v: f64) {
-    use std::hash::Hasher as _;
     hasher.write_u64(v.to_bits());
 }
 
+/// Initialize a hasher seeded with a per-table tag so two fact tables that
+/// happen to share the same dim values still produce distinct
+/// `measurement_id`s.
 fn hasher_for(tag: &'static str) -> XxHash64 {
-    use std::hash::Hasher as _;
     let mut h = XxHash64::with_seed(0);
     h.write(tag.as_bytes());
     h.write_u8(0);
@@ -148,8 +145,8 @@ pub fn measurement_id_random_access(r: &RandomAccessTime) -> i64 {
     finish(h)
 }
 
-/// Hash for `vector_search_runs` rows. `iterations` is intentionally not part
-/// of the dim tuple per `01-schema.md`.
+/// Hash for `vector_search_runs` rows. `iterations` is intentionally not
+/// part of the dim tuple — it is a side count, not a dimension.
 pub fn measurement_id_vector_search(r: &VectorSearchRun) -> i64 {
     let mut h = hasher_for("vector_search_runs");
     write_str(&mut h, &r.commit_sha);
diff --git a/benchmarks-website/server/src/error.rs b/benchmarks-website/server/src/error.rs
index f477ccf63d3..ffaee55a388 100644
--- a/benchmarks-website/server/src/error.rs
+++ b/benchmarks-website/server/src/error.rs
@@ -3,8 +3,9 @@
 
 //! Error types for the bench server.
 //!
-//! [`IngestError`] models the HTTP matrix from `02-contracts.md` for the
-//! `POST /api/ingest` route. [`ApiError`] is the catch-all for read routes.
+//! [`IngestError`] models the HTTP matrix documented on
+//! [`crate::ingest`] for the `POST /api/ingest` route. [`ApiError`] is
+//! the catch-all for read routes.
 
 use axum::Json;
 use axum::http::StatusCode;
diff --git a/benchmarks-website/server/src/ingest.rs b/benchmarks-website/server/src/ingest.rs
index 2ada60dadf0..9b6844dca44 100644
--- a/benchmarks-website/server/src/ingest.rs
+++ b/benchmarks-website/server/src/ingest.rs
@@ -59,7 +59,9 @@ use crate::schema::SCHEMA_VERSION;
 /// Successful ingest response body.
 #[derive(Debug, Serialize)]
 pub struct IngestResponse {
+    /// Rows inserted across all five fact tables in this transaction.
     pub inserted: u64,
+    /// Rows that hit `ON CONFLICT (measurement_id) DO UPDATE`.
     pub updated: u64,
 }
 
diff --git a/benchmarks-website/server/src/main.rs b/benchmarks-website/server/src/main.rs
index 93768fdbfca..00ff8418874 100644
--- a/benchmarks-website/server/src/main.rs
+++ b/benchmarks-website/server/src/main.rs
@@ -1,7 +1,19 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-//! Binary entrypoint for the bench.vortex.dev v3 alpha server.
+//! Binary entrypoint for `vortex-bench-server`.
+//!
+//! Reads four environment variables before handing off to
+//! [`vortex_bench_server::app::router`]:
+//!
+//! - `INGEST_BEARER_TOKEN` — required. Token presented by ingest clients
+//!   on `Authorization: Bearer <token>`. Compared in constant time.
+//! - `VORTEX_BENCH_DB` — DuckDB file path. Default: `bench.duckdb` in the
+//!   working directory.
+//! - `VORTEX_BENCH_BIND` — `host:port` to listen on. Default
+//!   `127.0.0.1:3000`. Override to `0.0.0.0:3000` for container deploys.
+//! - `VORTEX_BENCH_LOG` — `tracing-subscriber` env filter spec. Default
+//!   `info`.
 
 use std::env;
 use std::path::PathBuf;
diff --git a/benchmarks-website/server/src/records.rs b/benchmarks-website/server/src/records.rs
index 85ee1be8d61..e71128b759e 100644
--- a/benchmarks-website/server/src/records.rs
+++ b/benchmarks-website/server/src/records.rs
@@ -53,8 +53,11 @@ use serde::Deserialize;
 #[derive(Debug, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct Envelope {
+    /// Per-run metadata, including the wire schema version.
     pub run_meta: RunMeta,
+    /// Commit context — upserted into `commits` before any record is applied.
     pub commit: CommitInfo,
+    /// Heterogeneous batch of fact-table records.
     pub records: Vec<Record>,
 }
 
@@ -63,24 +66,38 @@ pub struct Envelope {
 #[derive(Debug, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct RunMeta {
+    /// Free-form ID of the producing run (e.g. `bench.yml@<run_id>`).
     pub benchmark_id: String,
+    /// Wire schema version. Must equal [`crate::schema::SCHEMA_VERSION`].
     pub schema_version: i32,
+    /// RFC 3339 timestamp at which the run started.
     pub started_at: String,
 }
 
 /// Columns for the `commits` dim table. The wire field for `commit_sha` is
-/// renamed to `sha` per the contract.
+/// renamed to `sha` per the contract; every other field name matches the
+/// column name in [`crate::schema`].
 #[derive(Debug, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct CommitInfo {
+    /// 40-hex lowercase commit SHA.
     pub sha: String,
+    /// RFC 3339 / ISO 8601 timestamp of the commit.
     pub timestamp: String,
+    /// Full commit message (the server renders only the first line).
     pub message: String,
+    /// Author's display name.
     pub author_name: String,
+    /// Author's email.
     pub author_email: String,
+    /// Committer's display name.
     pub committer_name: String,
+    /// Committer's email.
     pub committer_email: String,
+    /// Git tree SHA the commit points at.
     pub tree_sha: String,
+    /// GitHub URL for the commit (used as the click-through fallback when
+    /// no `(#NNNN)` tag is present in the message).
     pub url: String,
 }
 
@@ -88,97 +105,159 @@ pub struct CommitInfo {
 #[derive(Debug, Deserialize)]
 #[serde(tag = "kind", rename_all = "snake_case")]
 pub enum Record {
+    /// `query_measurement` → `query_measurements` table.
     QueryMeasurement(QueryMeasurement),
+    /// `compression_time` → `compression_times` table.
     CompressionTime(CompressionTime),
+    /// `compression_size` → `compression_sizes` table.
     CompressionSize(CompressionSize),
+    /// `random_access_time` → `random_access_times` table.
     RandomAccessTime(RandomAccessTime),
+    /// `vector_search_run` → `vector_search_runs` table.
     VectorSearchRun(VectorSearchRun),
 }
 
-/// SQL query suite measurement (TPC-H, ClickBench, ...).
+/// SQL query suite measurement (TPC-H, ClickBench, ...). Lands in
+/// `query_measurements`. Field names match the schema columns; per-suite dim
+/// values are documented on
+/// [`vortex_bench::v3::benchmark_dataset_dims`](../../../vortex-bench/src/v3.rs).
 #[derive(Debug, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct QueryMeasurement {
+    /// 40-hex lowercase SHA of the producing commit.
     pub commit_sha: String,
+    /// Top-level suite (e.g. `tpch`, `clickbench`, `public-bi`).
     pub dataset: String,
+    /// Categorical sub-name (Public-BI dataset; ClickBench flavor).
     #[serde(default)]
     pub dataset_variant: Option<String>,
+    /// TPC SF as a string. Populated for TPC-H/TPC-DS, NULL elsewhere.
     #[serde(default)]
     pub scale_factor: Option<String>,
+    /// 1-based query index inside the suite.
     pub query_idx: i32,
+    /// Storage backend the run targeted: `nvme` or `s3`. Validated on insert.
     pub storage: String,
+    /// Engine (`datafusion`, `duckdb`, `vortex`, `arrow`).
     pub engine: String,
+    /// On-disk format (`parquet`, `vortex-file-compressed`, `lance`, ...).
     pub format: String,
+    /// Median per-iteration wall time in nanoseconds.
     pub value_ns: i64,
+    /// Per-iteration wall times in nanoseconds (median of these is `value_ns`).
     pub all_runtimes_ns: Vec<i64>,
+    /// Peak resident-set bytes during the query, when memory tracking was on.
     #[serde(default)]
     pub peak_physical: Option<i64>,
+    /// Peak virtual-memory bytes during the query, when memory tracking was on.
     #[serde(default)]
     pub peak_virtual: Option<i64>,
+    /// Resident-set delta across the query, when memory tracking was on.
     #[serde(default)]
     pub physical_delta: Option<i64>,
+    /// Virtual-memory delta across the query, when memory tracking was on.
     #[serde(default)]
     pub virtual_delta: Option<i64>,
+    /// Host environment triple (e.g. `x86_64-linux-gnu`).
     #[serde(default)]
     pub env_triple: Option<String>,
 }
 
-/// Encode/decode timing from `compress-bench`.
+/// Encode-or-decode timing from `compress-bench`. Lands in
+/// `compression_times`.
 #[derive(Debug, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct CompressionTime {
+    /// 40-hex lowercase SHA of the producing commit.
     pub commit_sha: String,
+    /// Compression dataset name.
     pub dataset: String,
+    /// Optional dataset variant (reserved; unused at alpha).
     #[serde(default)]
     pub dataset_variant: Option<String>,
+    /// On-disk format the timing applies to.
     pub format: String,
+    /// `encode` or `decode`. The server treats it as opaque on the wire.
     pub op: String,
+    /// Median per-iteration wall time in nanoseconds.
     pub value_ns: i64,
+    /// Per-iteration wall times in nanoseconds.
     pub all_runtimes_ns: Vec<i64>,
+    /// Host environment triple.
     #[serde(default)]
     pub env_triple: Option<String>,
 }
 
 /// On-disk size from `compress-bench`. One-shot, no per-iteration data.
+/// Lands in `compression_sizes`. Compression ratios (e.g. `vortex/parquet`)
+/// are NOT a separate record kind — they are computed at read time from
+/// pairs of these rows.
 #[derive(Debug, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct CompressionSize {
+    /// 40-hex lowercase SHA of the producing commit.
     pub commit_sha: String,
+    /// Compression dataset name.
     pub dataset: String,
+    /// Optional dataset variant (reserved; unused at alpha).
     #[serde(default)]
     pub dataset_variant: Option<String>,
+    /// On-disk format the size applies to.
     pub format: String,
+    /// Compressed-file size in bytes.
     pub value_bytes: i64,
 }
 
-/// Take-time timing from `random-access-bench`.
+/// Take-time timing from `random-access-bench`. Lands in
+/// `random_access_times`. Datasets here (chimp, taxi, ...) are a different
+/// namespace from the SQL query suites' dataset names.
 #[derive(Debug, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct RandomAccessTime {
+    /// 40-hex lowercase SHA of the producing commit.
     pub commit_sha: String,
+    /// Random-access dataset name.
     pub dataset: String,
+    /// On-disk format the timing applies to.
     pub format: String,
+    /// Median per-iteration wall time in nanoseconds.
     pub value_ns: i64,
+    /// Per-iteration wall times in nanoseconds.
     pub all_runtimes_ns: Vec<i64>,
+    /// Host environment triple.
     #[serde(default)]
     pub env_triple: Option<String>,
 }
 
-/// Cosine-similarity scan from `vector-search-bench`.
+/// Cosine-similarity scan from `vector-search-bench`. Lands in
+/// `vector_search_runs`. The only family that emits timing **plus** side
+/// counters in the same row.
 #[derive(Debug, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct VectorSearchRun {
+    /// 40-hex lowercase SHA of the producing commit.
     pub commit_sha: String,
+    /// Vector dataset name (e.g. `cohere-large-10m`).
     pub dataset: String,
+    /// Train-split layout label.
     pub layout: String,
+    /// Compression flavor label.
     pub flavor: String,
+    /// Cosine threshold passed to the scan filter.
     pub threshold: f64,
+    /// Median per-scan wall time in nanoseconds.
     pub value_ns: i64,
+    /// Per-iteration wall times in nanoseconds.
     pub all_runtimes_ns: Vec<i64>,
+    /// Number of rows that survived the cosine filter.
     pub matches: i64,
+    /// Total rows scanned.
     pub rows_scanned: i64,
+    /// Total on-disk bytes scanned.
     pub bytes_scanned: i64,
+    /// Number of timed iterations. Not part of the dim hash.
     pub iterations: i32,
+    /// Host environment triple.
     #[serde(default)]
     pub env_triple: Option<String>,
 }
diff --git a/benchmarks-website/server/src/slug.rs b/benchmarks-website/server/src/slug.rs
index 911a6719484..dcb22983e51 100644
--- a/benchmarks-website/server/src/slug.rs
+++ b/benchmarks-website/server/src/slug.rs
@@ -1,15 +1,14 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-//! Opaque slugs for `/api/chart/:slug`.
+//! Opaque slugs for `/api/chart/:slug` and `/api/group/:slug`.
 //!
-//! Per `02-contracts.md`, the web-ui treats slugs as opaque strings: it
-//! receives them from `/api/groups` and feeds them back unchanged to
-//! `/api/chart/:slug`. The server is free to choose any format.
-//!
-//! Slugs here are `<prefix>.<base64url-of-json>` where `<prefix>` names the
-//! source fact table and the JSON encodes the chart key. Round-tripping the
-//! slug back gives a strongly-typed [`ChartKey`].
+//! The web-ui treats slugs as opaque strings: it receives them from
+//! `/api/groups` and feeds them back unchanged. The server is free to
+//! choose any format — slugs here are
+//! `<prefix>.<base64url-of-json>`, where `<prefix>` names the source
+//! fact table and the JSON encodes the chart or group key. Round-tripping
+//! the slug back gives a strongly-typed [`ChartKey`] or [`GroupKey`].
 
 use anyhow::Context as _;
 use anyhow::Result;
@@ -35,9 +34,10 @@ const PREFIX_VECTOR_SEARCH_GROUP: &str = "vsg";
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 #[serde(tag = "k")]
 pub enum ChartKey {
-    /// `query_measurements` chart: `(dataset, query_idx)` per `01-schema.md`.
-    /// Group context (`dataset_variant`, `scale_factor`, `storage`) is carried
-    /// alongside so the slug fully specifies the chart.
+    /// `query_measurements` chart: `(dataset, query_idx)` is the chart key
+    /// per [`crate::schema`]. Group context (`dataset_variant`,
+    /// `scale_factor`, `storage`) is carried alongside so the slug fully
+    /// specifies the chart.
     QueryMeasurement {
         dataset: String,
         dataset_variant: Option<String>,
diff --git a/benchmarks-website/server/static/chart-init.js b/benchmarks-website/server/static/chart-init.js
index 88244baa0d0..f0289716265 100644
--- a/benchmarks-website/server/static/chart-init.js
+++ b/benchmarks-website/server/static/chart-init.js
@@ -10,24 +10,26 @@
 //   3. Per-card DOM contract           — every `data-role` selector.
 //   4. Global filter state             — engines/formats from the navbar.
 //   5. Palette + helpers               — colours, formatting, throttle.
-//   6. LTTB                            — pure largest-triangle downsampler.
-//   7. Crosshair plugin                — inline Chart.js plugin.
-//   8. External tooltip handler        — factory that returns a Chart.js
+//   6. Display unit picker             — bytes/time/count formatter switch.
+//   7. LTTB                            — pure largest-triangle downsampler.
+//   8. Crosshair plugin                — inline Chart.js plugin.
+//   9. External tooltip handler        — factory that returns a Chart.js
 //                                        external tooltip handler.
-//   9. Payload + datasets              — readInlinePayload, buildDatasets,
+//  10. Payload + datasets              — readInlinePayload, buildDatasets,
 //                                        rebuildVisibleAndUpdate.
-//  10. Lazy refetch                    — maybeRefetchFullPayload,
-//                                        replaceChartPayload.
-//  11. Slider + badge sync             — syncSliderFromRange,
-//                                        syncDownsampleBadge.
+//  11. Lazy refetch                    — maybeRefetchFullPayload,
+//                                        replaceChartPayload, plus the
+//                                        slider + downsample-badge sync
+//                                        helpers.
 //  12. Per-card construction           — constructChart.
 //  13. Range scrollbar strip           — bindRangeStrip + pointer math.
-//  14. Toolbar + wheel pan             — bindToolbar, attachWheelPan,
+//  14. Per-chart toolbar wiring        — bindToolbar, attachWheelPan,
 //                                        applyScope, applyY.
 //  15. Lazy fetch on details.toggle    — fetchAndConstruct + UI helpers.
 //  16. Global filter wiring            — chip toggle, URL sync, bindings.
-//  17. Header controls                 — theme toggle, expand/collapse all.
-//  18. Page wiring                     — IntersectionObserver, init.
+//  17. Per-group toolbar wiring        — group-level filter + Y override.
+//  18. Header controls                 — theme toggle, expand/collapse all.
+//  19. Page wiring                     — IntersectionObserver, init.
 //
 // Per-chart UX (for orientation):
 //   - Each `.chart-card` carries `data-chart-slug`. The card *owns* its own
@@ -89,6 +91,16 @@
 //                                     toggle handler calls `chart.resize()`
 //                                     on these to recompute layout once the
 //                                     container is visible.
+//   canvas.__bench_display_unit       The picked display unit (`format`,
+//                                     `axisLabel`, `multiplier`) used by the
+//                                     tooltip and y-axis label. Recomputed
+//                                     after every payload swap and after each
+//                                     LTTB rebuild changes the visible window.
+//   canvas.__bench_y_user_set         true once the user has explicitly
+//                                     clicked the per-chart Y-axis toolbar.
+//                                     The per-group Y override skips charts
+//                                     where this flag is set so the local
+//                                     click stays sticky.
 //
 // Per-card DOM contract — every selector the chart cards are queried by:
 //   .chart-card[data-chart-index][data-chart-slug]    The card itself.
@@ -140,12 +152,13 @@
     if (name && WIDE_DEFAULT_GROUPS.has(name)) return "all";
     return DEFAULT_VISIBLE;
   }
-  // Mirror of `LANDING_INLINE_N` in `server/src/html.rs`. The first group's
-  // inline JSON is capped at this many commits to keep the cold landing
-  // page small. When the user zooms wider than what's inlined we lazy-fetch
-  // `?n=all` and replace the payload in place. If you change this, update
-  // the server too — the comparison `commits.length >= LANDING_INLINE_N`
-  // is what tells us the inline payload was potentially trimmed.
+  // Mirror of `LANDING_INLINE_N` in `server/src/html/mod.rs`. The first
+  // group's inline JSON is capped at this many commits to keep the cold
+  // landing page small. When the user zooms wider than what's inlined we
+  // lazy-fetch `?n=all` and replace the payload in place. If you change
+  // this, update the server too — the comparison
+  // `commits.length >= LANDING_INLINE_N` is what tells us the inline
+  // payload was potentially trimmed.
   var LANDING_INLINE_N = 100;
   // Hard cap on how many points a single series can render at once. When
   // the visible commit range has more raw non-null points than this, we
@@ -917,7 +930,7 @@
   // Lazy-upgrade an inline-trimmed payload to the full history.
   //
   // The landing page inlines at most `LANDING_INLINE_N` commits per chart
-  // (server: `html.rs::LANDING_INLINE_N`) so the cold HTML body stays small.
+  // (server: `html/mod.rs::LANDING_INLINE_N`) so the cold HTML body stays small.
   // The first time the user zooms wide enough to ask for everything we have
   // loaded we replace the payload with the unbounded view from
   // `/api/chart/{slug}?n=all`. The chart's pan/zoom limits and the toolbar
diff --git a/benchmarks-website/server/tests/chart_api.rs b/benchmarks-website/server/tests/chart_api.rs
index b72cb3e5028..af0b5bd4e1c 100644
--- a/benchmarks-website/server/tests/chart_api.rs
+++ b/benchmarks-website/server/tests/chart_api.rs
@@ -223,8 +223,8 @@ async fn chart_payload_includes_series_meta() -> Result<()> {
 }
 
 /// Every chart payload must declare a structured `unit_kind` so the client
-/// can pick a display unit without guessing from the values. The taxonomy is
-/// documented in `planning/02-contracts.md`; this test pins the wire
+/// can pick a display unit without guessing from the values. The taxonomy
+/// lives on [`vortex_bench_server::api::UnitKind`]; this test pins the wire
 /// classification of every fact-table family currently emitted.
 #[tokio::test]
 async fn chart_payload_declares_unit_kind_per_family() -> Result<()> {
diff --git a/benchmarks-website/server/tests/common/mod.rs b/benchmarks-website/server/tests/common/mod.rs
index 949ebdeb525..be177328a6c 100644
--- a/benchmarks-website/server/tests/common/mod.rs
+++ b/benchmarks-website/server/tests/common/mod.rs
@@ -24,19 +24,19 @@ use vortex_bench_server::app::router;
 
 /// Bearer token wired into the in-process server. Test ingest calls send
 /// this in `Authorization: Bearer …`.
-pub const TOKEN: &str = "test-bearer-token";
+pub(crate) const TOKEN: &str = "test-bearer-token";
 
 /// In-process axum server bound to a random port. Drops cleanly on `Drop`.
-pub struct Server {
+pub(crate) struct Server {
     /// Loopback address the server is listening on.
-    pub addr: SocketAddr,
+    pub(crate) addr: SocketAddr,
     _tmp: TempDir,
     handle: JoinHandle<()>,
 }
 
 impl Server {
     /// Spin up an in-process server backed by a fresh temp DuckDB.
-    pub async fn start() -> Result<Self> {
+    pub(crate) async fn start() -> Result<Self> {
         let tmp = TempDir::new()?;
         let db_path = tmp.path().join("bench.duckdb");
         let state = AppState::open(&db_path, TOKEN.to_string())?;
@@ -55,7 +55,7 @@ impl Server {
     }
 
     /// Build an absolute URL for `path` against the in-process server.
-    pub fn url(&self, path: &str) -> String {
+    pub(crate) fn url(&self, path: &str) -> String {
         format!("http://{}{}", self.addr, path)
     }
 }
@@ -68,7 +68,7 @@ impl Drop for Server {
 
 /// Three synthetic commits, oldest first. Picked so the rendered output has
 /// short SHAs that are visually distinct in snapshots.
-pub fn commits() -> &'static [(&'static str, &'static str, &'static str)] {
+pub(crate) fn commits() -> &'static [(&'static str, &'static str, &'static str)] {
     &[
         (
             "1111111111111111111111111111111111111111",
@@ -90,7 +90,7 @@ pub fn commits() -> &'static [(&'static str, &'static str, &'static str)] {
 
 /// Build a fixture envelope for one commit; `value_bias` is added to each
 /// numeric measurement so successive commits produce a non-flat time series.
-pub fn envelope_for(sha: &str, ts: &str, msg: &str, value_bias: i64) -> Value {
+pub(crate) fn envelope_for(sha: &str, ts: &str, msg: &str, value_bias: i64) -> Value {
     json!({
         "run_meta": {
             "benchmark_id": "web-ui-fixture",
@@ -231,7 +231,7 @@ pub fn envelope_for(sha: &str, ts: &str, msg: &str, value_bias: i64) -> Value {
 
 /// POST every fixture commit through `/api/ingest` so the test DB is
 /// pre-populated before the test exercises the read API.
-pub async fn seed(server: &Server) -> Result<()> {
+pub(crate) async fn seed(server: &Server) -> Result<()> {
     let client = reqwest::Client::new();
     for (i, (sha, ts, msg)) in commits().iter().enumerate() {
         let bias = (i as i64) * 50_000;
@@ -253,7 +253,7 @@ pub async fn seed(server: &Server) -> Result<()> {
 /// Slim ingest envelope carrying just a `random_access_time` pair so we can
 /// drive a long-history fixture cheaply (the full envelope is ~12 records;
 /// this is two). Used by the downsample tests.
-pub fn ra_envelope_for(sha: &str, ts: &str, msg: &str, bias: i64) -> Value {
+pub(crate) fn ra_envelope_for(sha: &str, ts: &str, msg: &str, bias: i64) -> Value {
     json!({
         "run_meta": {
             "benchmark_id": "downsample-fixture",
@@ -296,7 +296,7 @@ pub fn ra_envelope_for(sha: &str, ts: &str, msg: &str, bias: i64) -> Value {
 /// downsampler has something to chew on. SHAs are deterministic
 /// `{i:040x}`; timestamps are 1 minute apart starting 2025-01-01 so the
 /// commits sort stably.
-pub async fn seed_long_history(server: &Server, n: usize) -> Result<()> {
+pub(crate) async fn seed_long_history(server: &Server, n: usize) -> Result<()> {
     let client = reqwest::Client::new();
     for i in 0..n {
         let sha = format!("{i:040x}");
@@ -325,7 +325,7 @@ pub async fn seed_long_history(server: &Server, n: usize) -> Result<()> {
 
 /// Pull the inline `<script id="chart-data-N">…</script>` JSON out of an
 /// HTML body. Returns `None` if the script tag isn't present.
-pub fn extract_chart_data(body: &str, idx: usize) -> Option<Value> {
+pub(crate) fn extract_chart_data(body: &str, idx: usize) -> Option<Value> {
     let needle = format!(r#"<script id="chart-data-{idx}" type="application/json">"#);
     let start = body.find(&needle)? + needle.len();
     let end = body[start..].find("</script>")? + start;
@@ -337,7 +337,7 @@ pub fn extract_chart_data(body: &str, idx: usize) -> Option<Value> {
 /// Configure `insta` to look for snapshots in `tests/snapshots/` keyed by
 /// just the explicit name (no module prefix). Every test in this crate uses
 /// these settings so the snapshot file layout is path-independent.
-pub fn insta_settings() -> insta::Settings {
+pub(crate) fn insta_settings() -> insta::Settings {
     let mut s = insta::Settings::clone_current();
     s.set_snapshot_path("snapshots");
     s.set_prepend_module_to_snapshot(false);
@@ -347,7 +347,10 @@ pub fn insta_settings() -> insta::Settings {
 /// Lift a single chart slug from `/api/groups`, picking from a group whose
 /// name matches `predicate`. Used by tests that need a real slug to drive
 /// `/chart/{slug}` and `/api/chart/{slug}` round-trips.
-pub async fn pick_chart_slug(server: &Server, predicate: impl Fn(&str) -> bool) -> Result<String> {
+pub(crate) async fn pick_chart_slug(
+    server: &Server,
+    predicate: impl Fn(&str) -> bool,
+) -> Result<String> {
     let client = reqwest::Client::new();
     let groups: Value = client
         .get(server.url("/api/groups"))
@@ -369,7 +372,10 @@ pub async fn pick_chart_slug(server: &Server, predicate: impl Fn(&str) -> bool)
 
 /// Lift a single group slug from `/api/groups`, picking the first group
 /// whose name matches `predicate`.
-pub async fn pick_group_slug(server: &Server, predicate: impl Fn(&str) -> bool) -> Result<String> {
+pub(crate) async fn pick_group_slug(
+    server: &Server,
+    predicate: impl Fn(&str) -> bool,
+) -> Result<String> {
     let client = reqwest::Client::new();
     let groups: Value = client
         .get(server.url("/api/groups"))
@@ -389,7 +395,7 @@ pub async fn pick_group_slug(server: &Server, predicate: impl Fn(&str) -> bool)
 
 /// Look up a group entry by its `name` field inside an `/api/groups`
 /// response.
-pub fn group_by_name<'a>(groups: &'a Value, name: &str) -> Result<&'a Value> {
+pub(crate) fn group_by_name<'a>(groups: &'a Value, name: &str) -> Result<&'a Value> {
     groups["groups"]
         .as_array()
         .context("groups is array")?
@@ -400,7 +406,7 @@ pub fn group_by_name<'a>(groups: &'a Value, name: &str) -> Result<&'a Value> {
 
 /// Fuzzy `f64` equality for test assertions. The summary rollups round-trip
 /// through SQL so exact equality isn't safe even on integer-valued inputs.
-pub fn assert_close(actual: f64, expected: f64) {
+pub(crate) fn assert_close(actual: f64, expected: f64) {
     let delta = (actual - expected).abs();
     assert!(
         delta < 0.000_001,
@@ -412,7 +418,7 @@ pub fn assert_close(actual: f64, expected: f64) {
 /// filter dropdown — its trigger button and the chip panel. Keeps the
 /// snapshot focused on the chip markup and stable against changes elsewhere
 /// on the page.
-pub fn filter_bar_section(body: &str) -> String {
+pub(crate) fn filter_bar_section(body: &str) -> String {
     let needle = r#"<div class="filter-dropdown" data-role="global-filter-bar""#;
     let Some(start) = body.find(needle) else {
         return "<missing filter bar>".to_string();
@@ -447,7 +453,7 @@ pub fn filter_bar_section(body: &str) -> String {
 
 /// Pull the `<div class="global-filter-row">` containing chips for one
 /// dimension (`"engine"` or `"format"`).
-pub fn filter_section(body: &str, dim: &str) -> String {
+pub(crate) fn filter_section(body: &str, dim: &str) -> String {
     let bar = filter_bar_section(body);
     let needle = format!(r#"data-filter="{dim}""#);
     let Some(_) = bar.find(&needle) else {
@@ -464,7 +470,7 @@ pub fn filter_section(body: &str, dim: &str) -> String {
 }
 
 /// Pull a single chip's opening tag for assertions.
-pub fn extract_chip(section: &str, value: &str) -> String {
+pub(crate) fn extract_chip(section: &str, value: &str) -> String {
     let needle = format!(r#"data-value="{value}""#);
     let Some(idx) = section.find(&needle) else {
         return String::new();
diff --git a/benchmarks-website/server/tests/ingest.rs b/benchmarks-website/server/tests/ingest.rs
index a2cf46f8632..96018bfa417 100644
--- a/benchmarks-website/server/tests/ingest.rs
+++ b/benchmarks-website/server/tests/ingest.rs
@@ -1,8 +1,9 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-//! Integration tests covering the acceptance criteria from
-//! `benchmarks-website/planning/components/server.md`.
+//! Integration tests for `POST /api/ingest`, covering the bearer
+//! check, the all-or-nothing transaction, the schema-version gate, and
+//! the upsert path.
 
 use std::net::SocketAddr;
 

From b7d80456ca0a68adbe963113a4b8f1243144f4d1 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 6 May 2026 15:01:00 +0000
Subject: [PATCH 3/3] docs(benchmarks-website): address review feedback on
 planning-docs migration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Doc-accuracy nits surfaced in review of #7810. No behavior changes.

- `vortex-bench/src/datasets/mod.rs::Dataset::v3_dataset_dims`: drop the
  stale "Public-BI emits dataset = public-bi, dataset_variant = …"
  example. The only override (`PBIBenchmark::v3_dataset_dims`) actually
  returns `(&self.name, None)` to match the migrate classifier — the
  parent-namespace framing was historical. Reword to "Override only when
  a suite needs a different dataset name on the wire than its `name()`
  returns."
- `query_idx` field doc on both `vortex-bench/src/v3.rs` and
  `server/src/records.rs`: drop the "1-based" claim. Query indices are
  whatever the producing bench loop happens to use (ClickBench is
  0-based, others vary); both sides agree only because the migrate
  classifier parses literal digits out of `q07`-style v2 chart names.
- `server/src/app.rs`: add `/api/group/{slug}` to the read-API line; the
  router wires it but the module doc had drifted.
- `server/src/api/charts.rs::collect_group_charts`: re-introduce a
  `TODO(#7812):` marker so the N+1 follow-up is grep-discoverable.
  Tracking issue: https://github.com/vortex-data/vortex/issues/7812
- `benchmarks-website/README.md`: rewrite the cutover-plan bullet to
  reflect that v3 runs alongside v2 on the same EC2 host (v2 on `:80`,
  v3 on `:3001`), per `docker-compose.yml` and `ec2-init.txt`. The
  earlier "separate EC2 host" claim was stale.
- `server/src/ingest.rs`: add the missing 400 row for the older-than-
  expected `schema_version` case to the HTTP matrix table.
- `vortex-bench/src/v3.rs`: drop the `[`canonical_tpc_scale_factor`]`
  intra-doc link in the new dim-mapping table — the function is
  private, so `RUSTDOCFLAGS="-D warnings" cargo doc` rejected it.
- `server/src/lib.rs`: rename "PNGs" to "logos" in the route table —
  the `typos` checker read "PN" inside "PNGs" as a misspelling of "ON"
  and failed the `Spell Check with Typos` check.

Verification:

- `cargo +nightly fmt --all`
- `cargo build -p vortex-bench-server -p vortex-bench-migrate` → clean.
- `cargo test -p vortex-bench-server -p vortex-bench-migrate` → all
  140+ tests pass.
- `cargo clippy --all-targets --all-features
   -p vortex-bench-server -p vortex-bench-migrate
   -- -W dead_code -W unreachable_pub -W unused_imports` → 0 warnings.
- `RUSTDOCFLAGS="-D warnings" cargo doc --no-deps -p vortex-bench
   -p vortex-bench-server -p vortex-bench-migrate` → clean.
- `typos .` → 0 hits.

Signed-off-by: Claude <noreply@anthropic.com>
---
 benchmarks-website/README.md                | 4 ++--
 benchmarks-website/server/src/api/charts.rs | 7 ++++---
 benchmarks-website/server/src/app.rs        | 3 ++-
 benchmarks-website/server/src/ingest.rs     | 3 ++-
 benchmarks-website/server/src/lib.rs        | 2 +-
 benchmarks-website/server/src/records.rs    | 5 ++++-
 vortex-bench/src/datasets/mod.rs            | 7 +++----
 vortex-bench/src/v3.rs                      | 7 +++++--
 8 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/benchmarks-website/README.md b/benchmarks-website/README.md
index 6460b6c07b3..63604539279 100644
--- a/benchmarks-website/README.md
+++ b/benchmarks-website/README.md
@@ -96,8 +96,8 @@ re-running `vortex-bench-migrate run --output ...` is safe.
 The work to flip `bench.vortex.dev` from v2 to v3 is tracked outside this
 repo. The relevant code-side bits:
 
-- v3 deploys today on a separate EC2 host and is exercised by CI's dual-write
-  step against a test bearer token.
+- v3 runs alongside v2 on the same EC2 host today (v2 on `:80`, v3 on
+  `:3001`) and is fed by CI's dual-write `--gh-json-v3` path.
 - v2 keeps shipping unchanged until DNS flips. **Do not touch the top-level
   v2 files unless you are doing the cleanup PR opened post-flip.**
 - The v2 cleanup PR removes everything top-level under `benchmarks-website/`
diff --git a/benchmarks-website/server/src/api/charts.rs b/benchmarks-website/server/src/api/charts.rs
index b0edfb0380c..28184279067 100644
--- a/benchmarks-website/server/src/api/charts.rs
+++ b/benchmarks-website/server/src/api/charts.rs
@@ -75,10 +75,11 @@ pub(crate) fn chart_payload(
 /// Collect every chart inside one group. Returns `None` if the group has no
 /// data at all (callers should render a 404).
 ///
-/// Known N+1: this re-runs the entire [`collect_groups`] discovery pass per
-/// call before fetching each chart, so the landing page is
+/// TODO(#7812): this re-runs the entire [`collect_groups`] discovery pass
+/// per call before fetching each chart, so the landing page is
 /// O(groups * charts_per_group) DB queries plus the discovery scan. Fine
-/// for the current dataset; the rewrite is owned outside this branch.
+/// for the current dataset; tracked for the refactor that collapses it
+/// into a single query.
 pub(crate) fn collect_group_charts(
     conn: &Connection,
     key: &GroupKey,
diff --git a/benchmarks-website/server/src/app.rs b/benchmarks-website/server/src/app.rs
index 759205431b1..d013bfe9ad7 100644
--- a/benchmarks-website/server/src/app.rs
+++ b/benchmarks-website/server/src/app.rs
@@ -4,7 +4,8 @@
 //! Axum [`Router`] composition and shared [`AppState`].
 //!
 //! The router mounts:
-//! - `/api/groups`, `/api/chart/{slug}`, `/health` (read API)
+//! - `/api/groups`, `/api/chart/{slug}`, `/api/group/{slug}`, `/health`
+//!   (read API)
 //! - `/api/ingest` (gated by [`crate::auth::require_bearer`])
 //! - HTML routes contributed by [`crate::html::router`]
 //!
diff --git a/benchmarks-website/server/src/ingest.rs b/benchmarks-website/server/src/ingest.rs
index 9b6844dca44..b97401bea8a 100644
--- a/benchmarks-website/server/src/ingest.rs
+++ b/benchmarks-website/server/src/ingest.rs
@@ -15,7 +15,8 @@
 //! | Malformed JSON or unknown field at the envelope level               | 400                                               |
 //! | Unknown `kind`, unknown record field, or per-record validation fail | 400 with the offending record's index             |
 //! | Missing or invalid bearer token                                     | 401 (raised by [`crate::auth::require_bearer`])   |
-//! | Schema version newer than this server expects                       | 409                                               |
+//! | Schema version newer than this server expects                       | 409                                               |
+//! | Schema version older than this server expects                       | 400 (via the malformed-envelope path)             |
 //! | Other server error                                                  | 500                                               |
 //!
 //! All-or-nothing semantics: a single failed record fails the whole batch
diff --git a/benchmarks-website/server/src/lib.rs b/benchmarks-website/server/src/lib.rs
index 9794f887476..3174aa50696 100644
--- a/benchmarks-website/server/src/lib.rs
+++ b/benchmarks-website/server/src/lib.rs
@@ -20,7 +20,7 @@
 //!   HTML; the rest are shells fetched on first toggle.
 //! - `GET /chart/{slug}` — single-chart permalink.
 //! - `GET /group/{slug}` — every chart in one group on a single page.
-//! - `GET /static/...` — the bundled JS / CSS / PNGs.
+//! - `GET /static/...` — the bundled JS / CSS / logos.
 //! - `GET /api/groups` — flat list of every group with chart-link metadata.
 //! - `GET /api/chart/{slug}` — one chart's payload (`commits`, `series`,
 //!   `unit_kind`, ...).
diff --git a/benchmarks-website/server/src/records.rs b/benchmarks-website/server/src/records.rs
index e71128b759e..446b701e296 100644
--- a/benchmarks-website/server/src/records.rs
+++ b/benchmarks-website/server/src/records.rs
@@ -134,7 +134,10 @@ pub struct QueryMeasurement {
     /// TPC SF as a string. Populated for TPC-H/TPC-DS, NULL elsewhere.
     #[serde(default)]
     pub scale_factor: Option<String>,
-    /// 1-based query index inside the suite.
+    /// Query index within the suite. The convention (0-based or 1-based) is
+    /// fixed per suite by the producing bench loop; the migrate classifier
+    /// matches it by parsing literal digits out of `q07`-style v2 chart
+    /// names.
     pub query_idx: i32,
     /// Storage backend the run targeted: `nvme` or `s3`. Validated on insert.
     pub storage: String,
diff --git a/vortex-bench/src/datasets/mod.rs b/vortex-bench/src/datasets/mod.rs
index d89f99d36f4..d35d3f869e0 100644
--- a/vortex-bench/src/datasets/mod.rs
+++ b/vortex-bench/src/datasets/mod.rs
@@ -40,10 +40,9 @@ pub trait Dataset {
     /// Map this dataset to the v3 `(dataset, dataset_variant)` pair emitted
     /// in `compression_*` records.
     ///
-    /// Default: `(name(), None)`. Override for suites that have a parent
-    /// namespace and a sub-dataset (e.g. Public-BI emits
-    /// `dataset = "public-bi"`, `dataset_variant = "<sub-dataset name>"`).
-    /// The query-side equivalent is documented on
+    /// Default: `(name(), None)`. Override only when a suite needs a
+    /// different dataset name on the wire than its `name()` returns. The
+    /// query-side equivalent is documented on
     /// [`crate::v3::benchmark_dataset_dims`].
     fn v3_dataset_dims(&self) -> (&str, Option<&str>) {
         (self.name(), None)
diff --git a/vortex-bench/src/v3.rs b/vortex-bench/src/v3.rs
index c2dabca1ff9..99c85314fbe 100644
--- a/vortex-bench/src/v3.rs
+++ b/vortex-bench/src/v3.rs
@@ -134,7 +134,10 @@ pub struct QueryMeasurementRecord {
     /// a per-suite scale factor.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub scale_factor: Option<String>,
-    /// 1-based query index within the suite.
+    /// Query index within the suite. The convention (0-based or 1-based) is
+    /// fixed per suite by the producing bench loop; the migrate classifier
+    /// matches it by parsing literal digits out of `q07`-style v2 chart
+    /// names.
     pub query_idx: u32,
     /// Storage backend the run targeted (`nvme` or `s3`).
     pub storage: String,
@@ -282,7 +285,7 @@ fn canonical_tpc_scale_factor(scale_factor: &str) -> String {
 ///
 /// | `BenchmarkDataset` | `dataset` | `dataset_variant` | `scale_factor` | Notes |
 /// |---|---|---|---|---|
-/// | `TpcH { scale_factor }`     | `tpch`         | `None`              | TPC SF as string (`"1"`, `"10"`, `"100"`, `"1000"`) | Run through [`canonical_tpc_scale_factor`] so `"1.0"` and `"1"` collapse. |
+/// | `TpcH { scale_factor }`     | `tpch`         | `None`              | TPC SF as string (`"1"`, `"10"`, `"100"`, `"1000"`) | Run through `canonical_tpc_scale_factor` so `"1.0"` and `"1"` collapse. |
 /// | `TpcDS { scale_factor }`    | `tpcds`        | `None`              | TPC SF as string                                    | Same canonicalization as TPC-H. |
 /// | `ClickBench { flavor: _ }`  | `clickbench`   | `None`              | `None`                                              | Migrate path drops flavor; live emitter matches so historical and live merge. |
 /// | `StatPopGen { n_rows: _ }`  | `statpopgen`   | `None`              | `None`                                              | Migrate path carries no SF for this suite; live drops it for the same reason. |