diff --git a/apps/api/src/app.ts b/apps/api/src/app.ts index 7d13bd1..d579de6 100644 --- a/apps/api/src/app.ts +++ b/apps/api/src/app.ts @@ -8,6 +8,11 @@ * 4. trace-id plugin → UUIDv7 traceId on every request * 5. setErrorHandler → single error mapper for all throws * 6. store plugin → decorates fastify.store from bootStores() + * 6a. reconcile plugin → fetch + ff/rebase/escape-hatch against origin + * (between store and services so in-memory state + * is built from the post-reconciliation tree) + * 6b. push-daemon plugin → starts gitsheets push daemon + * 6c. services plugin → builds in-memory state + FTS * 7. rate-limit plugin → in-memory counters keyed per-IP + per-account * 8. idempotency plugin → in-memory map keyed by personId+key * 9. @fastify/swagger → OpenAPI 3.1 doc generation @@ -29,6 +34,7 @@ import { envJsonSchema, type Env } from './env.js'; import { mapError } from './lib/errors.js'; import traceIdPlugin from './plugins/trace-id.js'; import storePlugin from './plugins/store.js'; +import reconcilePlugin from './plugins/reconcile.js'; import pushDaemonPlugin from './plugins/push-daemon.js'; import servicesPlugin from './plugins/services.js'; import rateLimitPlugin from './plugins/rate-limit.js'; @@ -111,10 +117,16 @@ export async function buildApp(opts: BuildAppOptions = {}): Promise void; +export type DataRepoLock = () => Promise; + +/** + * Create a fresh single-slot lock. Multiple callers calling `acquire()` + * (the returned function) queue FIFO; only one holds the lock at a time. + * + * The returned release function is idempotent — calling it twice releases + * exactly once. + */ +export function createDataRepoLock(): DataRepoLock { + // Tail of the promise chain. Each acquire chains a new pending promise + // onto `tail`; the previous holder's release resolves the prior tail. 
+ let tail: Promise = Promise.resolve(); + + return async function acquire(): Promise { + let release!: () => void; + const next = new Promise((resolve) => { + release = resolve; + }); + const prior = tail; + tail = next; + await prior; + + let released = false; + return (): void => { + if (released) return; + released = true; + release(); + }; + }; +} diff --git a/apps/api/src/plugins/reconcile.ts b/apps/api/src/plugins/reconcile.ts new file mode 100644 index 0000000..58fd8d4 --- /dev/null +++ b/apps/api/src/plugins/reconcile.ts @@ -0,0 +1,163 @@ +/** + * Reconcile plugin. + * + * Replaces the data-repo reconciliation that used to live in + * `deploy/docker/entrypoint.sh`. Registered AFTER `storePlugin` (so the + * repository handle is available) and BEFORE `servicesPlugin` (so the + * in-memory state is built from the post-reconciliation tree). + * + * Behavior: + * - When `CFP_DATA_REMOTE` is unset, reconciliation is skipped entirely + * (typical for local dev against a sibling working tree with no remote). + * - Otherwise: calls `reconcileDataRepo` for the configured branch and + * logs the outcome at the appropriate level: + * - 'conflict-escaped' → ERROR with the `conflictBranch` field, so + * operators see a loud line in production logs. + * - 'fetch-failed' → WARN — non-fatal, the API still boots from + * local state. + * - everything else → INFO. + * - Any other thrown error (corrupt repo, missing branch, etc.) propagates + * and crashes the boot. k8s will restart the pod and the entrypoint will + * re-clone if needed. + * + * Decorates Fastify with: + * - `dataRepoLock` — a single-slot async lock callers use to serialize + * non-`store.transact` git operations (boot reconcile, future webhook). + * - `reconcileDataRepo({ branch })` — a thin wrapper that acquires the + * lock and invokes the state-machine function with the current + * environment. Provided so the future hot-reload webhook (#65) has a + * single call to make. 
+ */ +import type { FastifyInstance } from 'fastify'; +import fp from 'fastify-plugin'; + +import { createDataRepoLock, type DataRepoLock } from '../lib/data-repo-lock.js'; +import { reconcileDataRepo, type ReconcileResult } from '../store/reconcile.js'; + +declare module 'fastify' { + interface FastifyInstance { + /** + * Acquire the data-repo lock. Returns a release function; release is + * idempotent. See `lib/data-repo-lock.ts` for the contract. + */ + dataRepoLock: DataRepoLock; + /** + * Reconcile the local working tree against `CFP_DATA_REMOTE` for the + * given branch under the data-repo lock. Defaults to the configured + * `CFP_DATA_BRANCH`. + * + * Returns the outcome envelope. Throws on unrecoverable filesystem / + * git errors; soft failures (fetch blip, conflict-escape) return a + * non-throwing result. + */ + reconcileDataRepo: (opts?: { branch?: string }) => Promise; + } +} + +async function reconcilePlugin(fastify: FastifyInstance): Promise { + const lock = createDataRepoLock(); + fastify.decorate('dataRepoLock', lock); + + const repoPath = fastify.config.CFP_DATA_REPO_PATH; + const configuredBranch = fastify.config.CFP_DATA_BRANCH; + const remote = fastify.config.CFP_DATA_REMOTE; + + // Expose a Fastify-bound wrapper so the future webhook handler (#65) has + // a single call to make. Always under the lock. + fastify.decorate( + 'reconcileDataRepo', + async (opts?: { branch?: string }): Promise => { + const branch = opts?.branch ?? configuredBranch; + if (!branch) { + throw new Error( + 'reconcileDataRepo: no branch specified and CFP_DATA_BRANCH is unset', + ); + } + const release = await lock(); + try { + return await reconcileDataRepo({ + repoPath, + branch, + logger: fastify.log, + }); + } finally { + release(); + } + }, + ); + + // Boot-time reconcile: skipped when no remote is configured (dev). 
+ if (!remote) { + fastify.log.info( + 'data-repo reconciliation skipped: CFP_DATA_REMOTE unset (dev mode)', + ); + return; + } + + if (!configuredBranch) { + // Without a branch, we don't know what to reconcile against. Treat as + // a configuration error — entrypoint should set CFP_DATA_BRANCH + // alongside CFP_DATA_REMOTE. + throw new Error( + 'data-repo reconciliation: CFP_DATA_REMOTE set but CFP_DATA_BRANCH unset; refusing to guess', + ); + } + + const release = await lock(); + let result: ReconcileResult; + try { + result = await reconcileDataRepo({ + repoPath, + branch: configuredBranch, + logger: fastify.log, + }); + } finally { + release(); + } + + // Outcome-specific logging so operators get an at-a-glance line in prod. + switch (result.outcome) { + case 'conflict-escaped': + // LOUD: the operator MUST investigate the named branch. + fastify.log.error( + { + branch: configuredBranch, + conflictBranch: result.conflictBranch, + oldCommit: result.oldCommit, + newCommit: result.newCommit, + ahead: result.ahead, + behind: result.behind, + }, + 'data-repo reconciliation invoked conflict escape hatch', + ); + break; + case 'fetch-failed': + fastify.log.warn( + { branch: configuredBranch, commit: result.oldCommit }, + 'data-repo reconciliation: fetch failed; continuing with local state', + ); + break; + case 'in-sync': + case 'fast-forwarded': + case 'pushed-ahead': + case 'rebased': + fastify.log.info( + { + branch: configuredBranch, + outcome: result.outcome, + oldCommit: result.oldCommit, + newCommit: result.newCommit, + ahead: result.ahead, + behind: result.behind, + }, + 'data-repo reconciled', + ); + break; + } +} + +export default fp(reconcilePlugin, { + name: 'reconcile', + fastify: '5.x', + dependencies: ['store'], +}); diff --git a/apps/api/src/store/reconcile.ts b/apps/api/src/store/reconcile.ts new file mode 100644 index 0000000..7577b60 --- /dev/null +++ b/apps/api/src/store/reconcile.ts @@ -0,0 +1,333 @@ +/** + * Data-repo reconciliation state 
machine. + * + * Replaces the shell-side reconciliation that used to live in + * `deploy/docker/entrypoint.sh`. The same state machine, expressed in + * structured Node so: + * + * 1. It's callable from any boot path or future webhook handler (#65), + * not just from a shell process before exec(node). + * 2. Exit codes propagate naturally as Promise rejections — no more + * `git rebase 2>&1 | sed 's/^/ /'` swallowing the rebase exit code. + * 3. Fetch refspecs are explicit, so a single-branch `git clone --branch X` + * can still reconcile a different `Y` later (the original shell version + * relied on the implicit remote refspec written by `git clone` and broke + * when the operator changed `CFP_DATA_BRANCH`). + * + * State machine (same as the entrypoint's): + * + * local == remote → 'in-sync' + * local is ancestor of remote (behind) → ff-only merge → 'fast-forwarded' + * remote is ancestor of local (ahead) → push → 'pushed-ahead' + * (push failure is non-fatal — + * the push daemon retries) + * diverged, rebase clean → rebase + push → 'rebased' + * diverged, rebase conflicts → abort rebase, create + push + * conflicts/ branch from + * pre-rebase HEAD, hard-reset + * local to origin → + * 'conflict-escaped' + * fetch itself fails (network blip) → 'fetch-failed', no changes + * + * Unrecoverable errors (missing branch, corrupt repo, etc.) propagate as + * thrown rejections — the caller (boot plugin) lets the API crash and k8s + * restarts the pod. + * + * Authorship: any commit this function authors (the merge commit for an + * ff-only merge can't happen; rebase replays existing authors; conflict + * branch is just a ref) uses the pseudonymous "Code for Philly API" + * identity, matching the convention used by entrypoint.sh and importer.ts. + * The function `git config`s user.name / user.email at the top so any + * implicit committer (rebase rewrite) gets the right value. 
+ */ +import { execFile } from 'node:child_process'; +import { promisify } from 'node:util'; + +const exec = promisify(execFile); + +export const AUTHOR_NAME = 'Code for Philly API'; +export const AUTHOR_EMAIL = 'api@users.noreply.codeforphilly.org'; + +export type ReconcileOutcome = + | 'in-sync' + | 'fast-forwarded' + | 'pushed-ahead' + | 'rebased' + | 'conflict-escaped' + | 'fetch-failed'; + +export interface ReconcileResult { + readonly outcome: ReconcileOutcome; + readonly oldCommit: string; + readonly newCommit: string; + /** Present only when `outcome === 'conflict-escaped'`. */ + readonly conflictBranch?: string; + /** Counts ahead/behind relative to origin pre-reconciliation, when known. */ + readonly ahead?: number; + readonly behind?: number; +} + +/** + * Minimal logger contract — anything Fastify's pino logger (or a console + * shim in tests) can satisfy. Three levels are enough for this module. + */ +export interface ReconcileLogger { + info(obj: Record, msg: string): void; + warn(obj: Record, msg: string): void; + error(obj: Record, msg: string): void; +} + +export interface ReconcileOptions { + /** Absolute path to the local working tree. */ + readonly repoPath: string; + /** Branch to reconcile (must already be checked out by the caller / entrypoint). */ + readonly branch: string; + /** Remote name to fetch/push against. Default: 'origin'. */ + readonly remote?: string; + /** Logger; mirrors Fastify's pino interface. */ + readonly logger: ReconcileLogger; + /** Override the wall clock — used for conflict-branch naming in tests. */ + readonly now?: () => Date; +} + +interface GitExecResult { + readonly stdout: string; + readonly stderr: string; +} + +/** + * Run a git command in the data repo. `stderr` is captured (not piped) so + * exit codes propagate cleanly — none of the pipe-eats-exit-code class of + * bugs that bit the shell entrypoint. + * + * Throws if git exits non-zero. 
Most call sites in this module trap the + * throw and translate it into a structured outcome; only unrecoverable + * errors bubble out of `reconcileDataRepo`. + */ +async function git( + repoPath: string, + ...args: readonly string[] +): Promise { + return exec('git', [...args], { cwd: repoPath, maxBuffer: 32 * 1024 * 1024 }); +} + +/** + * Cast an `unknown` error to something describable. Mirrors importer.ts's + * `describe()` — keeping the shape consistent across the codebase. + */ +function describe(err: unknown): string { + if (err instanceof Error) return err.message; + return String(err); +} + +/** + * Build the `conflicts/` branch name. Mirrors the + * entrypoint's `date -u +%Y-%m-%dT%H-%M-%SZ` format so existing operator + * tooling (alerting on `conflicts/*` ref creation, etc.) keeps working. + */ +function conflictBranchName(now: Date): string { + // date -u +%Y-%m-%dT%H-%M-%SZ — same shape as the shell version. + const iso = now.toISOString(); // 2026-05-19T14:23:45.123Z + // Strip the milliseconds and replace ':' with '-' (git ref-safe). + const truncated = iso.replace(/\.\d{3}Z$/, 'Z'); + return `conflicts/${truncated.replace(/:/g, '-')}`; +} + +/** + * Reconcile the local working tree against `/`. + * + * Idempotent — calling repeatedly with no upstream changes is a no-op + * (`outcome: 'in-sync'`). + * + * Assumes the caller has already checked out `branch` in the working tree. + * (The entrypoint's surviving responsibility is the initial clone; the API's + * is the reconciliation.) + * + * Concurrency: this function MUST be called under a write mutex if there + * are any concurrent gitsheets mutations against the same repo. At boot + * there's no contention; the future webhook (#65) acquires the mutex first. + */ +export async function reconcileDataRepo( + opts: ReconcileOptions, +): Promise { + const remote = opts.remote ?? 'origin'; + const { repoPath, branch, logger } = opts; + const now = opts.now ?? 
((): Date => new Date()); + + // Set a deterministic committer identity for any rewrite (rebase replays + // can mint new committer lines even though authors are preserved). + await git(repoPath, 'config', 'user.name', AUTHOR_NAME); + await git(repoPath, 'config', 'user.email', AUTHOR_EMAIL); + + // Capture pre-reconciliation HEAD for the result envelope (and for the + // conflict-escape-hatch which needs to preserve the pre-rebase ref). + const oldCommit = (await git(repoPath, 'rev-parse', 'HEAD')).stdout.trim(); + + // ---- Fetch ---- + // Explicit refspec — never trust the remote's implicit refspec, which a + // single-branch `git clone --branch X` writes narrowly and breaks later + // `git fetch origin Y`. This is the first of the two latent shell bugs + // obsoleted by moving into Node. + const refspec = `+refs/heads/${branch}:refs/remotes/${remote}/${branch}`; + try { + await git(repoPath, 'fetch', '--prune', remote, refspec); + } catch (err) { + logger.warn( + { err: describe(err), remote, branch }, + 'data-repo fetch failed; continuing with local state', + ); + return { outcome: 'fetch-failed', oldCommit, newCommit: oldCommit }; + } + + // ---- Compare HEAD vs / ---- + const remoteRef = `${remote}/${branch}`; + const remoteCommit = (await git(repoPath, 'rev-parse', remoteRef)).stdout.trim(); + + if (oldCommit === remoteCommit) { + logger.info({ branch, commit: oldCommit }, 'data-repo in sync with remote'); + return { outcome: 'in-sync', oldCommit, newCommit: oldCommit }; + } + + // merge-base will throw if there's no common ancestor; bubble out as a + // boot-fatal — that means the local branch and the remote share no + // history, which is "operator did something weird, fix it" territory. + const mergeBase = ( + await git(repoPath, 'merge-base', 'HEAD', remoteRef) + ).stdout.trim(); + + // Behind: fast-forward. 
+ if (oldCommit === mergeBase) { + const behind = Number( + (await git(repoPath, 'rev-list', '--count', `HEAD..${remoteRef}`)).stdout.trim(), + ); + logger.info( + { branch, behind, from: oldCommit, to: remoteCommit }, + 'data-repo behind remote — fast-forwarding', + ); + await git(repoPath, 'merge', '--ff-only', remoteRef); + const newCommit = (await git(repoPath, 'rev-parse', 'HEAD')).stdout.trim(); + return { outcome: 'fast-forwarded', oldCommit, newCommit, behind }; + } + + // Ahead: push. + if (remoteCommit === mergeBase) { + const ahead = Number( + (await git(repoPath, 'rev-list', '--count', `${remoteRef}..HEAD`)).stdout.trim(), + ); + logger.info( + { branch, ahead, from: remoteCommit, to: oldCommit }, + 'data-repo ahead of remote — pushing', + ); + try { + await git(repoPath, 'push', remote, branch); + logger.info({ branch, commit: oldCommit }, 'data-repo push succeeded'); + } catch (err) { + // Non-fatal: the push daemon retries with backoff. Worst case + // operator intervention happens after the API is up. + logger.warn( + { err: describe(err), branch }, + 'data-repo push failed during reconcile; push-daemon will retry', + ); + } + return { outcome: 'pushed-ahead', oldCommit, newCommit: oldCommit, ahead }; + } + + // Diverged. + const ahead = Number( + (await git(repoPath, 'rev-list', '--count', `${remoteRef}..HEAD`)).stdout.trim(), + ); + const behind = Number( + (await git(repoPath, 'rev-list', '--count', `HEAD..${remoteRef}`)).stdout.trim(), + ); + logger.info( + { branch, ahead, behind, local: oldCommit, remote: remoteCommit }, + 'data-repo diverged from remote — attempting rebase', + ); + + try { + await git(repoPath, 'rebase', remoteRef); + } catch (err) { + // Rebase failed — escape hatch. + logger.error( + { err: describe(err), branch, ahead, behind, local: oldCommit, remote: remoteCommit }, + 'data-repo rebase conflicted — invoking escape hatch', + ); + + // Abort the in-progress rebase. 
If `git rebase --abort` itself fails, + // we still want to forge ahead to the conflict-branch preservation — + // log + continue. + try { + await git(repoPath, 'rebase', '--abort'); + } catch (abortErr) { + logger.warn( + { err: describe(abortErr), branch }, + 'rebase --abort itself failed; continuing escape-hatch', + ); + } + + // Preserve the pre-rebase HEAD on a uniquely-named branch so the + // operator can investigate. `branch --force` lets the reconciler stay + // idempotent even on the same-second second invocation in tests. + const conflictBranch = conflictBranchName(now()); + try { + await git(repoPath, 'branch', '--force', conflictBranch, oldCommit); + } catch (branchErr) { + // If we can't even create a ref locally, that's an unrecoverable + // filesystem-level problem. + throw new Error( + `data-repo escape hatch: failed to create ${conflictBranch} from ${oldCommit}: ${describe(branchErr)}`, + { cause: branchErr }, + ); + } + + // Push the conflict branch to origin so the operator can see it from + // GitHub. Non-fatal if push fails — the local ref is still there for + // forensic recovery via the PVC. + try { + await git(repoPath, 'push', remote, conflictBranch); + logger.error( + { branch, conflictBranch, preservedCommit: oldCommit }, + 'data-repo divergent commits preserved on remote — operator must investigate', + ); + } catch (pushErr) { + logger.error( + { + err: describe(pushErr), + branch, + conflictBranch, + preservedCommit: oldCommit, + }, + 'data-repo divergent commits preserved LOCALLY only (push failed) — operator must investigate', + ); + } + + // Hard-reset to origin so the pod boots from a known-good state. + await git(repoPath, 'reset', '--hard', remoteRef); + const newCommit = (await git(repoPath, 'rev-parse', 'HEAD')).stdout.trim(); + return { + outcome: 'conflict-escaped', + oldCommit, + newCommit, + conflictBranch, + ahead, + behind, + }; + } + + // Rebase succeeded — push. 
+ const newCommit = (await git(repoPath, 'rev-parse', 'HEAD')).stdout.trim(); + logger.info( + { branch, ahead, behind, from: oldCommit, to: newCommit }, + 'data-repo rebase clean — pushing', + ); + try { + await git(repoPath, 'push', remote, branch); + logger.info({ branch, commit: newCommit }, 'data-repo push succeeded after rebase'); + } catch (err) { + logger.warn( + { err: describe(err), branch }, + 'data-repo push after rebase failed; push-daemon will retry', + ); + } + return { outcome: 'rebased', oldCommit, newCommit, ahead, behind }; +} diff --git a/apps/api/tests/data-repo-reconcile.test.ts b/apps/api/tests/data-repo-reconcile.test.ts new file mode 100644 index 0000000..69bc9e7 --- /dev/null +++ b/apps/api/tests/data-repo-reconcile.test.ts @@ -0,0 +1,380 @@ +/** + * Tests for apps/api/src/store/reconcile.ts — the data-repo reconciliation + * state machine that replaces the shell logic in deploy/docker/entrypoint.sh. + * + * Each case sets up an isolated bare "remote" git repo + a local clone, mutates + * one or both sides to provoke a specific state, then asserts the + * `reconcileDataRepo` outcome and the resulting tree. 
+ * + * Cases covered: + * - in-sync (no-op) + * - fast-forwarded (local behind, remote has new commits) + * - pushed-ahead (local ahead, push succeeds) + * - rebased (diverged, clean rebase, push) + * - conflict-escaped (diverged, rebase aborts, conflict branch pushed) + * - fetch-failed (network blip simulated by a bogus remote URL) + * - single-branch reconcile (regression for the latent shell bug — + * `git clone --branch X` + reconcile against the + * same X still works because we always pass + * an explicit refspec) + */ +import { execFile } from 'node:child_process'; +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { promisify } from 'node:util'; + +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +import { reconcileDataRepo, type ReconcileLogger } from '../src/store/reconcile.js'; + +const exec = promisify(execFile); + +// --------------------------------------------------------------------------- +// Test scaffolding +// --------------------------------------------------------------------------- + +/** Captured log lines for assertions. */ +interface Captured { + level: 'info' | 'warn' | 'error'; + obj: Record; + msg: string; +} + +function makeLogger(): { logger: ReconcileLogger; lines: Captured[] } { + const lines: Captured[] = []; + const logger: ReconcileLogger = { + info: (obj, msg) => lines.push({ level: 'info', obj, msg }), + warn: (obj, msg) => lines.push({ level: 'warn', obj, msg }), + error: (obj, msg) => lines.push({ level: 'error', obj, msg }), + }; + return { logger, lines }; +} + +async function git(cwd: string, ...args: string[]): Promise { + const { stdout } = await exec('git', args, { cwd }); + return stdout.trim(); +} + +/** + * Initialize a tracked working tree at a tmpdir + a corresponding bare + * "remote" repo. Returns paths + cleanup. 
The local tree is cloned from + * the bare via filesystem URL so push/fetch round-trip locally. + * + * Both initial commits live on `main`; the local working tree is on `main`. + */ +interface TestRig { + readonly bare: string; + readonly local: string; + readonly cleanup: () => Promise; +} + +async function createRig(): Promise { + const root = await mkdtemp(join(tmpdir(), 'cfp-reconcile-')); + const bare = join(root, 'remote.git'); + const seed = join(root, 'seed'); + const local = join(root, 'local'); + + // Seed repo: produces the initial commit on `main`. + await exec('git', ['init', '-b', 'main', seed]); + await git(seed, 'config', 'user.email', 'seed@test.local'); + await git(seed, 'config', 'user.name', 'seed'); + await git(seed, 'config', 'commit.gpgsign', 'false'); + await git(seed, 'config', 'core.hooksPath', '/dev/null'); + await writeFile(join(seed, 'README'), 'initial\n'); + await git(seed, 'add', 'README'); + await git(seed, 'commit', '-m', 'initial'); + + // Bare remote. + await exec('git', ['init', '--bare', '-b', 'main', bare]); + // The bare needs to allow non-fast-forward pushes for the conflict-branch + // tests; default git allows that for branch creation but the bare needs + // `receive.denyCurrentBranch=warn` so pushes to `main` succeed (the + // bare has main checked out as HEAD). + await git(bare, 'config', 'receive.denyCurrentBranch', 'ignore'); + await exec('git', ['push', bare, 'main'], { cwd: seed }); + + // Local clone from the bare. + await exec('git', ['clone', bare, local]); + await git(local, 'config', 'user.email', 'local@test.local'); + await git(local, 'config', 'user.name', 'local'); + await git(local, 'config', 'commit.gpgsign', 'false'); + await git(local, 'config', 'core.hooksPath', '/dev/null'); + // git clone may pick up the global remote.origin.fetch refspec; make sure + // it's the standard "all branches" refspec so fetch works as expected. 
+ // (The reconciler will override per-call anyway, but the bare/seed plumbing + // uses plain `git push`/`git fetch` from the test helpers.) + + return { + bare, + local, + cleanup: async () => { + await rm(root, { recursive: true, force: true }); + }, + }; +} + +/** Commit a file into a working tree and return the new HEAD. */ +async function commitFile( + cwd: string, + filename: string, + contents: string, + message: string, +): Promise { + await writeFile(join(cwd, filename), contents); + await git(cwd, 'add', filename); + await git(cwd, 'commit', '-m', message); + return git(cwd, 'rev-parse', 'HEAD'); +} + +/** + * Push a new commit onto the bare's `main` by way of an ephemeral working + * tree clone, so the bare advances ahead of `local`. + */ +async function advanceRemote( + rig: TestRig, + filename: string, + contents: string, + message: string, +): Promise { + const wt = `${rig.local}-remote-advance-${Date.now()}-${Math.random() + .toString(36) + .slice(2, 8)}`; + await exec('git', ['clone', rig.bare, wt]); + await git(wt, 'config', 'user.email', 'remote-advance@test.local'); + await git(wt, 'config', 'user.name', 'remote-advance'); + await git(wt, 'config', 'commit.gpgsign', 'false'); + await git(wt, 'config', 'core.hooksPath', '/dev/null'); + const head = await commitFile(wt, filename, contents, message); + await git(wt, 'push', 'origin', 'main'); + await rm(wt, { recursive: true, force: true }); + return head; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('reconcileDataRepo', () => { + let rig: TestRig; + + beforeEach(async () => { + rig = await createRig(); + }); + afterEach(async () => { + await rig.cleanup(); + }); + + it('returns "in-sync" when local HEAD matches remote', async () => { + const { logger, lines } = makeLogger(); + const before = await git(rig.local, 'rev-parse', 'HEAD'); + + const result = await 
reconcileDataRepo({ + repoPath: rig.local, + branch: 'main', + logger, + }); + + expect(result.outcome).toBe('in-sync'); + expect(result.oldCommit).toBe(before); + expect(result.newCommit).toBe(before); + // No errors logged. + expect(lines.find((l) => l.level === 'error')).toBeUndefined(); + }); + + it('"fast-forwarded" when local is behind remote', async () => { + const before = await git(rig.local, 'rev-parse', 'HEAD'); + const remoteHead = await advanceRemote(rig, 'remote-feature.txt', 'hi\n', 'remote: add feature'); + + const { logger, lines } = makeLogger(); + const result = await reconcileDataRepo({ + repoPath: rig.local, + branch: 'main', + logger, + }); + + expect(result.outcome).toBe('fast-forwarded'); + expect(result.oldCommit).toBe(before); + expect(result.newCommit).toBe(remoteHead); + expect(result.behind).toBe(1); + + const after = await git(rig.local, 'rev-parse', 'HEAD'); + expect(after).toBe(remoteHead); + + expect(lines.some((l) => l.msg.includes('fast-forwarding'))).toBe(true); + }); + + it('"pushed-ahead" when local is ahead of remote', async () => { + const remoteBefore = await git(rig.local, 'rev-parse', 'origin/main'); + const newHead = await commitFile(rig.local, 'local-feature.txt', 'hi\n', 'local: add feature'); + + const { logger } = makeLogger(); + const result = await reconcileDataRepo({ + repoPath: rig.local, + branch: 'main', + logger, + }); + + expect(result.outcome).toBe('pushed-ahead'); + expect(result.oldCommit).toBe(newHead); + expect(result.newCommit).toBe(newHead); + expect(result.ahead).toBe(1); + + // Remote should have advanced to local HEAD. + const remoteAfter = await git(rig.bare, 'rev-parse', 'main'); + expect(remoteAfter).toBe(newHead); + expect(remoteAfter).not.toBe(remoteBefore); + }); + + it('"rebased" when diverged with a clean rebase', async () => { + // Local commits to a file the remote will never touch. 
+ const localHead = await commitFile( + rig.local, + 'local-only.txt', + 'L\n', + 'local: independent change', + ); + // Remote advances on a different file. + const remoteHead = await advanceRemote(rig, 'remote-only.txt', 'R\n', 'remote: independent change'); + + const { logger } = makeLogger(); + const result = await reconcileDataRepo({ + repoPath: rig.local, + branch: 'main', + logger, + }); + + expect(result.outcome).toBe('rebased'); + expect(result.oldCommit).toBe(localHead); + expect(result.newCommit).not.toBe(localHead); + expect(result.ahead).toBe(1); + expect(result.behind).toBe(1); + + // After rebase, HEAD's parent should be remoteHead (the rebase base). + const newHead = await git(rig.local, 'rev-parse', 'HEAD'); + const parent = await git(rig.local, 'rev-parse', 'HEAD^'); + expect(newHead).toBe(result.newCommit); + expect(parent).toBe(remoteHead); + + // Remote was pushed to. + const remoteAfter = await git(rig.bare, 'rev-parse', 'main'); + expect(remoteAfter).toBe(newHead); + }); + + it('"conflict-escaped" when diverged with a rebase conflict', async () => { + // Both sides touch the same file with different content — guaranteed + // rebase conflict. + const localHead = await commitFile(rig.local, 'conflict.txt', 'LOCAL\n', 'local: edit shared file'); + await advanceRemote(rig, 'conflict.txt', 'REMOTE\n', 'remote: edit shared file'); + const remoteHead = await git(rig.bare, 'rev-parse', 'main'); + + // Deterministic timestamp for the conflict branch name. 
+ const now = new Date('2026-08-15T12:34:56.789Z'); + const expectedBranch = 'conflicts/2026-08-15T12-34-56Z'; + + const { logger, lines } = makeLogger(); + const result = await reconcileDataRepo({ + repoPath: rig.local, + branch: 'main', + logger, + now: () => now, + }); + + expect(result.outcome).toBe('conflict-escaped'); + expect(result.conflictBranch).toBe(expectedBranch); + expect(result.oldCommit).toBe(localHead); + expect(result.newCommit).toBe(remoteHead); + + // Local HEAD reset to origin/main. + const localHEAD = await git(rig.local, 'rev-parse', 'HEAD'); + expect(localHEAD).toBe(remoteHead); + + // No half-rebase left behind. + let rebaseInProgress = false; + try { + await git(rig.local, 'rev-parse', '--verify', 'REBASE_HEAD'); + rebaseInProgress = true; + } catch { + // expected — no REBASE_HEAD ref means no in-progress rebase + } + expect(rebaseInProgress).toBe(false); + + // Conflict branch pushed to remote, pointing at the original local HEAD. + const conflictRef = await git(rig.bare, 'rev-parse', `refs/heads/${expectedBranch}`); + expect(conflictRef).toBe(localHead); + + // Loud error log line emitted. + const errorLines = lines.filter((l) => l.level === 'error'); + expect(errorLines.length).toBeGreaterThan(0); + // The escape-hatch outcome should mention the conflictBranch field. + expect( + errorLines.some( + (l) => + 'conflictBranch' in l.obj || + (typeof l.msg === 'string' && l.msg.includes('conflict')), + ), + ).toBe(true); + }); + + it('"fetch-failed" when the remote is unreachable; local state preserved', async () => { + // Point origin at a bogus path. fetch will fail. 
+ await git(rig.local, 'remote', 'set-url', 'origin', '/definitely/not/a/repo/path'); + + const before = await git(rig.local, 'rev-parse', 'HEAD'); + + const { logger, lines } = makeLogger(); + const result = await reconcileDataRepo({ + repoPath: rig.local, + branch: 'main', + logger, + }); + + expect(result.outcome).toBe('fetch-failed'); + expect(result.oldCommit).toBe(before); + expect(result.newCommit).toBe(before); + + // Local HEAD unchanged. + const after = await git(rig.local, 'rev-parse', 'HEAD'); + expect(after).toBe(before); + + // Logger received a warn line for the fetch. + expect(lines.some((l) => l.level === 'warn' && /fetch/.test(l.msg))).toBe(true); + }); + + it('reconciles correctly after a single-branch clone (regression for the shell bug)', async () => { + // The shell entrypoint used `git clone --branch X` then `git fetch origin Y` + // which wrote a narrow remote refspec and quietly failed to populate + // refs/remotes/origin/Y. Our Node reconciler always passes an explicit + // refspec — this test exercises a clone-with-a-narrow-refspec setup and + // confirms reconciliation still works. + + // Start a fresh local from scratch, with a narrow remote.origin.fetch + // refspec that mimics what `git clone --single-branch --branch main` writes. + const narrowLocal = `${rig.local}-narrow`; + await exec('git', ['clone', '--single-branch', '--branch', 'main', rig.bare, narrowLocal]); + await git(narrowLocal, 'config', 'user.email', 'narrow@test.local'); + await git(narrowLocal, 'config', 'user.name', 'narrow'); + await git(narrowLocal, 'config', 'commit.gpgsign', 'false'); + await git(narrowLocal, 'config', 'core.hooksPath', '/dev/null'); + + // Advance the remote on `main`. 
+ const newRemoteHead = await advanceRemote( + rig, + 'narrow-feature.txt', + 'hi\n', + 'remote: advance after narrow clone', + ); + + const { logger } = makeLogger(); + const result = await reconcileDataRepo({ + repoPath: narrowLocal, + branch: 'main', + logger, + }); + + expect(result.outcome).toBe('fast-forwarded'); + expect(result.newCommit).toBe(newRemoteHead); + + await rm(narrowLocal, { recursive: true, force: true }); + }); +}); diff --git a/deploy/docker/entrypoint.sh b/deploy/docker/entrypoint.sh index 1f419c5..4826f83 100755 --- a/deploy/docker/entrypoint.sh +++ b/deploy/docker/entrypoint.sh @@ -1,18 +1,20 @@ #!/bin/sh # CodeForPhilly API entrypoint. # -# On pod start: -# 1. Ensures a workable clone of CFP_DATA_REMOTE exists at CFP_DATA_REPO_PATH. -# 2. Reconciles local commits (made by the previous pod's runtime that the -# push daemon hadn't yet pushed) with origin: -# - in sync → no-op -# - behind → fast-forward -# - ahead → push pending commits to origin -# - diverged + clean rebase → rebase + push -# - diverged + conflicts → push a `conflicts/` branch -# to origin for operator review, then hard-reset local to origin so -# the pod boots from a known-good state. Never silently drops work. -# 3. exec the API. +# Minimal boot prep. The data-repo reconciliation state machine +# (in-sync / behind / ahead / diverged-clean-rebase / diverged-conflict-escape) +# lives in the Node API process now — see apps/api/src/store/reconcile.ts +# and apps/api/src/plugins/reconcile.ts. This script only ensures: +# +# 1. The PVC mount at CFP_DATA_REPO_PATH is trusted by git regardless of +# file-ownership (PVCs survive pod restarts and may carry files owned +# by a different uid than the current runAsUser). +# 2. A reasonable git user identity is configured for any rebase committer +# writes (rebase preserves authors of replayed commits; the committer +# line is the only thing that can pick up runtime identity). +# 3. 
There IS a valid `.git` working tree at CFP_DATA_REPO_PATH. On first +# pod boot (empty PVC), we do an initial full-history clone. On +# subsequent boots, the reconciler inside the API decides what to do. # # Required env: # CFP_DATA_REPO_PATH — local working-tree path (mounted PVC in k8s) @@ -20,17 +22,8 @@ # CFP_DATA_REMOTE — git URL to clone/fetch/push. If unset, the entrypoint # assumes an offline-style dev setup and uses whatever # working tree is already at CFP_DATA_REPO_PATH. -# CFP_DATA_BRANCH — branch to track (default: main). +# CFP_DATA_BRANCH — branch to clone initially (default: main). # GIT_SSH_COMMAND — set when an SSH deploy key is mounted. -# -# Failure modes: -# - Fetch failures are non-fatal — log + continue with local state. The -# push-daemon retries on its schedule. -# - Push failures during reconciliation are non-fatal — the push-daemon -# retries once the API starts. -# - Rebase conflicts trigger the escape hatch (conflict branch + hard reset). -# The API still boots; the operator investigates the named branch. -# - Anything else (clone failure, etc.) crashes the container; k8s restarts. set -eu @@ -47,144 +40,48 @@ DATA_BRANCH="${CFP_DATA_BRANCH:-main}" # runAsUser (e.g., an earlier iteration ran as root). git config --global --add safe.directory "$CFP_DATA_REPO_PATH" -# Identity for any direct git operations made by the entrypoint (rebase -# preserves authors of existing commits; this just covers the committer when -# rebase actually rewrites a commit). API mutations supply their own GIT_AUTHOR_* -# via gitsheets transaction options. +# Pseudonymous identity for any direct git operations that pick up the +# runtime committer line. API mutations supply their own GIT_AUTHOR_* via +# gitsheets transaction options; the reconciler re-applies these to the +# repo-local config too, so this is belt-and-suspenders for any other tool +# that touches the tree. 
: "${GIT_AUTHOR_NAME:=CodeForPhilly API}" : "${GIT_AUTHOR_EMAIL:=api@users.noreply.codeforphilly.org}" : "${GIT_COMMITTER_NAME:=$GIT_AUTHOR_NAME}" : "${GIT_COMMITTER_EMAIL:=$GIT_AUTHOR_EMAIL}" export GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL GIT_COMMITTER_NAME GIT_COMMITTER_EMAIL -# --------------------------------------------------------------------------- -# Reconcile against origin. Returns 0 on success or a soft failure; only -# unrecoverable filesystem/clone errors propagate via `set -e`. -# --------------------------------------------------------------------------- -reconcile() { - cd "$CFP_DATA_REPO_PATH" - - git config user.name "$GIT_AUTHOR_NAME" - git config user.email "$GIT_AUTHOR_EMAIL" - git remote set-url origin "$CFP_DATA_REMOTE" - - # Unshallow if a previous clone used --depth=1; the reconciliation logic - # below needs the merge-base to be reachable. - if [ -f .git/shallow ]; then - log "unshallowing existing clone (needed for rebase)" - git fetch --unshallow origin "$DATA_BRANCH" 2>&1 | sed 's/^/ /' || \ - log "WARN: --unshallow failed; continuing with shallow history" - fi - - if ! git fetch --prune origin "$DATA_BRANCH" 2>&1 | sed 's/^/ /'; then - log "WARN: fetch failed; skipping reconciliation, using local state" - return 0 - fi - - # Ensure we're on the branch. - if git rev-parse --verify "refs/heads/$DATA_BRANCH" >/dev/null 2>&1; then - git checkout "$DATA_BRANCH" 2>&1 | sed 's/^/ /' - else - git checkout -b "$DATA_BRANCH" "origin/$DATA_BRANCH" 2>&1 | sed 's/^/ /' - fi - - LOCAL=$(git rev-parse HEAD) - REMOTE=$(git rev-parse "origin/$DATA_BRANCH") - if ! 
BASE=$(git merge-base HEAD "origin/$DATA_BRANCH" 2>/dev/null); then - log "WARN: no merge-base with origin/$DATA_BRANCH; resetting to origin" - git reset --hard "origin/$DATA_BRANCH" 2>&1 | sed 's/^/ /' - return 0 - fi - - if [ "$LOCAL" = "$REMOTE" ]; then - log "in sync with origin/$DATA_BRANCH" - return 0 - fi - - if [ "$LOCAL" = "$BASE" ]; then - log "behind origin/$DATA_BRANCH — fast-forwarding" - git merge --ff-only "origin/$DATA_BRANCH" 2>&1 | sed 's/^/ /' - return 0 - fi - - if [ "$REMOTE" = "$BASE" ]; then - AHEAD=$(git rev-list --count "origin/$DATA_BRANCH..HEAD") - log "ahead of origin/$DATA_BRANCH by ${AHEAD} commit(s) — pushing" - if git push origin "$DATA_BRANCH" 2>&1 | sed 's/^/ /'; then - log "push succeeded" - else - log "WARN: push failed; push-daemon will retry once API starts" - fi - return 0 +if [ ! -d "$CFP_DATA_REPO_PATH/.git" ]; then + if [ -z "${CFP_DATA_REMOTE:-}" ]; then + log "ERROR: $CFP_DATA_REPO_PATH is not a git repo and CFP_DATA_REMOTE is unset" + exit 1 fi - # Diverged: local has commits that origin doesn't AND origin has commits - # that local doesn't. Attempt a rebase; if it conflicts, escape-hatch. - AHEAD=$(git rev-list --count "origin/$DATA_BRANCH..HEAD") - BEHIND=$(git rev-list --count "HEAD..origin/$DATA_BRANCH") - log "diverged from origin/$DATA_BRANCH (ahead=${AHEAD}, behind=${BEHIND}) — rebasing" - - if git rebase "origin/$DATA_BRANCH" 2>&1 | sed 's/^/ /'; then - log "rebase clean — pushing" - if git push origin "$DATA_BRANCH" 2>&1 | sed 's/^/ /'; then - log "push succeeded" - else - log "WARN: push failed; push-daemon will retry once API starts" - fi - return 0 - fi + mkdir -p "$CFP_DATA_REPO_PATH" - # Conflict — escape hatch. 
- CONFLICT_BRANCH="conflicts/$(date -u +%Y-%m-%dT%H-%M-%SZ)" - log "ERROR: rebase conflict on $DATA_BRANCH — invoking escape hatch" - git rebase --abort 2>&1 | sed 's/^/ /' || true - log "preserving pre-rebase HEAD ($LOCAL) at $CONFLICT_BRANCH" - git branch "$CONFLICT_BRANCH" "$LOCAL" - if git push origin "$CONFLICT_BRANCH" 2>&1 | sed 's/^/ /'; then - log "pushed $CONFLICT_BRANCH to origin — operator must investigate" - else - log "WARN: failed to push $CONFLICT_BRANCH; diverged commits preserved only in this PVC's reflog" + # PVC may carry residue from a previous pod that bailed mid-clone. + # `git clone` refuses to clone into a non-empty directory, so wipe it + # first. Safe because the data repo is always re-cloneable. + if [ -n "$(ls -A "$CFP_DATA_REPO_PATH" 2>/dev/null)" ]; then + log "$CFP_DATA_REPO_PATH non-empty but lacks .git — wiping before clone" + find "$CFP_DATA_REPO_PATH" -mindepth 1 -maxdepth 1 -exec rm -rf {} + fi - log "resetting $DATA_BRANCH to origin/$DATA_BRANCH" - git reset --hard "origin/$DATA_BRANCH" 2>&1 | sed 's/^/ /' - return 0 -} -if [ -z "${CFP_DATA_REMOTE:-}" ]; then - if [ -d "$CFP_DATA_REPO_PATH/.git" ]; then - log "CFP_DATA_REMOTE unset; using existing working tree at $CFP_DATA_REPO_PATH" - cd "$CFP_DATA_REPO_PATH" - git config user.name "$GIT_AUTHOR_NAME" - git config user.email "$GIT_AUTHOR_EMAIL" - cd - >/dev/null - else - log "ERROR: CFP_DATA_REMOTE is unset and $CFP_DATA_REPO_PATH is not a git repo" - exit 1 - fi -else - mkdir -p "$CFP_DATA_REPO_PATH" + log "cloning $CFP_DATA_REMOTE into $CFP_DATA_REPO_PATH (branch=$DATA_BRANCH)" + # Full history (no --depth) so the API-side reconciler can rebase against + # any realistic divergence on subsequent boots. 
+ git clone --branch "$DATA_BRANCH" "$CFP_DATA_REMOTE" "$CFP_DATA_REPO_PATH" +fi - if [ -d "$CFP_DATA_REPO_PATH/.git" ]; then - log "reconciling existing data repo at $CFP_DATA_REPO_PATH (branch=$DATA_BRANCH)" - reconcile - cd - >/dev/null || true - else - # PVC may carry residue from a previous pod that bailed mid-clone. - # `git clone` refuses to clone into a non-empty directory, so wipe it - # first. Safe because the data repo is always re-cloneable. - if [ -n "$(ls -A "$CFP_DATA_REPO_PATH" 2>/dev/null)" ]; then - log "$CFP_DATA_REPO_PATH non-empty but lacks .git — wiping before clone" - find "$CFP_DATA_REPO_PATH" -mindepth 1 -maxdepth 1 -exec rm -rf {} + - fi - log "cloning $CFP_DATA_REMOTE into $CFP_DATA_REPO_PATH (branch=$DATA_BRANCH)" - # Full history (no --depth) so subsequent reconciliations can rebase. - git clone --branch "$DATA_BRANCH" "$CFP_DATA_REMOTE" "$CFP_DATA_REPO_PATH" - cd "$CFP_DATA_REPO_PATH" - git config user.name "$GIT_AUTHOR_NAME" - git config user.email "$GIT_AUTHOR_EMAIL" - cd - >/dev/null - fi +cd "$CFP_DATA_REPO_PATH" +git config user.name "$GIT_AUTHOR_NAME" +git config user.email "$GIT_AUTHOR_EMAIL" +# Ensure the origin URL matches the current env (in case CFP_DATA_REMOTE +# was rotated). Idempotent. +if [ -n "${CFP_DATA_REMOTE:-}" ] && git remote get-url origin >/dev/null 2>&1; then + git remote set-url origin "$CFP_DATA_REMOTE" fi +cd - >/dev/null -log "data repo ready; starting API" +log "data repo ready; starting API (reconciliation runs inside the API process)" exec "$@" diff --git a/docs/operations/deploy.md b/docs/operations/deploy.md index 6a1ef86..d1c0ecc 100644 --- a/docs/operations/deploy.md +++ b/docs/operations/deploy.md @@ -101,26 +101,52 @@ curl http://localhost:3001/ # SPA index.html ## Boot sequence -The container entrypoint (`deploy/docker/entrypoint.sh`) reconciles the -data-repo working tree with origin before exec'ing the API. 
See the -"Smart entrypoint reconciliation" commit message in `git log -deploy/docker/entrypoint.sh` for the full state machine; in short: - -- in sync → no-op -- behind → fast-forward -- ahead → push (push daemon retries on failure) -- diverged + clean rebase → rebase + push -- diverged + conflicts → push a `conflicts/<timestamp>` branch to origin - and hard-reset local to origin +The container entrypoint (`deploy/docker/entrypoint.sh`) only handles the +bits that *must* run before the Node process exists: + +- Trusts the PVC mount via `git config --global safe.directory`. +- Sets a pseudonymous git identity (`CodeForPhilly API + <api@users.noreply.codeforphilly.org>`) for any committer line a future + rebase might write. +- On first pod boot — and only then — does a full-history `git clone` of + `CFP_DATA_REMOTE` into `CFP_DATA_REPO_PATH` when no `.git` directory + exists. On subsequent boots the PVC already holds a clone; no clone is + performed. +- Refreshes `origin`'s URL to whatever `CFP_DATA_REMOTE` is set to (lets + operators rotate the remote without re-cloning the PVC). +- `exec`s the API. That's all — about a dozen lines of shell now. Then `exec node apps/api/dist/index.js`. Inside node, `buildApp()` registers plugins ([apps/api/src/app.ts](../../apps/api/src/app.ts)) in order: env → CORS → cookies → trace IDs → error mapper → **store** (loads public + -private into memory) → **push daemon** (starts pushing transact'd commits to +private into memory) → **reconcile** (fetch + ff/rebase/escape-hatch against +origin — see below) → **push daemon** (starts pushing transact'd commits to `CFP_DATA_REMOTE`) → services (FTS) → rate limit → idempotency → session middleware → swagger → routes → static SPA. Fastify's `listen()` doesn't fire until all of those resolve, so once `/api/health/ready` returns 200 -both stores have loaded. +both stores have loaded **and** the working tree has been reconciled with +origin.
+ +### Reconciliation state machine + +Lives in [`apps/api/src/store/reconcile.ts`](../../apps/api/src/store/reconcile.ts) +and is invoked at boot by the reconcile plugin. Same state machine the +shell used to run, just structured Node so exit codes propagate naturally +and the same code is reusable from the future hot-reload webhook (#65): + +- in sync → no-op (`'in-sync'`) +- behind → fast-forward (`'fast-forwarded'`) +- ahead → push (`'pushed-ahead'`; push daemon retries on push failure) +- diverged + clean rebase → rebase + push (`'rebased'`) +- diverged + conflicts → abort rebase, create + push a + `conflicts/<timestamp>` branch from the pre-rebase HEAD, hard-reset + local to origin (`'conflict-escaped'`; logged at ERROR level so operators + see it in production logs) +- fetch itself fails (network blip) → log warn, continue with local state + (`'fetch-failed'`) + +When `CFP_DATA_REMOTE` is unset (typical local dev), the reconcile plugin +skips reconciliation entirely. ## Probes @@ -133,9 +159,10 @@ both stores have loaded. ## Data repo on disk The API operates on a working tree at `/app/data` backed by a PVC. The -entrypoint reconciles that tree with `CFP_DATA_REMOTE` on every boot; the -push daemon pushes commits made during the pod's lifetime back to the -remote. +entrypoint ensures the working tree exists (cloning on first boot); the +API-side reconcile plugin then synchronizes that tree with `CFP_DATA_REMOTE` +on every boot, and the push daemon pushes commits made during the pod's +lifetime back to the remote. Implications: