From 97584ef70b93df503424036faf3c59fd97a6aba2 Mon Sep 17 00:00:00 2001 From: genisis0x Date: Mon, 15 Jun 2026 18:11:25 +0530 Subject: [PATCH 1/2] fix(autoplan): surface scope-detection counts and stop silent phase skips Phase 0 decides whether to run the Design (Phase 2) and DX (Phase 3.5) reviews by grepping the plan for scope terms with a 2-match threshold. Below the threshold the phase was dropped silently: the run reported only a yes/no, with no match count, no near-miss warning, and no way to override. A plan that is genuinely UI- or developer-facing but phrases its terms with hyphens (form-control), uses synonyms outside the list, or buries them in code fences grep skips would lose two whole review phases without the user ever knowing why. A scope sitting at exactly two matches is one edit away from silently disappearing on the next run. Report the match count per scope, flag a below-threshold scope as a near-miss instead of a clean no, call out the borderline exactly-two case, and offer an override so the user can force a phase regardless of the count. The Phase 0 output line now carries the counts and the override note. Closes #1957 --- autoplan/SKILL.md | 15 +++++++++++++-- autoplan/SKILL.md.tmpl | 15 +++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/autoplan/SKILL.md b/autoplan/SKILL.md index 49db38ff90..1e4d9b2599 100644 --- a/autoplan/SKILL.md +++ b/autoplan/SKILL.md @@ -1038,6 +1038,14 @@ Then prepend a one-line HTML comment to the plan file: a developer tool (the plan describes something developers install, integrate, or build on top of) or if an AI agent is the primary user (OpenClaw actions, Claude Code skills, MCP servers). +- Record the match COUNT for each scope, not just yes/no, and never skip a phase + silently. 
A scope under the 2-match threshold means the phase is dropped, so make the + miss visible: when a scope lands at 0-1 matches yet the plan plausibly touches it + (hyphenated terms like `form-control`/`api-endpoint`, synonyms not in the list, or + terms inside code fences that grep skips), flag it as a near-miss rather than a clean + no. A scope sitting at exactly 2 matches is borderline — one fewer match on a re-run + would silently drop the phase — so call that out too. In every case offer the override: + the user can tell you to force the phase and you run it regardless of the count. ### Step 3: Load skill files from disk @@ -1064,8 +1072,11 @@ Read each file using the Read tool: Follow ONLY the review-specific methodology, sections, and required outputs. -Output: "Here's what I'm working with: [plan summary]. UI scope: [yes/no]. DX scope: [yes/no]. -Loaded review skills from disk. Starting full review pipeline with auto-decisions." +Output: "Here's what I'm working with: [plan summary]. UI scope: [yes/no] ([N] matches). +DX scope: [yes/no] ([N] matches). [For any scope that is not detected or borderline (0-2 +matches), add: "Note: [UI/DX] review will be [skipped / run on a borderline count] — if +that's wrong, tell me to force it and I'll run the phase."] Loaded review skills from disk. +Starting full review pipeline with auto-decisions." --- diff --git a/autoplan/SKILL.md.tmpl b/autoplan/SKILL.md.tmpl index b2eaca9fde..fd47e5446e 100644 --- a/autoplan/SKILL.md.tmpl +++ b/autoplan/SKILL.md.tmpl @@ -203,6 +203,14 @@ Then prepend a one-line HTML comment to the plan file: a developer tool (the plan describes something developers install, integrate, or build on top of) or if an AI agent is the primary user (OpenClaw actions, Claude Code skills, MCP servers). +- Record the match COUNT for each scope, not just yes/no, and never skip a phase + silently. 
A scope under the 2-match threshold means the phase is dropped, so make the + miss visible: when a scope lands at 0-1 matches yet the plan plausibly touches it + (hyphenated terms like `form-control`/`api-endpoint`, synonyms not in the list, or + terms inside code fences that grep skips), flag it as a near-miss rather than a clean + no. A scope sitting at exactly 2 matches is borderline — one fewer match on a re-run + would silently drop the phase — so call that out too. In every case offer the override: + the user can tell you to force the phase and you run it regardless of the count. ### Step 3: Load skill files from disk @@ -229,8 +237,11 @@ Read each file using the Read tool: Follow ONLY the review-specific methodology, sections, and required outputs. -Output: "Here's what I'm working with: [plan summary]. UI scope: [yes/no]. DX scope: [yes/no]. -Loaded review skills from disk. Starting full review pipeline with auto-decisions." +Output: "Here's what I'm working with: [plan summary]. UI scope: [yes/no] ([N] matches). +DX scope: [yes/no] ([N] matches). [For any scope that is not detected or borderline (0-2 +matches), add: "Note: [UI/DX] review will be [skipped / run on a borderline count] — if +that's wrong, tell me to force it and I'll run the phase."] Loaded review skills from disk. +Starting full review pipeline with auto-decisions." 
--- From 538991f925a91309e3dcd7f8e5d8a08e12932f29 Mon Sep 17 00:00:00 2001 From: genisis0x Date: Mon, 15 Jun 2026 18:11:25 +0530 Subject: [PATCH 2/2] test(autoplan): pin Phase 0 scope diagnostic --- test/autoplan-scope-diagnostic.test.ts | 51 ++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 test/autoplan-scope-diagnostic.test.ts diff --git a/test/autoplan-scope-diagnostic.test.ts b/test/autoplan-scope-diagnostic.test.ts new file mode 100644 index 0000000000..b48a593972 --- /dev/null +++ b/test/autoplan-scope-diagnostic.test.ts @@ -0,0 +1,51 @@ +/** + * /autoplan Phase 0 scope-detection diagnostic (gate tier) + * + * Phase 0 decides whether to run the Design (Phase 2) and DX (Phase 3.5) + * reviews by grepping the plan for scope terms with a 2-match threshold. + * Before #1957 a below-threshold result silently dropped the phase: no count, + * no near-miss warning, no way to override. These static checks pin the + * generated skill so the diagnostic can't quietly regress. + */ + +import { describe, test, expect } from 'bun:test'; +import * as fs from 'fs'; +import * as path from 'path'; + +const ROOT = path.resolve(import.meta.dir, '..'); +const AUTOPLAN = fs.readFileSync(path.join(ROOT, 'autoplan', 'SKILL.md'), 'utf-8'); +const AUTOPLAN_TMPL = fs.readFileSync(path.join(ROOT, 'autoplan', 'SKILL.md.tmpl'), 'utf-8'); + +describe('autoplan Phase 0 scope diagnostic', () => { + test('detection reports the match count, not just yes/no', () => { + expect(AUTOPLAN).toContain('match COUNT'); + // The skip-visibility instruction must be present. + expect(AUTOPLAN).toMatch(/never skip a phase\s+silently/); + }); + + test('a below-threshold scope is surfaced as a near-miss, not a clean no', () => { + expect(AUTOPLAN).toContain('near-miss'); + // The known false-negative triggers from the issue are named so the agent + // knows what to look for. 
+ expect(AUTOPLAN).toContain('form-control'); + }); + + test('an exactly-at-threshold scope is flagged as borderline', () => { + expect(AUTOPLAN).toMatch(/exactly 2 matches is borderline/); + }); + + test('the user is offered an override to force a skipped phase', () => { + expect(AUTOPLAN).toContain('tell me to force'); + }); + + test('the Phase 0 output line surfaces the counts and the override note', () => { + expect(AUTOPLAN).toMatch(/UI scope: \[yes\/no\] \(\[N\] matches\)/); + expect(AUTOPLAN).toMatch(/DX scope: \[yes\/no\] \(\[N\] matches\)/); + }); + + test('template and generated skill stay in sync on the diagnostic', () => { + for (const needle of ['match COUNT', 'near-miss', 'tell me to force']) { + expect(AUTOPLAN_TMPL).toContain(needle); + } + }); +});