Skip to content

Commit 1701bde

Browse files
committed
chore(release): cut 2.4.1 with runtime stabilization fixes
Stabilize local run execution by fixing the HITL prompt flow, context propagation, model/error handling, and export readability. This release also adds web-first research tool routing and optional auto-export after runs for faster operator workflows.

Made-with: Cursor
1 parent 9e83b26 commit 1701bde

File tree

20 files changed

+663
-87
lines changed

20 files changed

+663
-87
lines changed

CHANGELOG.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [2.4.1] — 2026-03-31
11+
12+
### Fixed
13+
14+
- **HITL prompt loop + context handoff** — Clarification responses are now reused across subtasks, repeated clarification prompts are deduplicated/capped, and follow-up tasks inherit prior user decisions instead of re-asking equivalent questions.
15+
- **Run view prompt stability** — Local clarification prompts now pause/resume live rendering cleanly, reducing TUI corruption and preventing hidden/stuck prompts during interactive runs.
16+
- **Silent-failure propagation** — Agent failures now surface as explicit task failures (instead of empty-success paths), with clearer errors for missing model configuration.
17+
- **Executor compatibility regression** — Restored `budget_manager` compatibility on the runtime executor path to avoid end-of-run `AttributeError` crashes.
18+
19+
### Changed
20+
21+
- **Research tool routing defaults** — Tool selection now biases web-first for research/business discovery tasks when no local dataset path is provided.
22+
- **Export readability + content coverage** — Run export now backfills task outputs from tool traces when direct task result payloads are missing, and includes richer run details in markdown/html/latex/docx outputs.
23+
- **Duration formatting** — CLI run summary and export outputs now prefer `HH:MM:SS` format.
24+
- **Telemetry noise control** — Console span export is now opt-in (`DEVSPER_OTEL_CONSOLE=1`) to keep interactive TUI output clean by default.
25+
- **Auto-export workflow** — Added config-driven post-run export option (`[export] auto_export_on_run`) with selectable output format.
26+
1027
## [2.4.0] — 2026-03-31
1128

1229
### Added

devsper/agents/agent.py

Lines changed: 238 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,176 @@ def __init__(
594594
self.audit_logger = audit_logger
595595
self.audit_run_id = audit_run_id or ""
596596
self.clarification_requester = None # set by executor for human-in-the-loop
597+
self._hitl_response_cache: dict[str, dict] = {}
598+
self._hitl_prompt_counts: dict[str, int] = {}
599+
self._hitl_shared_answers: dict[str, str] = {}
600+
self._hitl_shared_facts: list[str] = []
601+
try:
602+
self._hitl_max_prompts_per_task = int(
603+
str(os.environ.get("DEVSPER_HITL_MAX_PROMPTS_PER_TASK", "1")).strip()
604+
or "1"
605+
)
606+
except Exception:
607+
self._hitl_max_prompts_per_task = 1
608+
self._hitl_max_prompts_per_task = max(0, self._hitl_max_prompts_per_task)
609+
610+
def _default_answer_for_field(self, field: "ClarificationField") -> object:
    """Pick an answer for ``field`` without prompting the user.

    Resolution order:

    1. a previously recorded answer to the same or a similar question
       (looked up through ``self._find_prior_hitl_answer``);
    2. the field's own ``default`` when it is not ``None`` or blank;
    3. a fallback by field type: ``True`` for ``confirm``, a one-element
       list with the first option for ``multi_select``, every option in
       its given order for ``rank``, the first option for ``mcq``, and a
       generic "proceed" sentence for anything else.

    Args:
        field: Clarification field; its ``question``, ``default``, ``type``
            and ``options`` attributes are read.

    Returns:
        The chosen answer. Its type depends on the branch taken.
    """
    field_type = str(field.type or "text")

    prior = self._find_prior_hitl_answer(field.question)
    if prior:
        if field_type == "confirm":
            # Remembered answers are text, so a recorded ``False`` comes
            # back as "False" -- a truthy string. Map recognised boolean
            # spellings back to a real bool; anything else is returned as
            # it was recorded.
            text = str(prior).strip().lower()
            if text in ("true", "yes", "y", "1"):
                return True
            if text in ("false", "no", "n", "0"):
                return False
        # NOTE(review): for multi_select/rank a remembered answer is still
        # returned in its recorded text form, not as a list.
        return prior

    if field.default is not None and str(field.default).strip() != "":
        return field.default

    options = field.options or []
    if field_type == "confirm":
        return True
    if field_type == "multi_select":
        return [options[0]] if options else []
    if field_type == "rank":
        # Return a copy so callers cannot mutate the field's own options.
        return list(options)
    if field_type == "mcq":
        return options[0] if options else "Proceed with best-effort defaults"
    return "Proceed with best-effort defaults"
627+
628+
@staticmethod
def _normalize_question(text: str) -> str:
    """Reduce a question to a canonical key used for answer lookup.

    The text is case-folded, every run of characters that are not letters
    or digits (punctuation, underscores, whitespace) becomes a single
    space, and the ends are trimmed. Letters and digits of any script are
    kept, so non-English questions no longer collapse to an empty key;
    plain-ASCII input yields the same key as before.

    Args:
        text: Raw question text; ``None`` or empty is treated as "".

    Returns:
        The normalized key, or "" when nothing usable remains.
    """
    # ``\W`` is Unicode-aware for str patterns and already covers
    # whitespace; ``_`` is added because it counts as a word character.
    return re.sub(r"[\W_]+", " ", (text or "").casefold()).strip()
632+
633+
def _find_prior_hitl_answer(self, question: str) -> str | None:
634+
qn = self._normalize_question(question)
635+
if not qn:
636+
return None
637+
if qn in self._hitl_shared_answers:
638+
return self._hitl_shared_answers[qn]
639+
q_tokens = {t for t in qn.split() if len(t) > 2}
640+
if not q_tokens:
641+
return None
642+
best_overlap = 0
643+
best_answer: str | None = None
644+
for prev_qn, prev_answer in self._hitl_shared_answers.items():
645+
p_tokens = {t for t in prev_qn.split() if len(t) > 2}
646+
overlap = len(q_tokens.intersection(p_tokens))
647+
if overlap >= 3 and overlap > best_overlap:
648+
best_overlap = overlap
649+
best_answer = prev_answer
650+
return best_answer
651+
652+
def _record_hitl_answers(self, answers: dict[str, object]) -> None:
    """Remember clarification answers for reuse.

    Each answer is stored as text in ``self._hitl_shared_answers`` under
    the normalized question, and a ``"- <question>: <answer>"`` line is
    kept in ``self._hitl_shared_facts``.

    Questions that normalize to an empty key are skipped, and so are
    ``None`` answers, which would otherwise be stored as the literal text
    "None". When a question is answered again with a different value, the
    fact line for the old value is removed so the facts list does not
    carry contradictory entries.

    Args:
        answers: Mapping of question to answer; ``None`` or empty is a
            no-op.
    """
    for question, answer in (answers or {}).items():
        if answer is None:
            continue
        key = self._normalize_question(str(question))
        if not key:
            continue

        label = str(question).strip()
        answer_text = str(answer)

        previous = self._hitl_shared_answers.get(key)
        if previous is not None and previous != answer_text:
            stale_fact = f"- {label}: {previous}"
            if stale_fact in self._hitl_shared_facts:
                self._hitl_shared_facts.remove(stale_fact)

        self._hitl_shared_answers[key] = answer_text
        fact = f"- {label}: {answer_text}"
        if fact not in self._hitl_shared_facts:
            self._hitl_shared_facts.append(fact)
662+
663+
def _should_auto_web_collection(
    self,
    request: "AgentRequest",
    tool_args: dict,
    fields: "list[ClarificationField]",
) -> bool:
    """Decide whether a clarification can be auto-answered with web collection.

    Builds one lower-cased "topic" string from the user task, the task
    description, the tool call's ``context``, the field questions and all
    previously recorded answers and facts, then looks for marker
    substrings. The result is ``True`` when the topic asks for local data
    (dataset/file/upload markers) and also looks like research or mentions
    web/public-source collection, so the run is not blocked waiting for a
    local dataset.

    Args:
        request: Current agent request; ``request.task.description`` is
            read when present.
        tool_args: Arguments of the HITL tool call; only ``context`` is
            read. ``None`` is treated as empty.
        fields: Clarification fields being asked. A ``None`` question is
            treated as empty text rather than raising.

    Returns:
        ``True`` when the prompt should be auto-answered.
    """
    dataset_markers = (
        "dataset",
        "csv",
        "upload",
        "file",
        "local path",
        "paste",
        "provide the data",
        "provide the dataset",
    )
    research_markers = (
        "research",
        "public source",
        "web",
        "openai business",
        "data collection",
        "comprehensive search",
        "business aspect",
    )
    web_preference_markers = (
        "public urls",
        "public url",
        "public-source",
        "public source",
        "web collection",
        "curated list",
        "crawl",
        "fetch",
        "url",
    )

    topic = " ".join(
        [
            str(self.user_task or ""),
            str(getattr(request.task, "description", "") or ""),
            str((tool_args or {}).get("context") or ""),
            " ".join(str(f.question or "") for f in (fields or [])),
            " ".join(self._hitl_shared_answers.values()),
            " ".join(self._hitl_shared_facts),
        ]
    ).lower()

    def mentions(markers: tuple) -> bool:
        # Plain substring search, so e.g. "file" also matches "profile".
        return any(marker in topic for marker in markers)

    # Only a request for local data can be redirected to web collection.
    if not mentions(dataset_markers):
        return False
    # NOTE(review): the web-preference markers are matched against the whole
    # topic, not only prior answers -- confirm this is intended.
    return mentions(research_markers) or mentions(web_preference_markers)
723+
724+
def _auto_web_collection_answers(
    self,
    fields: "list[ClarificationField]",
) -> dict[str, object]:
    """Answer clarification fields in favour of web/public-source collection.

    For each field, in order of preference:

    1. the first option whose text mentions web/public/URL-style fetching;
    2. for questions about local data (dataset/file/path...), the first
       option that offers a template, generic, sample or synthetic input;
    3. for free-text questions about a count or limit, the fixed answer
       ``"Up to 50 public URLs"``;
    4. otherwise whatever ``self._default_answer_for_field`` returns.

    Options picked in steps 1-2 are given in the form the field type
    expects: a one-element list for ``multi_select``, the picked option
    followed by the remaining ones for ``rank``, and the bare option
    otherwise. Step 3 is skipped for ``confirm`` and option-based fields,
    where a free-text sentence is not a valid answer.

    Args:
        fields: Clarification fields to answer.

    Returns:
        Mapping of each field's question to its chosen answer.
    """
    web_markers = ("web", "public", "url", "crawl", "fetch", "curated list", "propose")
    local_data_markers = ("dataset", "profile", "csv", "file", "path")
    placeholder_markers = ("template", "generic", "sample", "synthetic")
    choice_types = ("confirm", "mcq", "multi_select", "rank")

    def first_matching(options: list, markers: tuple) -> "str | None":
        for option in options:
            lowered = option.lower()
            if any(marker in lowered for marker in markers):
                return option
        return None

    def shaped(field_type: str, choice: str, options: list) -> object:
        if field_type == "multi_select":
            return [choice]
        if field_type == "rank":
            return [choice] + [o for o in options if o != choice]
        return choice

    answers: dict[str, object] = {}
    for field in fields:
        question_text = (field.question or "").lower()
        field_type = str(field.type or "text")
        options = [str(o) for o in (field.options or [])]

        # Prefer choices indicating web/public/source fetching.
        choice = first_matching(options, web_markers)

        # With no explicit web/public option, avoid blocking on local file
        # uploads by picking a template/sample style option when offered.
        if choice is None and any(m in question_text for m in local_data_markers):
            choice = first_matching(options, placeholder_markers)

        if choice is not None:
            answers[field.question] = shaped(field_type, choice, options)
            continue

        asks_for_count = (
            "how many" in question_text
            or "count" in question_text
            or "limit" in question_text
        )
        if asks_for_count and not options and field_type not in choice_types:
            answers[field.question] = "Up to 50 public URLs"
            continue

        answers[field.question] = self._default_answer_for_field(field)
    return answers
597767

598768
def run(self, request: AgentRequest) -> AgentResponse:
599769
"""Stateless run: all context in AgentRequest, all output in AgentResponse."""
@@ -770,6 +940,12 @@ def build_request(
770940
message_bus_section = self.message_bus.get_context_sync(task.id) or ""
771941
if message_bus_section:
772942
memory_section = (memory_section + "\n\n" + message_bus_section).strip()
943+
if self._hitl_shared_facts:
944+
memory_section = (
945+
memory_section
946+
+ "\n\nUSER CLARIFICATIONS (APPLY THESE PREFERENCES ACROSS SUBTASKS)\n"
947+
+ "\n".join(self._hitl_shared_facts[-20:])
948+
).strip()
773949
from devsper.agents.roles import get_role_config
774950

775951
role_config = get_role_config(getattr(task, "role", None))
@@ -857,6 +1033,8 @@ def run_task(
8571033
success=True,
8581034
)
8591035
self.apply_response(task, response)
1036+
if not response.success:
1037+
raise RuntimeError(response.error or "Agent run failed")
8601038
return response.result
8611039

8621040
def _enrich_task(self, task: Task, answers: dict) -> Task:
@@ -1223,6 +1401,57 @@ def _run_hitl_tool_call(self, request: AgentRequest, tool_args: dict) -> str:
12231401
if not fields:
12241402
return json.dumps({"error": "hitl.request has no valid fields"})
12251403

1404+
if self._should_auto_web_collection(request, tool_args, fields):
1405+
auto_answers = self._auto_web_collection_answers(fields)
1406+
self._record_hitl_answers(auto_answers)
1407+
payload = {
1408+
"request_id": f"auto-web-{request.task.id}",
1409+
"skipped": False,
1410+
"answers": auto_answers,
1411+
"auto_assumed": True,
1412+
}
1413+
return json.dumps(payload)
1414+
1415+
task_id = request.task.id
1416+
used_prompts = int(self._hitl_prompt_counts.get(task_id, 0))
1417+
if used_prompts >= max(0, self._hitl_max_prompts_per_task):
1418+
# Fast-path: avoid repeated user interruption after first clarification.
1419+
auto_answers: dict[str, object] = {}
1420+
for f in fields:
1421+
auto_answers[f.question] = self._default_answer_for_field(f)
1422+
self._record_hitl_answers(auto_answers)
1423+
payload = {
1424+
"request_id": f"auto-{task_id}-{used_prompts + 1}",
1425+
"skipped": False,
1426+
"answers": auto_answers,
1427+
"auto_assumed": True,
1428+
}
1429+
return json.dumps(payload)
1430+
1431+
# Prevent repeated identical prompts in the same task/tool loop.
1432+
signature_payload = {
1433+
"task_id": request.task.id,
1434+
"context": str(tool_args.get("context") or "Need user input"),
1435+
"fields": [
1436+
{
1437+
"type": f.type,
1438+
"question": f.question,
1439+
"options": f.options or [],
1440+
"default": f.default,
1441+
"required": bool(f.required),
1442+
}
1443+
for f in fields
1444+
],
1445+
}
1446+
cache_key = json.dumps(signature_payload, sort_keys=True)
1447+
cached = self._hitl_response_cache.get(cache_key)
1448+
if cached is not None:
1449+
try:
1450+
self._record_hitl_answers(cached.get("answers") or {})
1451+
except Exception:
1452+
pass
1453+
return json.dumps(cached)
1454+
12261455
req = ClarificationRequest(
12271456
request_id=str(uuid.uuid4()),
12281457
task_id=request.task.id,
@@ -1233,13 +1462,15 @@ def _run_hitl_tool_call(self, request: AgentRequest, tool_args: dict) -> str:
12331462
timeout_seconds=int(tool_args.get("timeout_seconds", 120)),
12341463
)
12351464
resp = requester.request_clarification(req)
1236-
return json.dumps(
1237-
{
1238-
"request_id": req.request_id,
1239-
"skipped": bool(resp.skipped),
1240-
"answers": resp.answers or {},
1241-
}
1242-
)
1465+
self._hitl_prompt_counts[task_id] = used_prompts + 1
1466+
self._record_hitl_answers(resp.answers or {})
1467+
payload = {
1468+
"request_id": req.request_id,
1469+
"skipped": bool(resp.skipped),
1470+
"answers": resp.answers or {},
1471+
}
1472+
self._hitl_response_cache[cache_key] = payload
1473+
return json.dumps(payload)
12431474

12441475
def _emit_tool_called_audit(
12451476
self, task_id: str, tool_name: str, result: str

0 commit comments

Comments
 (0)