diff --git a/.flake8 b/.flake8 index c923419b923..84ae2b48648 100644 --- a/.flake8 +++ b/.flake8 @@ -1,3 +1,5 @@ [flake8] ignore = E203,W503 max-line-length = 120 +per-file-ignores = + cecli/spec/runtime.py:E704 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8188eadf1db..08443be148c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ repos: rev: 26.3.1 hooks: - id: black - args: ["--line-length", "100", "--preview"] + args: ["--line-length", "100", "--preview", "--target-version", "py312"] - repo: https://github.com/pycqa/flake8 rev: 7.3.0 hooks: diff --git a/cecli/spec/__init__.py b/cecli/spec/__init__.py new file mode 100644 index 00000000000..141f72521fd --- /dev/null +++ b/cecli/spec/__init__.py @@ -0,0 +1,36 @@ +"""Spec-driven development: EARS, workspace todos, generate/refine, implement focus.""" + +from cecli.spec.ears import analyze_requirements, analyze_traceability, build_spec_index +from cecli.spec.jobs import SpecGenerationJob, spec_gen_timeout_s +from cecli.spec.progress import ( + ImplementationStep, + implementation_steps, + mark_implementation_step_done, + materialize_checklist_from_tasks_md, + merge_agent_progress_into_tasks_md, + next_open_implementation_step, + try_mark_focus_step_complete, +) +from cecli.spec.pubspec_repair import ( + PubspecRepairResult, + find_missing_pubspec_dependencies, + repair_pubspec_dependencies, +) + +__all__ = [ + "ImplementationStep", + "PubspecRepairResult", + "SpecGenerationJob", + "analyze_requirements", + "analyze_traceability", + "build_spec_index", + "find_missing_pubspec_dependencies", + "implementation_steps", + "mark_implementation_step_done", + "materialize_checklist_from_tasks_md", + "merge_agent_progress_into_tasks_md", + "next_open_implementation_step", + "repair_pubspec_dependencies", + "spec_gen_timeout_s", + "try_mark_focus_step_complete", +] diff --git a/cecli/spec/agent_todos.py b/cecli/spec/agent_todos.py new file mode 100644 index 
00000000000..92762474d7c --- /dev/null +++ b/cecli/spec/agent_todos.py @@ -0,0 +1,578 @@ +"""Link Cecli agent ``todo.txt`` (UpdateTodoList) with workspace Tasks (``.cecli/todos.json``).""" + +from __future__ import annotations + +import json +import re +import uuid +from dataclasses import dataclass +from pathlib import Path + +from cecli.spec.runtime import AgentTodoSession +from cecli.spec.todos import ( + ChecklistItem, + TodoItem, + TodoStore, + WorkspaceTodos, + _now_iso, +) + +AGENT_PLAN_TITLE = "Agent session plan" +AGENT_PLAN_LINK = "cecli:agent-todo" +AGENT_TODO_LINK_PREFIX = "cecli:agent-todo:" + + +@dataclass(frozen=True) +class AgentTodoRow: + text: str + done: bool + current: bool + + +@dataclass(frozen=True) +class AgentTodoSanitizeContext: + """Optional guards applied when pulling agent todo.txt into workspace Tasks.""" + + focus_step: str | None = None + flutter_test_ok: bool | None = None + + +def sanitize_agent_todo_rows( + rows: list[AgentTodoRow], + *, + ctx: AgentTodoSanitizeContext, + prior_done_texts: frozenset[str], +) -> tuple[list[AgentTodoRow], list[str]]: + """Revert premature done marks from agent UpdateTodoList during implement turns.""" + from cecli.spec.implement import ( + checklist_step_prefix, + is_step_after, + is_test_related_checklist_text, + ) + + warnings: list[str] = [] + sanitized: list[AgentTodoRow] = [] + for row in rows: + keep = row + step = checklist_step_prefix(row.text) + newly_done = row.done and row.text not in prior_done_texts + + if newly_done and ctx.focus_step and step and is_step_after(step, ctx.focus_step): + keep = AgentTodoRow(text=row.text, done=False, current=row.current) + warnings.append( + f"Reverted premature done on **{row.text[:72]}** " + f"(beyond current focus **{ctx.focus_step}**)." 
def load_agent_todo_rows(workspace: str | Path, item: TodoItem | None = None) -> list[AgentTodoRow]:
    """Read Cecli agent ``todo.txt`` rows for implement-turn grounding.

    Args:
        workspace: Workspace root directory (resolved before use).
        item: Optional task. When it carries a ``cecli:agent-todo:<relpath>``
            link, that file is read; otherwise the most recently modified
            ``.cecli/agents/**/todo.txt`` is used.

    Returns:
        Parsed rows after char-split recovery, or ``[]`` when no readable
        ``todo.txt`` resolves.
    """
    root = Path(workspace).resolve()
    linked = parse_agent_todo_link(item.links) if item else None
    # ``resolve_agent_todo_path`` already falls back to the latest agent todo.txt
    # when no relpath is given, so a separate "find latest" step (and its extra
    # recursive glob) is unnecessary here.
    path = resolve_agent_todo_path(root, linked or None)
    # The fallback comes from a glob and may match a non-file entry; guard it.
    if not path or not path.is_file():
        return []
    rows = parse_agent_todo_txt(path.read_text(encoding="utf-8"))
    return _recover_char_split_agent_rows(rows)
def parse_agent_todo_txt(content: str) -> list[AgentTodoRow]:
    """Parse ``todo.txt`` written by cecli ``updatetodolist``.

    Recognised rows start at column 0 with a two-character marker:
    ``✓ `` (done), ``→ `` (current) or ``○ `` (open). Section headers
    (``Done:`` / ``Remaining:``), unmarked lines and rows with empty text
    are ignored.
    """
    # marker -> (done, current)
    marker_flags = {
        "✓ ": (True, False),
        "→ ": (False, True),
        "○ ": (False, False),
    }
    parsed: list[AgentTodoRow] = []
    for raw_line in content.splitlines():
        entry = raw_line.rstrip("\n\r")
        if entry.strip() in ("Done:", "Remaining:"):
            continue
        flags = marker_flags.get(entry[:2])
        if flags is None:
            continue
        body = entry[2:]
        if body == "":
            continue
        is_done, is_current = flags
        parsed.append(AgentTodoRow(text=body, done=is_done, current=is_current))
    return parsed
def find_latest_agent_todo_txt(workspace: Path) -> Path | None:
    """Return the most recently modified ``.cecli/agents/**/todo.txt``.

    Returns ``None`` when the agents directory does not exist or contains
    no ``todo.txt``.
    """
    agents_dir = workspace.joinpath(".cecli", "agents")
    if not agents_dir.is_dir():
        return None

    def modified_at(candidate: Path) -> float:
        return candidate.stat().st_mtime

    # ``default=None`` covers the "no matches" case without materialising a list.
    return max(agents_dir.glob("**/todo.txt"), key=modified_at, default=None)
"\n".join(lines).strip() + "\n" + + +def preserve_spec_tasks_md_on_agent_import(item: TodoItem, incoming_tasks_md: str) -> bool: + """Keep spec-generated implementation tasks when syncing agent todo.txt. + + Agent pull updates the runtime checklist; it must not replace a rich + ``tasks_md`` layer produced by generate-spec (numbered steps, REQ refs). + """ + existing = (item.tasks_md or "").strip() + if not existing: + return False + if re.search(r"(?m)^\s*(?:-\s*\[[ xX]\]\s*)?\d+\.", existing): + return True + if re.search(r"REQ-\d+", existing, re.I): + return True + if "depends:" in existing.lower(): + return True + incoming = (incoming_tasks_md or "").strip() + if incoming and len(existing) > len(incoming) + 40: + return True + return False + + +def _usable_plan_title_text(text: str) -> bool: + """Reject char-split JSON debris (e.g. ``[``) mistaken for a task title after /agent.""" + t = text.strip() + if not t: + return False + alnum = sum(1 for c in t if c.isalnum()) + if len(t) <= 2 and alnum < 2: + return False + return True + + +def plan_title_from_rows(rows: list[AgentTodoRow]) -> str: + for row in rows: + if row.current and not row.done: + t = row.text.strip() + if _usable_plan_title_text(t): + return t[:120] + for row in rows: + if not row.done: + t = row.text.strip() + if _usable_plan_title_text(t): + return t[:120] + return AGENT_PLAN_TITLE + + +def _ensure_agent_link(item: TodoItem, agent_todo_relpath: str | None) -> None: + if agent_todo_relpath: + link = agent_todo_link_for(agent_todo_relpath) + if link not in item.links: + item.links = [*item.links, link] + elif AGENT_PLAN_LINK not in item.links: + item.links = [*item.links, AGENT_PLAN_LINK] + + +def _resolve_target_task(store: TodoStore, target_todo_id: str | None) -> TodoItem | None: + if target_todo_id: + return store.todos and next((t for t in store.todos if t.id == target_todo_id), None) + if not store.active_id: + return None + item = next((t for t in store.todos if t.id == store.active_id), 
None) + if item and item.status not in ("done", "cancelled"): + return item + return None + + +def import_agent_plan_store( + store: TodoStore, + rows: list[AgentTodoRow], + *, + target_todo_id: str | None = None, + agent_todo_relpath: str | None = None, +) -> TodoStore: + if not rows: + return store + + rows = _recover_char_split_agent_rows(rows) + + incoming_tasks_md = rows_to_tasks_md(rows) + from cecli.spec.progress import ( + checklist_from_agent_rows, + merge_agent_progress_into_tasks_md, + ) + + target = _resolve_target_task(store, target_todo_id) + + def _apply_rows_to_item(task: TodoItem) -> None: + task.checklist = checklist_from_agent_rows(rows, prior=task.checklist) + if preserve_spec_tasks_md_on_agent_import(task, incoming_tasks_md): + task.tasks_md = merge_agent_progress_into_tasks_md(task.tasks_md, rows) + else: + task.tasks_md = incoming_tasks_md + + any_open = any(not row.done for row in rows) + status: str = "in_progress" if any_open else "done" + now = _now_iso() + + if target: + target.title = ( + plan_title_from_rows(rows) + if target.title in (AGENT_PLAN_TITLE, "Untitled") + else target.title + ) + _apply_rows_to_item(target) + if target.status not in ("done", "cancelled"): + target.status = status # type: ignore[assignment] + target.updated_at = now + _ensure_agent_link(target, agent_todo_relpath) + store.active_id = target.id + return store + + existing = next( + ( + t + for t in store.todos + if AGENT_PLAN_LINK in t.links + or parse_agent_todo_link(t.links) + or t.title == AGENT_PLAN_TITLE + ), + None, + ) + title = plan_title_from_rows(rows) + if existing: + existing.title = title + _apply_rows_to_item(existing) + existing.status = status # type: ignore[assignment] + existing.updated_at = now + _ensure_agent_link(existing, agent_todo_relpath) + store.active_id = existing.id + else: + item = TodoItem( + id=uuid.uuid4().hex, + title=title, + tasks_md=incoming_tasks_md, + status=status, # type: ignore[arg-type] + links=[AGENT_PLAN_LINK], + 
def import_agent_plan_for_workspace(
    workspace_dir: str | Path,
    *,
    agent_todo_relpath: str | None = None,
    target_todo_id: str | None = None,
) -> TodoStore:
    """Import an agent ``todo.txt`` into the workspace todo store and persist it.

    Resolves the agent file (explicit relpath, else the latest one), parses it,
    merges the rows into the loaded store, saves the store, and syncs spec
    files for the task that ends up active.

    Raises:
        FileNotFoundError: No agent ``todo.txt`` could be resolved.
        ValueError: The file parsed to zero rows.
    """
    todos_api = WorkspaceTodos(workspace_dir)
    workspace_root = todos_api.root

    source = resolve_agent_todo_path(workspace_root, agent_todo_relpath)
    if not source:
        raise FileNotFoundError(
            "No Cecli agent todo.txt in this workspace (.cecli/agents/…/todo.txt)"
        )

    parsed_rows = parse_agent_todo_txt(source.read_text(encoding="utf-8"))
    if not parsed_rows:
        raise ValueError("Agent todo.txt is empty")

    if agent_todo_relpath:
        link_relpath = agent_todo_relpath
    else:
        # Store links with forward slashes regardless of platform.
        link_relpath = str(source.relative_to(workspace_root)).replace("\\", "/")

    updated = import_agent_plan_store(
        todos_api.load(),
        parsed_rows,
        target_todo_id=target_todo_id,
        agent_todo_relpath=link_relpath,
    )
    todos_api.save(updated)

    for task in updated.todos:
        if task.id == updated.active_id:
            if task:
                todos_api.sync_spec_files(task)
            break
    return updated
def try_import_agent_plan_for_workspace(
    workspace_dir: str | Path,
    *,
    agent_todo_relpath: str | None = None,
) -> TodoStore | None:
    """Best-effort variant of ``import_agent_plan_for_workspace``.

    Returns the updated store, or ``None`` when the import raises
    ``FileNotFoundError`` or ``ValueError`` (missing or empty ``todo.txt``).
    """
    # NOTE(review): ValueError subclasses raised while loading or decoding are
    # swallowed here too — confirm that is intended.
    try:
        imported = import_agent_plan_for_workspace(
            workspace_dir, agent_todo_relpath=agent_todo_relpath
        )
    except (FileNotFoundError, ValueError):
        imported = None
    return imported
+ """ + api = WorkspaceTodos(session.coder.root) + session_relpath = session_agent_todo_relpath(session) + store = api.load() + warnings: list[str] = [] + + if pull: + pull_relpath = _resolve_agent_todo_pull_relpath(api, store, session) + path = api.root / pull_relpath + if path.is_file(): + rows = parse_agent_todo_txt(path.read_text(encoding="utf-8")) + if rows and sanitize is not None: + rows, warnings = sanitize_agent_todo_rows( + rows, + ctx=sanitize, + prior_done_texts=prior_done_texts or frozenset(), + ) + if warnings: + path.write_text(format_agent_todo_txt(rows) + "\n", encoding="utf-8") + if rows: + store = import_agent_plan_store( + store, + rows, + target_todo_id=store.active_id, + agent_todo_relpath=pull_relpath, + ) + + if push_active and store.active_id: + item = api.find(store, store.active_id) + if item: + export_todo_item_to_agent(api.root, session_relpath, item) + _ensure_agent_link(item, session_relpath) + item.updated_at = _now_iso() + + api.save(store) + if store.active_id: + active = api.find(store, store.active_id) + if active: + api.sync_spec_files(active) + return store, warnings + + +def maybe_export_task_to_agent(workspace_dir: str | Path, item: TodoItem) -> None: + """After a workspace task edit, push to linked agent todo.txt if bound.""" + relpath = parse_agent_todo_link(item.links) + if not relpath: + return + export_todo_item_to_agent(Path(workspace_dir).resolve(), relpath, item) diff --git a/cecli/spec/ears/__init__.py b/cecli/spec/ears/__init__.py new file mode 100644 index 00000000000..385aa8fd7cd --- /dev/null +++ b/cecli/spec/ears/__init__.py @@ -0,0 +1,28 @@ +""" +EARS (Easy Approach to Requirements Syntax) — spec grammar, lint, and index. + +Standalone package: no imports from Session, http_api, or workspace_todos. +Designed for eventual lift into cecli (see docs/EARS_MODULE.md). 
+""" + +from cecli.spec.ears.index import build_spec_index +from cecli.spec.ears.lint import analyze_requirements +from cecli.spec.ears.model import ( + EarsClause, + EarsIssue, + EarsLintResult, + PatternKind, + Severity, +) +from cecli.spec.ears.trace import analyze_traceability + +__all__ = [ + "EarsClause", + "EarsIssue", + "EarsLintResult", + "PatternKind", + "Severity", + "analyze_requirements", + "analyze_traceability", + "build_spec_index", +] diff --git a/cecli/spec/ears/index.py b/cecli/spec/ears/index.py new file mode 100644 index 00000000000..166a27a05b7 --- /dev/null +++ b/cecli/spec/ears/index.py @@ -0,0 +1,180 @@ +"""Repo-wide ``.cecli/specs/**`` index vs ``todos.json`` (roadmap #22).""" + +from __future__ import annotations + +from collections import defaultdict +from dataclasses import asdict, dataclass, field +from pathlib import Path +from typing import Any + +from cecli.spec.ears.lint import analyze_requirements +from cecli.spec.ears.model import EarsIssue, Severity +from cecli.spec.ears.parse import parse_requirements_markdown + + +@dataclass +class SpecFolderRecord: + todo_id: str + has_requirements: bool = False + has_design: bool = False + has_tasks: bool = False + req_ids: list[str] = field(default_factory=list) + requirements_ok: bool | None = None + requirements_errors: int = 0 + + def to_dict(self) -> dict[str, Any]: + return asdict(self) + + +@dataclass +class SpecIndexResult: + issues: list[EarsIssue] = field(default_factory=list) + folders: list[SpecFolderRecord] = field(default_factory=list) + task_ids: list[str] = field(default_factory=list) + + @property + def ok(self) -> bool: + return not any(i.severity == "error" for i in self.issues) + + @property + def error_count(self) -> int: + return sum(1 for i in self.issues if i.severity == "error") + + @property + def warning_count(self) -> int: + return sum(1 for i in self.issues if i.severity == "warning") + + def to_dict(self) -> dict[str, Any]: + return { + "ok": self.ok, + 
"error_count": self.error_count, + "warning_count": self.warning_count, + "task_ids": list(self.task_ids), + "folders": [f.to_dict() for f in self.folders], + "issues": [i.to_dict() for i in self.issues], + } + + +def _issue( + code: str, + message: str, + severity: Severity, + *, + todo_id: str | None = None, + req_id: str | None = None, +) -> EarsIssue: + return EarsIssue( + code=code, + message=message, + severity=severity, + req_id=req_id, + todo_id=todo_id, + line=None, + ) + + +def _read_layer(path: Path) -> str: + try: + return path.read_text(encoding="utf-8") if path.is_file() else "" + except OSError: + return "" + + +def _scan_spec_folder(folder: Path) -> SpecFolderRecord: + todo_id = folder.name + rec = SpecFolderRecord(todo_id=todo_id) + req_path = folder / "requirements.md" + if req_path.is_file(): + rec.has_requirements = True + text = _read_layer(req_path) + clauses = parse_requirements_markdown(text) + seen: set[str] = set() + for clause in clauses: + if clause.req_id: + rid = clause.req_id.upper() + if rid not in seen: + seen.add(rid) + rec.req_ids.append(rid) + lint = analyze_requirements(text, source_path=str(req_path)) + rec.requirements_ok = lint.ok + rec.requirements_errors = lint.error_count + rec.has_design = (folder / "design.md").is_file() + rec.has_tasks = (folder / "tasks.md").is_file() + return rec + + +def build_spec_index( + workspace_root: str | Path, + *, + task_ids: list[str] | None = None, + specs_root: Path | None = None, +) -> SpecIndexResult: + """ + Compare ``.cecli/specs/{id}/`` on disk to workspace task ids. + + ``task_ids`` — from ``todos.json``; when omitted, only folder scan runs. 
+ """ + root = Path(workspace_root).resolve() + specs = specs_root or root / ".cecli" / "specs" + issues: list[EarsIssue] = [] + folders: list[SpecFolderRecord] = [] + + known_tasks = {t.strip() for t in task_ids or [] if t.strip()} + if specs.is_dir(): + for entry in sorted(specs.iterdir()): + if not entry.is_dir() or entry.name.startswith("."): + continue + rec = _scan_spec_folder(entry) + folders.append(rec) + if known_tasks and rec.todo_id not in known_tasks: + issues.append( + _issue( + "SPEC_ORPHAN_FOLDER", + f"Spec folder `.cecli/specs/{rec.todo_id}/` has no matching task in todos.json.", + "warning", + todo_id=rec.todo_id, + ) + ) + if rec.has_requirements and rec.requirements_ok is False: + issues.append( + _issue( + "SPEC_REQ_LINT", + f"requirements.md has {rec.requirements_errors} EARS error(s).", + "error", + todo_id=rec.todo_id, + ) + ) + + if known_tasks: + folder_ids = {f.todo_id for f in folders} + for tid in sorted(known_tasks): + if tid not in folder_ids: + issues.append( + _issue( + "SPEC_MISSING_FOLDER", + f"Task `{tid}` has no `.cecli/specs/{tid}/` folder (sync writes on save).", + "info", + todo_id=tid, + ) + ) + + by_req: dict[str, list[str]] = defaultdict(list) + for rec in folders: + for rid in rec.req_ids: + by_req[rid.upper()].append(rec.todo_id) + for rid, tasks in sorted(by_req.items()): + if len(tasks) > 1: + issues.append( + _issue( + "SPEC_REQ_ID_GLOBAL_DUP", + f"{rid} appears in multiple tasks: {', '.join(tasks)}.", + "error", + req_id=rid, + ) + ) + + return SpecIndexResult( + issues=issues, + folders=folders, + task_ids=sorted(known_tasks), + ) diff --git a/cecli/spec/ears/lint.py b/cecli/spec/ears/lint.py new file mode 100644 index 00000000000..fd370225196 --- /dev/null +++ b/cecli/spec/ears/lint.py @@ -0,0 +1,143 @@ +"""Deterministic EARS lint for requirements markdown.""" + +from __future__ import annotations + +import re +from collections import Counter + +from cecli.spec.ears.model import EarsClause, EarsIssue, 
EarsLintResult, Severity +from cecli.spec.ears.parse import parse_requirements_markdown +from cecli.spec.ears.patterns import classify_clause, has_shall, has_the_system_shall + +_REQ_HEADING = re.compile(r"^###\s+(REQ-\d+)\b", re.I | re.M) + + +def _issue( + code: str, + message: str, + severity: Severity, + *, + line: int | None = None, + req_id: str | None = None, +) -> EarsIssue: + return EarsIssue( + code=code, + message=message, + severity=severity, + line=line, + req_id=req_id, + ) + + +def lint_clauses( + clauses: list[EarsClause], + *, + heading_ids: list[str] | None = None, +) -> list[EarsIssue]: + issues: list[EarsIssue] = [] + + if not clauses: + issues.append( + _issue( + "EARS_EMPTY", + "No requirement clauses found. Add ### REQ-001 headings and EARS bullets.", + "error", + ) + ) + return issues + + # Duplicates are repeated requirement *headings*, not multiple acceptance + # criteria sharing one heading (Kiro-style requirements have several ACs per id). + dup_source = heading_ids if heading_ids is not None else [c.req_id for c in clauses if c.req_id] + id_counts = Counter(rid for rid in dup_source if rid) + for req_id, count in id_counts.items(): + if count > 1: + issues.append( + _issue( + "EARS_DUP_ID", + f"Duplicate requirement id {req_id} ({count} headings).", + "error", + req_id=req_id, + ) + ) + + for clause in clauses: + if not clause.req_id: + issues.append( + _issue( + "EARS_REQ_ID", + "Clause is not under a ### REQ-### heading.", + "warning", + line=clause.line, + ) + ) + + if not has_shall(clause.text): + issues.append( + _issue( + "EARS_NO_SHALL", + "Requirement clause should include SHALL (EARS normative statement).", + "error", + line=clause.line, + req_id=clause.req_id, + ) + ) + continue + + if not has_the_system_shall(clause.text): + issues.append( + _issue( + "EARS_NO_SUBJECT", + "Prefer **THE** system **SHALL** (or THE SHALL) for clarity.", + "warning", + line=clause.line, + req_id=clause.req_id, + ) + ) + + pattern = 
def analyze_requirements(
    text: str,
    *,
    source_path: str | None = None,
) -> EarsLintResult:
    """Lint a requirements markdown string (Tasks layer or .cecli/specs/.../requirements.md).

    Args:
        text: Requirements markdown. A falsy value is treated as empty text.
        source_path: Optional origin path, stored on the result unchanged.

    Returns:
        An ``EarsLintResult`` holding the parsed clauses and lint issues.
    """
    # Normalise once so the clause parser and the heading scan see the same
    # string. Previously only the heading scan guarded against a falsy value.
    text = text or ""
    clauses = parse_requirements_markdown(text)
    # Heading ids (not per-clause ids) drive duplicate detection in lint_clauses.
    heading_ids = [m.group(1).upper() for m in _REQ_HEADING.finditer(text)]
    issues = lint_clauses(clauses, heading_ids=heading_ids)
    return EarsLintResult(
        issues=issues,
        clauses=clauses,
        source_path=source_path,
    )
def merge_results(*results: EarsLintResult) -> EarsLintResult:
    """Concatenate issues and clauses of several lint results, in argument order.

    The merged result is built without a ``source_path``.
    """
    combined_issues: list[EarsIssue] = [issue for res in results for issue in res.issues]
    combined_clauses: list[EarsClause] = [clause for res in results for clause in res.clauses]
    return EarsLintResult(issues=combined_issues, clauses=combined_clauses)
def parse_requirements_markdown(text: str) -> list[EarsClause]:
    """Extract requirement clauses with line numbers and optional REQ ids.

    A clause starts at a bullet or numbered item, at a line containing
    ``**WHEN**`` under a REQ heading, or at a line under a REQ heading that
    contains an EARS keyword. Following non-blank lines are folded into the
    same clause until a blank line, a heading, a skipped label or a new bullet.

    Args:
        text: Requirements markdown. A falsy value is treated as empty text.

    Returns:
        Clauses in document order. ``line`` is the 1-based line where the
        clause starts.
    """
    lines = (text or "").replace("\r\n", "\n").split("\n")
    clauses: list[EarsClause] = []
    current_req_id: str | None = None
    buf: list[str] = []
    buf_line = 0

    def flush() -> None:
        """Emit the buffered lines as one clause and reset the buffer."""
        nonlocal buf
        if not buf:
            return
        body = " ".join(s.strip() for s in buf if s.strip())
        if body:
            clauses.append(
                EarsClause(
                    req_id=current_req_id,
                    line=buf_line,
                    text=body,
                    pattern=classify_clause(body),
                )
            )
        buf = []

    def strip_list_marker(item: str) -> str:
        """Drop one leading ``-``/``*`` list marker, keeping the item's own markup.

        ``str.lstrip("-* ")`` strips a character set, so ``- **WHEN** …`` lost
        its opening ``**`` and the clause text was left with unbalanced bold.
        """
        marker = re.match(r"[-*]\s+", item)
        return item[marker.end() :].strip() if marker else item

    for i, raw in enumerate(lines, start=1):
        line = raw.rstrip()
        stripped = line.strip()
        m = _REQ_HEADING.match(stripped)
        if m:
            flush()
            current_req_id = m.group(1).upper()
            continue
        if not stripped:
            flush()
            continue
        if _SKIP_LABEL.match(stripped):
            flush()
            continue
        if _BULLET.match(line) or (current_req_id and "**WHEN**" in stripped.upper() and not buf):
            flush()
            buf_line = i
            buf = [strip_list_marker(stripped)]
            continue
        if buf:
            # Continuation line of the clause currently being collected.
            buf.append(stripped)
            continue
        if current_req_id and _EARS_KEYWORD.search(stripped):
            # Only start a clause for normative prose; descriptive lines such as
            # "**User Story:** As a …" or an "**Acceptance Criteria**" label are skipped.
            buf_line = i
            buf = [stripped]

    flush()
    return clauses
tasks_md: str, +) -> str: + """Deterministic lint + trace summary appended to generate/refine prompts.""" + req = (requirements or "").strip() + des = (design or "").strip() + tsk = (tasks_md or "").strip() + if not req and not des and not tsk: + return "" + lint = analyze_requirements(req) if req else None + trace = analyze_traceability(req, des, tsk) if req else None + parts: list[str] = ["", "## Current spec quality (fix in your output)"] + if lint: + parts.append(format_lint_summary(lint)) + if trace: + parts.append(format_trace_summary(trace)) + for issue in trace.issues[:8]: + parts.append(f"- [{issue.severity}] {issue.code}: {issue.message}") + _, depth = assess_spec_richness(req, des, tsk) + if depth: + parts.append("Deepen the spec (Kiro-grade):") + for hint in depth: + parts.append(f"- {hint}") + parts.append( + "Use ### REQ-### headings with a **User Story** and numbered EARS acceptance criteria " + "(**WHEN** … **THE** system **SHALL** …). Align design and implementation tasks with every REQ id." + ) + return "\n".join(parts) + + +def requirements_pass_ears(requirements: str) -> tuple[bool, list[dict]]: + """Return (ok, issue dicts) for apply gate.""" + result = analyze_requirements(requirements or "") + return result.ok, [i.to_dict() for i in result.issues if i.severity == "error"] diff --git a/cecli/spec/ears/repair.py b/cecli/spec/ears/repair.py new file mode 100644 index 00000000000..75c3397a095 --- /dev/null +++ b/cecli/spec/ears/repair.py @@ -0,0 +1,25 @@ +"""Deterministic repairs for small-model requirement drafts before EARS gate.""" + +from __future__ import annotations + +from cecli.spec.ears.parse import parse_requirements_markdown +from cecli.spec.ears.patterns import has_shall + +_SHALL_SUFFIX = " **THE** system **SHALL** satisfy this acceptance criterion." 
def repair_requirements_missing_shall(requirements: str) -> str:
    """Add normative SHALL to parsed EARS clauses missing SHALL (common small-model slip).

    Every clause returned by ``parse_requirements_markdown`` whose text has no
    SHALL gets ``_SHALL_SUFFIX`` appended to the physical line the clause
    starts on; each line is patched at most once.

    Returns the input object unchanged when it is blank or when no clause
    needed a repair, so a no-op never rewrites line endings. When at least one
    line is patched the result uses ``\\n`` line endings.
    """
    if not (requirements or "").strip():
        return requirements
    lines = requirements.replace("\r\n", "\n").split("\n")
    fixed_line_nums: set[int] = set()
    for clause in parse_requirements_markdown(requirements):
        if has_shall(clause.text):
            continue
        idx = clause.line - 1  # clause.line is 1-based
        if not 0 <= idx < len(lines) or idx in fixed_line_nums:
            continue
        # NOTE(review): clause.line is the first line of the clause, so for a
        # clause wrapped over several lines the suffix lands mid-sentence.
        lines[idx] = lines[idx].rstrip() + _SHALL_SUFFIX
        fixed_line_nums.add(idx)
    if not fixed_line_nums:
        # Nothing repaired: hand back the caller's text byte-for-byte.
        return requirements
    return "\n".join(lines)
def format_trace_summary(result: TraceabilityResult) -> str:
    """Summarise REQ coverage in one line.

    A REQ id counts as covered when its link is flagged ``in_design`` or lists
    at least one task step. Without any REQ ids a fixed notice is returned.
    """
    if not result.req_ids:
        return "Trace: no REQ-### ids in requirements."
    total = len(result.req_ids)
    covered_links = [link for link in result.links if link.in_design or link.task_steps]
    coverage = f"Trace: {len(covered_links)}/{total} REQ ids referenced in design or tasks. "
    counts = f"{result.error_count} error(s), {result.warning_count} warning(s)."
    return coverage + counts
# What is left of a sub-step index once ``_IMPL_STEP`` has consumed the leading
# "N.": for "1.1 Foo" the captured body is "1 Foo", for "1.2.3 Foo" it is "2.3 Foo".
_SUB_STEP_REMNANT = re.compile(r"^\d+(?:\.\d+)*\.?\s+")


def _parse_steps(tasks_md: str) -> list[TraceStep]:
    """Parse numbered (optionally check-boxed) task lines into ``TraceStep`` rows.

    Lines that do not match ``_IMPL_STEP`` are ignored. A step is ``done`` only
    when its checkbox holds ``x``/``X``. REQ ids mentioned in the step text are
    collected, upper-cased and sorted. The result is ordered by step number.

    Sub-steps such as ``1.1 Foo`` are reported under their parent number (1),
    as before, but the stray sub-index is removed from the text ("Foo" rather
    than "1 Foo").
    """
    steps: list[TraceStep] = []
    for line in tasks_md.replace("\r\n", "\n").split("\n"):
        m = _IMPL_STEP.match(line.strip())
        if not m:
            continue
        body = m.group(3).strip()
        # The body starts right after the dot only for "N.M ..." style indices;
        # a normal step ("1. 2 eggs") has whitespace there and is left alone.
        if m.start(3) == m.end(2) + 1:
            body = _SUB_STEP_REMNANT.sub("", body, count=1) or body
        steps.append(
            TraceStep(
                number=int(m.group(2)),
                text=body,
                done=(m.group(1) or "").lower() == "x",
                req_refs=sorted(_extract_req_refs(body)),
            )
        )
    return sorted(steps, key=lambda s: s.number)
_extract_req_refs(design) + design_headings = _parse_design_headings(design) + steps = _parse_steps(tasks_md) + task_refs: set[str] = set() + for step in steps: + task_refs.update(step.req_refs) + + links: list[TraceLink] = [] + for rid in req_ids: + in_design = rid in design_refs + task_steps = [s.number for s in steps if rid in s.req_refs] + links.append(TraceLink(req_id=rid, in_design=in_design, task_steps=task_steps)) + if not in_design and not task_steps: + issues.append( + _issue( + "TRACE_REQ_UNCOVERED", + f"{rid} is not referenced in design.md or tasks.md.", + "warning", + req_id=rid, + ) + ) + + known = set(req_ids) + for ref in sorted(design_refs | task_refs): + if ref not in known: + issues.append( + _issue( + "TRACE_REQ_UNKNOWN", + f"{ref} is referenced in design/tasks but not declared under Requirements.", + "error", + req_id=ref, + ) + ) + + if req_ids and not design.strip() and not tasks_md.strip(): + issues.append( + _issue( + "TRACE_LAYER_EMPTY", + "Requirements exist but design and implementation tasks are empty.", + "warning", + ) + ) + + return TraceabilityResult( + issues=issues, + req_ids=req_ids, + links=links, + steps=steps, + design_headings=design_headings, + ) diff --git a/cecli/spec/focus.py b/cecli/spec/focus.py new file mode 100644 index 00000000000..92af25ad5ec --- /dev/null +++ b/cecli/spec/focus.py @@ -0,0 +1,208 @@ +"""When spec-focus mode actually applies (active task + spec content).""" + +from __future__ import annotations + +import re +from pathlib import Path + +from cecli.spec.implement import build_implement_workspace_block +from cecli.spec.steering import ( + IMPLEMENTATION_TOOL_HINTS, + SCAFFOLD_HINT, + SPEC_FOCUS_INSTRUCTIONS, + build_spec_focus_preamble, +) +from cecli.spec.todos import ( + TodoItem, + TodoStore, + format_todo_context, + format_todo_context_implement, + format_todo_context_light, + migrate_todo_layers, +) + +_SPEC_LAYER_PLACEHOLDERS = frozenset( + { + "(No requirements yet.)", + "(No design yet.)", + 
def is_implement_turn_message(message: str) -> bool:
    """Recognise the start-work / implement-step prompts sent from the Tasks tab.

    A leading ``/agent`` (any letter case) is dropped first. The rest matches
    when it satisfies ``_IMPLEMENT_STEP_RE`` or begins, case-insensitively,
    with one of the known prompt openings.
    """
    agent_prefix = "/agent"
    text = message.strip()
    if text.lower().startswith(agent_prefix):
        text = text[len(agent_prefix) :].lstrip()
    if _IMPLEMENT_STEP_RE.match(text):
        return True
    known_openings = (
        "implement the active task per the injected",
        "work the active task checklist",
        "continue the active task",
    )
    return text.lower().startswith(known_openings)
def _is_resume_implement_message(message: str) -> bool:
    """True for a "continue the active task" prompt, with or without ``/agent`` in front.

    Matching is case-insensitive and ignores surrounding whitespace.
    """
    agent_prefix = "/agent"
    normalized = message.strip().lower()
    if normalized.startswith(agent_prefix):
        remainder = normalized[len(agent_prefix) :].lstrip()
    else:
        remainder = normalized
    return remainder.startswith("continue the active task")
+ """ + turn_todo_id: str | None = None + user_text = message + implement_turn = is_implement_turn_message(message) + if should_inject_task_context( + focus_requested=focus_requested, + item=item, + inject_todo_spec=inject_todo_spec, + agent_continuation=agent_continuation, + message=message, + ): + assert item is not None + turn_todo_id = item.id + if implement_turn and todo_has_spec_content(item): + formatter = format_todo_context_implement + elif todo_has_spec_content(item): + formatter = format_todo_context + else: + formatter = format_todo_context_light + user_text = formatter(item, store=store) + message + preamble = spec_focus_preamble_applies(focus_requested=focus_requested, item=item) + if preamble: + blocks: list[str] = [] + if implement_turn: + if not agent_continuation: + blocks.append(build_spec_focus_preamble(workspace)) + blocks.append(IMPLEMENTATION_TOOL_HINTS.strip()) + blocks.append(SCAFFOLD_HINT.strip()) + if _is_resume_implement_message(message) and item is not None: + from cecli.spec.todos import _implementation_tasks_for_inject + + blocks.append( + "## Open implementation tasks (resume)\n" + + _implementation_tasks_for_inject(item, max_open=4) + ) + checklist = item.checklist if item is not None else [] + blocks.append( + build_implement_workspace_block( + workspace, + checklist, + resume=_is_resume_implement_message(message), + message=message, + active_task_title=item.title if item is not None else None, + agent_continuation=agent_continuation, + todo_item=item, + ) + ) + else: + blocks.append(build_spec_focus_preamble(workspace)) + user_text = "\n\n".join(blocks) + "\n\n" + user_text + return user_text, preamble, turn_todo_id + + +def spec_focus_effective_for_api( + *, + focus_requested: bool, + item: TodoItem | None, + inject_todo_spec: bool, +) -> bool: + """Whether the UI/API should treat the turn as spec-focus (preamble or task inject).""" + return spec_focus_preamble_applies( + focus_requested=focus_requested, item=item + ) or 
# Reasoning blocks emitted by thinking models (e.g. qwen3.6, deepseek-r1).
# NOTE(review): the previous patterns were empty (r".*?" and r""), which match
# only the empty string, so nothing was ever removed. The tag names below
# ("think", "thinking", or either followed by "-content-" and a hex id) are an
# assumption - confirm against the tag Cecli actually emits.
_THINKING_TAG_NAME = r"think(?:ing)?(?:-content-[0-9a-f]+)?"
_THINKING_TAG_RE = re.compile(
    r"<(" + _THINKING_TAG_NAME + r")>.*?</\1>",
    re.DOTALL,
)
_THINKING_CLOSE_RE = re.compile(r"</" + _THINKING_TAG_NAME + r">")


def _strip_thinking_content(text: str) -> str:
    """Remove reasoning-tag blocks from raw LLM output.

    These blocks contain the model's chain-of-thought reasoning and should not be
    included in the parsed spec layers - they often duplicate REQ headings which
    triggers EARS_DUP_ID lint errors.

    Every complete open/close pair is removed together with its content. If a
    closing tag is still present afterwards (opening tag truncated or missing),
    everything up to and including the first such closing tag is dropped too.
    The result is stripped of surrounding whitespace.
    """
    result = _THINKING_TAG_RE.sub("", text).strip()
    # Handle case where closing tag exists but opening tag was truncated/missing
    match = _THINKING_CLOSE_RE.search(result)
    if match:
        result = result[match.end() :].strip()
    return result
def _consume_run_message(runner: SpecTurnRunner, message: str, **kwargs: Any) -> str:
    """Drain ``runner.run_message`` and return the assistant text.

    ``token`` events are accumulated. The first ``done`` event ends the turn and
    yields its ``assistant_text`` or, when that is empty, the tokens seen so
    far. If the stream ends without ``done`` the accumulated tokens are returned.
    """
    streamed: list[str] = []
    for event in runner.run_message(message, **kwargs):
        kind = event.get("type")
        if kind == "done":
            final_text = event.get("assistant_text")
            return str(final_text or "".join(streamed))
        if kind == "token":
            streamed.append(str(event.get("text") or ""))
    return "".join(streamed)
def run_spec_layer_llm(
    runner: SpecTurnRunner,
    *,
    workspace: str,
    prompt: str,
    item: TodoItem,
    section: SpecSection,
    mode: str,
    todo_id: str,
    total_turn_timeout_s: float,
) -> str:
    """Repo-grounded generate: optional /agent explore, then one-shot write (+ optional deepen).

    Steps:

    1. When ``spec_gen_agent_enabled()``, run a timed read-only exploration
       turn. Any failure there (including a timeout) is swallowed and the write
       proceeds without exploration notes.
    2. Run the main one-shot write and strip reasoning blocks from its output.
    3. When the richness gate is enabled and the merged layers still draw
       suggestions for ``section``, run one deepen pass.

    Returns the raw (reasoning-stripped) model text. After a non-empty deepen
    pass the result is either the deepen output alone (requirements section
    containing ``### REQ-`` headings) or the first output followed by a
    ``--- deepen pass ---`` marker and the deepen output.
    """
    core = build_generate_message(prompt, mode=mode, item=item, section=section)  # type: ignore[arg-type]
    exploration = ""
    runner.apply_spec_gen_route(core)

    if spec_gen_agent_enabled():
        explore_msg = build_spec_explore_message(prompt=prompt, section=section, item=item)
        try:
            exploration = run_timed_message(
                runner,
                explore_msg,
                timeout_s=spec_gen_explore_timeout_s(total_turn_timeout_s),
                preproc=True,
                skip_workspace_init=True,
                active_todo_id=todo_id,
                inject_todo_spec=False,
                spec_focus=True,
                force_tier="think",
            ).strip()
        except Exception:
            # Exploration is best-effort. TimeoutError is already an Exception,
            # so listing it separately was redundant.
            exploration = ""

    write_msg = wrap_spec_generate_message(workspace, core, exploration=exploration)
    runner.apply_spec_gen_route(write_msg)
    raw = runner.run_one_shot(
        write_msg,
        timeout_s=spec_gen_write_timeout_s(total_turn_timeout_s),
        skip_workspace_init=True,
    )
    raw = _strip_thinking_content(raw)

    if not spec_gen_richness_gate_enabled():
        return raw

    parsed = parse_generated_layers(raw, section=section)
    merged = normalize_spec_layer_traceability(
        merge_generated_layers(item, parsed, section=section)
    )
    suggestions = _section_richness_suggestions(section, merged)
    if not suggestions:
        return raw

    from dataclasses import replace

    # Deepen against a copy of the item that already carries the fresh layers.
    temp = replace(
        item,
        requirements=merged.get("requirements", item.requirements),
        design=merged.get("design", item.design),
        tasks_md=merged.get("tasks_md", item.tasks_md),
    )
    deepen_msg = build_deepen_message_for_workspace(
        workspace=workspace,
        prompt=prompt,
        item=temp,
        section=section,
        suggestions=suggestions,
        exploration=exploration,
    )
    runner.apply_spec_gen_route(deepen_msg)
    raw2 = runner.run_one_shot(
        deepen_msg,
        timeout_s=max(120.0, total_turn_timeout_s * 0.35),
        skip_workspace_init=True,
    )
    raw2 = _strip_thinking_content(raw2)
    if raw2.strip():
        if section == "requirements" and "### REQ-" in raw2:
            # A full requirements rewrite replaces the first draft instead of
            # being appended to it.
            return raw2.strip()
        return raw.rstrip() + "\n\n--- deepen pass ---\n\n" + raw2.strip()
    return raw
+""" + +from __future__ import annotations + +import os +import re +from typing import Literal + +from cecli.spec.ears.prompt import format_spec_quality_for_prompt +from cecli.spec.todos import TodoItem + +GenerateMode = Literal["generate", "refine"] +SpecSection = Literal["all", "requirements", "design", "tasks_md"] + +_SECTION_HEADERS = { + "## requirements": "requirements", + "## design": "design", + "## implementation tasks": "tasks_md", + "## tasks": "tasks_md", + "## implementation plan": "tasks_md", + "## implementation steps": "tasks_md", +} + +_DEEPEN_PASS_MARKER = "--- deepen pass ---" + +# --- Kiro-style layer guidance (no curly braces: these are concatenated into +# --- .format() templates, so any "{" would be parsed as a field). --- + +_REQUIREMENTS_FORMAT = """\ +Write thorough, professional, Kiro-style requirements. Favor completeness over brevity — this is the contract the design and implementation are built against. +- Begin with a `### Introduction` section: 2–4 sentences describing the feature, who uses it, the problem it solves, and its scope and boundaries. +- Add one `### REQ-NNN: ` section per requirement, with a unique zero-padded id (REQ-001, REQ-002, …) and a short descriptive title. +- Under each requirement, write a `**User Story:** As a <role>, I want <capability>, so that <benefit>.` line. Use a concrete role (not just "user") whenever the feature implies one, and state a real benefit. +- Follow it with an `**Acceptance Criteria**` numbered list of EARS clauses. Each clause is a complete sentence using **THE** system **SHALL** with a trigger: **WHEN** <event>, **IF** <condition> **THEN**, **WHILE** <state>, or **WHERE** <feature> — or a ubiquitous **THE** system **SHALL** statement. +- Give every requirement at least two acceptance criteria. 
Across the whole document, deliberately cover the happy path, boundary and edge cases, invalid input and error handling, and the relevant non-functional needs (performance, security, privacy, accessibility, observability). +- Decompose broad features into at least three focused, independently testable requirements rather than one catch-all. Only a genuinely trivial feature should have fewer. +- Be specific and unambiguous: name concrete states, events, values, and limits instead of vague phrases like "handle errors", "fast", or "as needed". +""" + +_DESIGN_FORMAT = """\ +Be comprehensive and concrete — this is the technical blueprint an engineer implements directly from, so include enough detail that no major decision is left implicit. Use these level-3 (###) subsections: +- `### Overview` — what is being built and why, tied to the requirements; summarize the chosen approach and the key technical decisions and trade-offs. +- `### Architecture` — the major pieces and how requests and data flow between them; include a Mermaid or ASCII diagram when it clarifies the structure. +- `### Components and Interfaces` — each component, its single responsibility, and the key function/method/endpoint signatures (names, parameters, return types). +- `### Data Models` — important types and their fields, validation rules, and how they are persisted or transmitted. +- `### Error Handling` — the failure modes, how the system detects them, and both the user-visible and internal responses. +- `### Testing Strategy` — unit, integration, and end-to-end coverage, plus the edge cases and non-functional checks that need dedicated tests. +Ground the design in this repository: reference concrete modules, files, and existing patterns rather than inventing greenfield structure. Cite the REQ ids each component or decision satisfies (e.g. REQ-001), and make sure every requirement is covered by some part of the design. 
+""" + +_TASKS_FORMAT = """\ +Break the work into incremental, test-driven coding steps a developer can execute top to bottom: +- Use a numbered checklist (`- [ ] 1.`, `- [ ] 2.`, …); add sub-steps (1.1, 1.2) to decompose larger steps. +- Each step is a concrete, actionable coding task — write or modify specific code or tests — not project management, deployment, or manual QA. +- Keep each step small enough to complete and verify on its own, and sequence them so every step builds only on earlier ones (no forward references). +- Pair implementation steps with the tests that cover them; prefer writing or updating tests alongside or before the code. +- End each step with the requirement ids it implements (e.g. `_Requirements: REQ-001, REQ-002_`) and a `(depends: none|N)` marker. +- Cover every requirement and every major design component with at least one task; do not leave parts of the design unimplemented. +""" + +# Shorter prompts for LLM e2e / dogfood on small Ollama models (BV_COMPACT_SPEC_GEN=1). +# Product UI keeps full Kiro-grade prompts unless the env is set. +_REQUIREMENTS_FORMAT_COMPACT = """\ +Write concise requirements only: +- `### Introduction` — 2-3 sentences. +- Exactly **two** `### REQ-NNN` sections; each with one short **User Story** and **two** numbered acceptance lines. +- Every acceptance line MUST include both **WHEN** and **THE** system **SHALL** (copy the example shape exactly). +""" + +_REQUIREMENTS_EXAMPLE_COMPACT = """\ +Format example (replace feature text; keep the EARS shape): + +### Introduction +Clients need a minimal health check before pairing. + +### REQ-001: Liveness +**User Story:** As a client, I want a health endpoint, so that I can detect uptime. + +**Acceptance Criteria** +1. **WHEN** a client sends `GET /health` **THE** system **SHALL** respond with HTTP 200 and a JSON status field. +2. **WHEN** the core is starting **THE** system **SHALL** respond with HTTP 503 until ready. 
def compact_spec_gen_enabled() -> bool:
    """Report whether the shorter generate-spec prompts are requested.

    Reads ``BV_COMPACT_SPEC_GEN``; the values ``1``, ``true``, ``yes`` and
    ``on`` (case-insensitive, surrounding whitespace ignored) enable it.
    """
    raw_flag = os.environ.get("BV_COMPACT_SPEC_GEN", "")
    normalized = raw_flag.strip().lower()
    return normalized in {"1", "true", "yes", "on"}
def _tasks_example() -> str:
    """Pick the tasks-layer format example that matches the active prompt mode."""
    if compact_spec_gen_enabled():
        return _TASKS_EXAMPLE_COMPACT
    return _TASKS_EXAMPLE
+ +### REQ-001: Health check +**User Story:** As a client, I want a health endpoint, so that I can confirm the API is up. + +**Acceptance Criteria** +1. **WHEN** a client sends `GET /health` **THE** system **SHALL** respond with HTTP 200 and a JSON status. +2. **IF** the core is still starting **THEN THE** system **SHALL** respond with HTTP 503. + +## Design +### Overview +Implements REQ-001 as an HTTP route. +### Architecture +FastAPI app -> health handler -> status payload. +### Components and Interfaces +- `health()` returns the status payload — REQ-001. +### Data Models +A Status value with an "ok" boolean field. +### Error Handling +Return HTTP 503 while starting (REQ-001). +### Testing Strategy +An HTTP test asserts 200 for REQ-001. + +## Implementation tasks +- [ ] 1. Add the health route — _Requirements: REQ-001_ (depends: none) +- [ ] 2. Add an HTTP test for the route — _Requirements: REQ-001_ (depends: 1) +""" + +_GENERATE_TEMPLATE_PREFIX = ( + "You are a senior software architect writing a complete, production-grade spec-driven " + "development plan for this repository. Do not edit any files.\n\n" + "Feature request:\n{prompt}\n\n" + "{existing}{ears_context}\n\n" + "{depth}" + "Respond with markdown only. Use exactly these three level-2 (##) headings and no other " + "level-2 headings; use level-3 (###) for every subsection:\n\n" +) + +_REQUIREMENTS_SECTION_PREFIX = ( + "You are a senior product engineer writing the requirements layer for a spec-driven task. " + "Do not edit any files.\n\n" + "Feature request:\n{prompt}\n\n" + "{existing_requirements}{ears_context}\n\n" + "{depth}" + "Respond with markdown only, under a single level-2 heading:\n\n" + "## Requirements\n" +) + +_DESIGN_SECTION_PREFIX = ( + "You are a senior software architect writing the design layer for a spec-driven task. 
" + "Do not edit any files.\n\n" + "Task title: {title}\n\n" + "## Requirements (approved — the design must satisfy every REQ id)\n{requirements}\n\n" + "Design note:\n{prompt}\n\n" + "{existing_design}{ears_context}\n\n" + "{depth}" + "Respond with markdown only, under a single level-2 heading:\n\n" + "## Design\n" +) + +_TASKS_SECTION_PREFIX = ( + "You are a senior engineer writing the implementation tasks layer for a spec-driven task. " + "Do not edit any files.\n\n" + "Task title: {title}\n\n" + "## Requirements\n{requirements}\n\n" + "## Design\n{design}\n\n" + "Implementation note:\n{prompt}\n\n" + "{existing_tasks}{ears_context}\n\n" + "{depth}" + "Respond with markdown only, under a single level-2 heading:\n\n" + "## Implementation tasks\n" +) + +_REFINE_TEMPLATE_PREFIX = ( + "You are a senior reviewer improving a spec-driven task to production grade. " + "Do not edit any files.\n\n" + "Task title: {title}\n\n" + "## Requirements\n{requirements}\n\n" + "## Design\n{design}\n\n" + "## Implementation tasks\n{tasks_md}\n\n" + "User note: {prompt}\n{ears_context}\n\n" + "Output an improved version with the same three level-2 (##) headings " + "(## Requirements, ## Design, ## Implementation tasks). {refine_depth}Follow this structure:\n\n" +) + +# Thoroughness framing — added only in full (non-compact) mode. In compact mode these +# verbose instructions confuse small local models (e.g. llama3.2:3b) into emitting prose +# instead of the tight structure the compact format demands. +_GENERATE_DEPTH = ( + "Think carefully about the real problem behind the request, the users involved, the edge " + "cases, and how this fits the existing codebase, then write a thorough plan — not a " + "skeleton. 
Prefer completeness and precise wording over brevity; do not omit a section " + "because the request is short.\n\n" +) +_REQUIREMENTS_DEPTH = ( + "Infer the unstated needs behind the request — the roles involved, the edge cases, the " + "error and non-functional concerns — and capture them explicitly. Write a thorough, " + "precisely worded set of requirements rather than a minimal one.\n\n" +) +_DESIGN_DEPTH = ( + "Produce a concrete, implementation-ready design grounded in this repository's real modules " + "and patterns. Be thorough: explain the approach and trade-offs, not just the structure.\n\n" +) +_TASKS_DEPTH = ( + "Produce a complete, ordered plan that covers every requirement and design component as " + "incremental, test-driven coding steps. Be thorough rather than high-level.\n\n" +) +_REFINE_DEPTH = ( + "Deepen every thin or vague section with concrete detail, sharpen weak wording, add missing " + "edge cases and non-functional requirements, fix contradictions between layers, ensure every " + "REQ id is covered by the design and tasks, and resolve every EARS issue listed above. Do " + "not drop or weaken any content that is already strong. " +) +# Compact refine still needs the EARS-fix instruction (the gate enforces it) but stays terse. +_REFINE_DEPTH_COMPACT = ( + "Fix contradictions between layers, ensure every REQ id is covered, and resolve every EARS " + "issue listed above. 
def _optional_existing_block(label: str, text: str) -> str:
    """Render a prior draft as a prompt block, or nothing when there is no draft.

    ``text`` is stripped first; a missing, empty or whitespace-only draft yields
    an empty string so the surrounding template collapses cleanly. Otherwise the
    stripped draft is returned under an ``Existing <label>`` heading, followed by
    a blank line.
    """
    draft = text.strip() if text else ""
    return f"Existing {label} (improve and extend):\n{draft}\n\n" if draft else ""
def _merge_parsed_layers(
    base: dict[str, str],
    overlay: dict[str, str],
) -> dict[str, str]:
    """Return a copy of ``base`` updated with the non-blank layers from ``overlay``.

    An overlay value that is empty, ``None`` or whitespace-only is ignored, so it
    never wipes out the text already present in ``base``. Neither input is mutated.
    """
    non_blank = {name: body for name, body in overlay.items() if body and body.strip()}
    return {**base, **non_blank}
def merge_generated_layers(
    item: TodoItem,
    parsed: dict[str, str],
    *,
    section: SpecSection,
) -> dict[str, str]:
    """Combine freshly parsed layers with the layers already stored on ``item``.

    Only the layers belonging to ``section`` may be replaced, and only when the
    parsed text for that layer is non-empty; every other layer keeps its stored
    value. ``section == "all"`` refreshes all three layers. The result always
    has the keys ``requirements``, ``design`` and ``tasks_md``.
    """
    stored = {
        "requirements": item.requirements,
        "design": item.design,
        "tasks_md": item.tasks_md,
    }
    if section == "all":
        refreshed = tuple(stored)
    elif section in ("requirements", "design"):
        refreshed = (section,)
    else:
        # Any other section value is handled as the implementation-tasks layer.
        refreshed = ("tasks_md",)
    return {
        layer: (parsed.get(layer, "") or kept) if layer in refreshed else kept
        for layer, kept in stored.items()
    }
str) -> str: + t = text.strip() + m = re.match(r"^```(?:markdown|md)?\s*\n(.*)\n```\s*$", t, re.DOTALL | re.I) + return m.group(1).strip() if m else t diff --git a/cecli/spec/implement.py b/cecli/spec/implement.py new file mode 100644 index 00000000000..c5e45f889b5 --- /dev/null +++ b/cecli/spec/implement.py @@ -0,0 +1,470 @@ +"""Ground spec-focus implement turns in on-disk workspace facts (avoid ls loops).""" + +from __future__ import annotations + +import os +import re +import shutil +import subprocess +from pathlib import Path + +from cecli.spec.todos import ChecklistItem + +_PATH_IN_CHECKLIST = re.compile( + r"`([^`\n]+)`|(?<![`\w])((?:[\w.-]+/)+[\w./-]+)", +) + +_MAX_TOP_LEVEL = 20 + +_SNAPSHOT_ORIENTATION = ( + "- **Note:** Top-level listing is orientation only — do **not** ContextManager-add " + "these entries unless the checklist or implementation tasks name them." +) + +_NO_PATH_NEXT_ACTION = ( + "This checklist item names **no file paths**. Use **## Implementation tasks** " + "(injected above) for deliverable paths for this step — pick **one missing file**, " + "then **ContextManager create** → **ReadRange** → **EditText**. " + "**Do not** skip to later numbered tasks." 
def deliverable_paths_exist(workspace: str | Path, paths: list[str]) -> bool:
    """True when every path is an existing file or non-empty directory in ``workspace``.

    Args:
        workspace: Root directory the paths are interpreted against.
        paths: Paths to check, normally relative to ``workspace``.

    Returns:
        ``False`` for an empty ``paths`` list, for any path that is missing, is an
        empty directory, cannot be inspected, or resolves to a location outside
        ``workspace``; ``True`` only when every path passes.
    """
    root = Path(workspace).resolve()
    if not paths:
        return False
    for rel in paths:
        target = root / rel
        try:
            # An absolute path or ``..`` segments must not let something outside
            # the workspace count as a delivered file.
            target.resolve().relative_to(root)
        except (OSError, RuntimeError, ValueError):
            return False
        if target.is_file():
            continue
        try:
            if target.is_dir() and any(target.iterdir()):
                continue
        except OSError:
            # Unreadable directory: its content cannot be confirmed.
            return False
        return False
    return True
focus_checklist_item( + checklist: list[ChecklistItem], + *, + message: str | None = None, + active_task_title: str | None = None, +) -> ChecklistItem | None: + """Pick the checklist row this turn should work — aligns with UI active task when possible.""" + if not checklist: + return None + + step = implement_step_from_message(message or "") + if step: + for entry in checklist: + if not entry.done and ( + entry.text.strip().startswith(step + " ") + or entry.text.strip().startswith(step + ".") + ): + return entry + for entry in checklist: + if entry.text.strip().startswith(step + " ") or entry.text.strip().startswith( + step + "." + ): + return entry + + title = (active_task_title or "").strip() + if title: + title_lower = title.lower() + for entry in checklist: + if not entry.done and title_lower in entry.text.lower(): + return entry + step_from_title = checklist_step_prefix(title) + if step_from_title: + for entry in checklist: + if not entry.done and entry.text.strip().startswith(step_from_title): + return entry + # Active task may still name a row falsely marked done (agent UpdateTodoList). 
def checklist_item_for_agent_row(
    checklist: list[ChecklistItem],
    row_text: str,
) -> ChecklistItem:
    """Build a not-done checklist row that stands for an agent todo row.

    The checklist is searched first for an entry whose stripped text equals the
    stripped ``row_text``, then for the first entry that starts with the same
    step number followed by a space or a dot. A match lends its ``id`` and
    ``text``; with no match a new row is made from ``row_text`` with a random
    8-character hex id. The returned row always has ``done=False``.
    """
    import uuid

    wanted = (row_text or "").strip()

    def _reopened(source: ChecklistItem) -> ChecklistItem:
        return ChecklistItem(id=source.id, text=source.text, done=False)

    exact = next((row for row in checklist if row.text.strip() == wanted), None)
    if exact is not None:
        return _reopened(exact)

    number = checklist_step_prefix(wanted)
    if number:
        heads = (number + " ", number + ".")
        by_step = next(
            (row for row in checklist if row.text.strip().startswith(heads)),
            None,
        )
        if by_step is not None:
            return _reopened(by_step)

    return ChecklistItem(id=uuid.uuid4().hex[:8], text=wanted, done=False)
def build_workspace_snapshot_lines(workspace: str | Path) -> list[str]:
    """Build the markdown lines of the "Workspace snapshot" prompt section.

    The section holds a heading, a preview of the visible top-level entries
    (directories first, then case-insensitive by name, directories marked with a
    trailing ``/``), the orientation note, and whatever lines
    ``pubspec_repair_snapshot_lines`` contributes for the workspace.

    Args:
        workspace: Directory whose top level is listed.

    Returns:
        The snapshot lines in output order. The "Top level" line is left out when
        the directory cannot be listed or has no visible entries.
    """
    from cecli.spec.pubspec_repair import pubspec_repair_snapshot_lines

    root = Path(workspace).resolve()
    lines = ["## Workspace snapshot (verified on disk — do **not** ls to rediscover)"]

    try:
        entries = sorted(root.iterdir(), key=lambda p: (not p.is_dir(), p.name.lower()))
    except OSError:
        entries = []
    # Drop dot-entries before applying the cap: they sort ahead of other names,
    # so capping first lets them use up preview slots and inflate the
    # "+N more" count with entries that are never shown.
    visible = [e for e in entries if not e.name.startswith(".")]
    top_names = [e.name + ("/" if e.is_dir() else "") for e in visible[:_MAX_TOP_LEVEL]]
    if top_names:
        preview = ", ".join(f"`{name}`" for name in top_names)
        remaining = len(visible) - _MAX_TOP_LEVEL
        extra = f" (+{remaining} more)" if remaining > 0 else ""
        lines.append(f"- **Top level:** {preview}{extra}")

    lines.append(_SNAPSHOT_ORIENTATION)
    lines.extend(pubspec_repair_snapshot_lines(root))
    return lines
**EditText** only if tests need fixes" + ) + if _flutter_project(workspace): + lines.append( + "3. BrightVision runs **`flutter test`** at end of this turn — **do not** run flutter via Command\n" + "4. Mark this checklist item done **only after** BrightVision reports tests passed" + ) + else: + lines.append("3. Mark this checklist item done **only after** edits succeed") + lines.append("**Do not** call ls, Grep, GitStatus, or repeat ReadRange on the same file.") + elif is_test_related_checklist_text(focus.text) and paths and not on_disk: + lines.append(f"Focus checklist: **{focus.text.strip()}**") + lines.append( + "Create the test file(s) **named in this item** with **ContextManager create**, " + "then **ReadRange** + **EditText**. **No ls.**" + ) + elif is_test_related_checklist_text(focus.text): + lines.append(f"Focus checklist: **{focus.text.strip()}**") + lines.append( + "Name target file path(s) in the checklist, then **ContextManager** / **ReadRange** / " + "**EditText** on **one** file. **No ls.**" + ) + elif on_disk: + target = paths[0] + lines.append( + f"Focus checklist: **{focus.text.strip()}** — paths exist on disk (`{target}`)." + ) + lines.append( + "**ReadRange** the target source file, then **EditText** to finish. **No ls.**" + ) + elif paths and not on_disk: + lines.append(f"Focus checklist: **{focus.text.strip()}**") + lines.append( + "Paths **named in this item** are not on disk yet — use **ContextManager create** " + "(not `add` on missing files), then **ReadRange** + **EditText** on **one** target file." + ) + lines.append( + "**Do not** ls, Grep, or ReadRange paths **not named** in this checklist item." + ) + elif not paths: + lines.append(f"Focus checklist: **{focus.text.strip()}**") + lines.append(_NO_PATH_NEXT_ACTION) + elif resume: + lines.append(f"Focus checklist: **{focus.text.strip()}**") + lines.append( + "Use **ReadRange** + **EditText** on **one file** for this item. 
" + "**Do not** ls, Grep, or GitStatus — use the workspace snapshot above." + ) + else: + lines.append(f"Focus checklist: **{focus.text.strip()}**") + lines.append( + "Work **this item only** — do not skip ahead to later numbered tasks. " + "**ContextManager** / **ReadRange** / **EditText**. **No ls.**" + ) + lines.append( + "**Scope:** Mark **only** this checklist item done in UpdateTodoList — " + "do not mark later steps (e.g. 2.x) until the user starts a new Implement turn." + ) + return lines + + +_IMPLEMENT_CONTINUATION_HINT = """\ +## Continue (trimmed — token limit / auto-continue) + +Work **only** the **Next action** checklist item above. One **EditText** per file. +Do **not** ls, Grep, or GitStatus. Do **not** re-read the full spec. +Do **not** mark items done until edits succeed and BrightVision verifies tests (when applicable).""" + + +def build_implement_workspace_block( + workspace: str | Path, + checklist: list[ChecklistItem] | None, + *, + resume: bool, + message: str | None = None, + active_task_title: str | None = None, + agent_continuation: bool = False, + todo_item: object | None = None, +) -> str: + """Markdown block injected on implement / resume turns.""" + from cecli.spec.agent_todos import load_agent_todo_rows + + agent_rows = load_agent_todo_rows(workspace, todo_item) # type: ignore[arg-type] + parts = build_workspace_snapshot_lines(workspace) + checklist = checklist or [] + if checklist or agent_rows: + parts.append("") + parts.extend( + build_implement_next_action_lines( + workspace, + checklist, + resume=resume, + message=message, + active_task_title=active_task_title, + agent_todo_rows=agent_rows, + ) + ) + if agent_continuation: + parts.append("") + parts.append(_IMPLEMENT_CONTINUATION_HINT.strip()) + parts.append("") + parts.append( + "**Hard rule:** Do not batch UpdateTodoList JSON with other tool args. " + "One tool per call. Do not call **ls** when this snapshot is present." 
def edited_dart_test_files(edited_files: list[str]) -> list[str]:
    """Return the edited paths that are Dart test files under ``test/``.

    Backslashes are converted to forward slashes and leading ``./`` segments are
    removed before matching. A path qualifies when it then starts with ``test/``
    and ends with ``_test.dart``.

    Args:
        edited_files: Paths as reported for the turn.

    Returns:
        The normalized qualifying paths, in input order.
    """
    out: list[str] = []
    for raw in edited_files:
        rel = raw.replace("\\", "/")
        # Remove only a literal "./" prefix. ``str.lstrip("./")`` strips any run of
        # "." and "/" characters, which turned "../test/x_test.dart" and
        # ".test/x_test.dart" into "test/x_test.dart".
        while rel.startswith("./"):
            rel = rel[2:]
        if rel.startswith("test/") and rel.endswith("_test.dart"):
            out.append(rel)
    return out
def _truncate_text(text: str, max_len: int) -> str:
    """Shorten ``text`` to at most ``max_len`` characters, marking cuts with ``...``.

    Args:
        text: The string to shorten.
        max_len: Maximum length of the returned string.

    Returns:
        ``text`` unchanged when it already fits. Otherwise the leading
        ``max_len - 3`` characters followed by ``...``; when ``max_len`` is 3 or
        less, as many dots as fit (none for zero or negative limits).
    """
    if len(text) <= max_len:
        return text
    if max_len <= 3:
        # No room for text plus the ellipsis. Without this guard the slice bound
        # goes negative, keeps most of the text and overshoots max_len.
        return "..."[: max(max_len, 0)]
    return text[: max_len - 3] + "..."
def spec_gen_timeout_s() -> float:
    """Wall-clock cap, in seconds, for background generate-spec jobs.

    Reads ``LLM_SPEC_GEN_TIMEOUT_S``; when unset, the default wait is used. A
    value that parses as a number is raised to at least 60 seconds; a value that
    does not parse falls back to the default wait.
    """
    fallback = str(int(_DEFAULT_WAIT_S))
    configured = os.environ.get("LLM_SPEC_GEN_TIMEOUT_S", fallback)
    try:
        seconds = float(configured)
    except ValueError:
        return _DEFAULT_WAIT_S
    return max(60.0, seconds)
job_cap * 0.6)) + return max(60.0, scaled) + + +def spec_gen_section_wait_s() -> float: + """Poll cap for one phased section — slightly above one-shot turn cap.""" + return min(spec_gen_timeout_s(), spec_gen_turn_timeout_s() + 120.0) + + +def job_wall_timeout_s(job: SpecGenerationJob) -> float: + if job.wall_timeout_s is not None and job.wall_timeout_s > 0: + return float(job.wall_timeout_s) + return spec_gen_timeout_s() + + +def job_turn_timeout_s(job: SpecGenerationJob) -> float: + if job.turn_timeout_s is not None and job.turn_timeout_s > 0: + return float(job.turn_timeout_s) + return spec_gen_turn_timeout_s() + + +@dataclass +class SpecGenerationJob: + job_id: str + workspace: str + todo_id: str + prompt: str = "" + mode: str = "generate" + section: str = "all" + model: str | None = None + status: JobStatus = "pending" + error: str | None = None + requirements: str = "" + design: str = "" + tasks_md: str = "" + raw: str = "" + item: Any = None + ears_blocked: bool = False + ears_issues: list[dict] = field(default_factory=list) + wall_timeout_s: float | None = None + turn_timeout_s: float | None = None + recent_io_events: list[dict] = field(default_factory=list) + messages: list[dict] = field(default_factory=list) + created_at: float = field(default_factory=time.time) + updated_at: float = field(default_factory=time.time) + + +__all__ = [ + "JobStatus", + "SpecGenerationJob", + "_JOB_TTL_S", + "_MAX_JOBS", + "job_turn_timeout_s", + "job_wall_timeout_s", + "spec_gen_section_wait_s", + "spec_gen_timeout_s", + "spec_gen_turn_timeout_s", +] diff --git a/cecli/spec/layers.py b/cecli/spec/layers.py new file mode 100644 index 00000000000..6182efe8c5b --- /dev/null +++ b/cecli/spec/layers.py @@ -0,0 +1,189 @@ +"""Heuristics and normalization for three-layer generated specs.""" + +from __future__ import annotations + +import re + + +def design_references_requirements(requirements: str, design: str) -> bool: + req = (requirements or "").strip() + des = (design or 
"").strip() + if not des or not re.search(r"REQ-\d+", req, re.I): + return True + if re.search(r"REQ-\d+", des, re.I): + return True + nums = [m.group(1) for m in re.finditer(r"REQ-(\d+)", req, re.I)] + if any(re.search(rf"\b{n}\b", des) for n in nums): + return True + if re.search(r"\brequirement\s*\d+", des, re.I): + return True + return False + + +def requirement_ids(requirements: str) -> list[str]: + return list(dict.fromkeys(re.findall(r"REQ-\d+", requirements, re.I))) + + +_TASK_NUMBERED_RE = re.compile(r"(?:^|\n)\s*(?:-\s*\[[ xX]\]\s*)?\d+\.\s+") + + +def tasks_have_numbered_steps(tasks_md: str) -> bool: + return bool(_TASK_NUMBERED_RE.search(tasks_md or "")) + + +def normalize_tasks_md_numbering(tasks_md: str) -> str: + """Coerce plain bullets into numbered checklist lines (small-model guard).""" + tasks = (tasks_md or "").strip() + if not tasks or tasks_have_numbered_steps(tasks): + return tasks_md or "" + + lines = tasks.splitlines() + out: list[str] = [] + n = 0 + for line in lines: + stripped = line.strip() + if not stripped: + out.append("") + continue + if re.match(r"^\d+\.\s+", stripped): + n = max(n, int(stripped.split(".", 1)[0])) + out.append(f"- [ ] {stripped}") + continue + m = re.match(r"^[-*]\s*(?:\[[ xX]\]\s*)?(\d+)[.)]\s+(.+)$", stripped) + if m: + n = max(n, int(m.group(1))) + out.append(f"- [ ] {m.group(1)}. {m.group(2)}") + continue + m = re.match(r"^[-*]\s*\[[ xX]\]\s*(.+)$", stripped) + if m: + body = m.group(1).strip() + if re.match(r"^\d+\.\s+", body): + out.append(line) + continue + n += 1 + out.append(f"- [ ] {n}. {body}") + continue + m = re.match(r"^[-*]\s+(.+)$", stripped) + if m: + body = m.group(1).strip() + m2 = re.match(r"^(\d+)\.\s+", body) + if m2: + n = max(n, int(m2.group(1))) + out.append(f"- [ ] {body}") + continue + n += 1 + out.append(f"- [ ] {n}. {body}") + continue + m = re.match(r"^(?:task\s*)?(\d+)\s*[:.)]\s*(.+)$", stripped, re.I) + if m: + n = max(n, int(m.group(1))) + out.append(f"- [ ] {m.group(1)}. 
{m.group(2).strip()}") + continue + out.append(line) + + result = "\n".join(out).strip() + if tasks_have_numbered_steps(result): + return result + return tasks_md + + +def normalize_spec_layer_traceability(layers: dict[str, str]) -> dict[str, str]: + """Ensure design cites REQ ids and tasks use numbered steps (small-model guard).""" + out = dict(layers) + req = (out.get("requirements") or "").strip() + design = (out.get("design") or "").strip() + ids = requirement_ids(req) + if ids and not all(re.search(rf"\b{re.escape(rid)}\b", design, re.I) for rid in ids): + trace = "Covers " + ", ".join(ids) + "." + if not design: + out["design"] = f"## Traceability\n{trace}" + else: + out["design"] = f"{design.rstrip()}\n\n## Traceability\n{trace}" + tasks = normalize_tasks_md_numbering(out.get("tasks_md", "")) + if tasks.strip(): + out["tasks_md"] = tasks + return out + + +_DESIGN_SUBSECTIONS = ( + ("architecture", "Architecture"), + ("component", "Components and Interfaces"), + ("data model", "Data Models"), + ("error", "Error Handling"), + ("testing", "Testing Strategy"), +) + + +def assess_spec_richness( + requirements: str, + design: str, + tasks_md: str, +) -> tuple[bool, list[str]]: + """Non-gating depth check — suggestions to make a spec Kiro-grade. + + Unlike :func:`assess_generated_spec_layers` (a hard usability gate), this only + returns advisory suggestions so a thin-but-valid spec can be deepened. 
+ """ + suggestions: list[str] = [] + req = (requirements or "").strip() + des = (design or "").strip() + tasks = (tasks_md or "").strip() + + if req: + if "user story" not in req.lower(): + suggestions.append("requirements: add a **User Story** line to each requirement") + criteria = len(re.findall(r"(?m)^\s*\d+\.\s+", req)) + ids = len(requirement_ids(req)) + if ids < 2 or criteria < 4: + suggestions.append( + "requirements: add more requirements and acceptance criteria " + "(happy path, edge cases, errors)" + ) + + if des: + low = des.lower() + missing = [label for key, label in _DESIGN_SUBSECTIONS if key not in low] + if missing: + suggestions.append("design: add subsections (" + ", ".join(missing) + ")") + + if tasks: + steps = re.findall(r"(?m)^\s*(?:-\s*\[[ xX]\]\s*)?\d+\.", tasks) + if len(steps) < 3: + suggestions.append("tasks: break the work into more incremental, test-driven steps") + + return len(suggestions) == 0, suggestions + + +def assess_generated_spec_layers( + requirements: str, + design: str, + tasks_md: str, +) -> tuple[bool, list[str]]: + issues: list[str] = [] + req = (requirements or "").strip() + des = (design or "").strip() + tasks = (tasks_md or "").strip() + + if not req: + issues.append("requirements empty") + if not des: + issues.append("design empty") + if not tasks: + issues.append("tasks_md empty") + + if req: + if not re.search(r"REQ-\d+", req, re.I): + issues.append("requirements missing REQ-### id") + if not re.search(r"\bshall\b", req, re.I): + issues.append("requirements missing SHALL") + if not re.search(r"\bwhen\b", req, re.I): + issues.append("requirements missing WHEN") + + if tasks and not re.search(r"(?:^|\n)\s*(?:-\s*\[[ xX]\]\s*)?\d+\.\s+", tasks): + issues.append("tasks_md missing numbered implementation steps") + + if des and req and not design_references_requirements(req, des): + if not (tasks and design_references_requirements(req, tasks)): + issues.append("design does not reference any REQ id") + + return 
len(issues) == 0, issues diff --git a/cecli/spec/markdown.py b/cecli/spec/markdown.py new file mode 100644 index 00000000000..4327fc7bbc8 --- /dev/null +++ b/cecli/spec/markdown.py @@ -0,0 +1,213 @@ +""" +Import/export workspace tasks as markdown. +""" + +from __future__ import annotations + +import re +import uuid +from typing import Any + +from cecli.spec.todos import ChecklistItem, TodoItem, TodoStore, migrate_todo_layers + +_TASK_HEADER = re.compile(r"^#\s+(.+)$") +_META_ID = re.compile(r"^id:\s*(\S+)\s*$", re.I) +_META_STATUS = re.compile(r"^status:\s*(\S+)\s*$", re.I) +_META_DEPENDS = re.compile(r"^depends_on:\s*(.+)$", re.I) +_META_BRANCH = re.compile(r"^branch:\s*(.+)$", re.I) +_META_PR = re.compile(r"^pr:\s*(.+)$", re.I) +_CHECKLIST_ITEM = re.compile(r"^-\s*\[([ xX])\]\s*(.*)$") + +_LAYER_SECTIONS = { + "requirements": "requirements", + "design": "design", + "implementation tasks": "tasks_md", + "specification": "spec", +} + + +def export_markdown(store: TodoStore) -> str: + blocks: list[str] = [] + for item in store.todos: + item = migrate_todo_layers(item) + lines = [ + f"# {item.title}", + f"id: {item.id}", + f"status: {item.status}", + ] + if item.depends_on: + lines.append(f"depends_on: {', '.join(item.depends_on)}") + if item.branch.strip(): + lines.append(f"branch: {item.branch.strip()}") + if item.pr_url.strip(): + lines.append(f"pr: {item.pr_url.strip()}") + lines.append("") + if item.requirements.strip() or item.design.strip() or item.tasks_md.strip(): + lines += ["## Requirements", item.requirements.strip() or "", ""] + lines += ["## Design", item.design.strip() or "", ""] + lines += ["## Implementation tasks", item.tasks_md.strip() or ""] + else: + lines += ["## Specification", item.spec.strip() or ""] + if item.checklist: + lines += ["", "## Checklist"] + for c in item.checklist: + mark = "x" if c.done else " " + lines.append(f"- [{mark}] {c.text}") + if item.links: + lines += ["", "## Links"] + for link in item.links: + lines.append(f"- 
def import_markdown(
    text: str, existing: TodoStore | None = None, *, merge: bool = False
) -> TodoStore:
    """Parse the markdown layout written by :func:`export_markdown` into a store.

    Tasks start at a ``# <title>`` line and end at ``---``, at the next task
    header, or at end of input. Metadata lines (``id:``, ``status:``,
    ``depends_on:``, ``branch:``, ``pr:``) and ``## <section>`` headings are
    recognised inside a task.

    Args:
        text: Markdown document; CRLF line endings are normalised to LF.
        existing: Store to append to when ``merge`` is true.
        merge: When false (the default) a fresh ``TodoStore`` is always used.

    Returns:
        The populated store. ``active_id`` is set only when the ``activeId:``
        header names a task that is present in the store after the import.
    """
    store = existing if merge and existing else TodoStore()

    lines = text.replace("\r\n", "\n").split("\n")
    start = 0
    active_from_header: str | None = None
    if lines and lines[0].strip().lower() == "# brightvision tasks":
        start = 1
        while start < len(lines) and not lines[start].strip():
            start += 1
        if start < len(lines) and lines[start].strip().lower().startswith("activeid:"):
            active_from_header = lines[start].split(":", 1)[1].strip() or None
            start += 1

    current: dict[str, Any] | None = None
    section: str | None = None
    section_lines: list[str] = []

    def commit_section() -> None:
        """Store the buffered layer text on the task being built."""
        nonlocal section_lines
        if current is not None and section and section_lines:
            current[section] = "\n".join(section_lines).strip()
        section_lines = []

    def flush_task() -> None:
        """Finish the task being built (if any) and reset the parser state."""
        nonlocal current, section
        # The last section of a task is closed by ``---`` or by the next task
        # header rather than by another ``## `` heading, so it must be
        # committed here or its text is silently dropped.
        commit_section()
        if current and current.get("title"):
            item = TodoItem(
                id=str(current.get("id") or uuid.uuid4().hex),
                title=str(current["title"]),
                spec=str(current.get("spec") or ""),
                requirements=str(current.get("requirements") or ""),
                design=str(current.get("design") or ""),
                tasks_md=str(current.get("tasks_md") or ""),
                depends_on=list(current.get("depends_on") or []),
                branch=str(current.get("branch") or ""),
                pr_url=str(current.get("pr_url") or ""),
                status=current.get("status") or "open",
                links=list(current.get("links") or []),
                checklist=list(current.get("checklist") or []),
            )
            store.todos.append(migrate_todo_layers(item))
        current = None
        section = None

    for line in lines[start:]:
        stripped = line.strip()

        if stripped == "---":
            flush_task()
            continue

        header = _TASK_HEADER.match(stripped)
        if header and not stripped.lower().startswith("# brightvision"):
            flush_task()
            current = {
                "title": header.group(1).strip(),
                "checklist": [],
                "links": [],
                "depends_on": [],
                "branch": "",
                "pr_url": "",
            }
            continue

        if current is None:
            # Text before the first task header carries no task data.
            continue

        meta_id = _META_ID.match(stripped)
        if meta_id:
            current["id"] = meta_id.group(1)
            continue
        meta_status = _META_STATUS.match(stripped)
        if meta_status:
            status = meta_status.group(1).lower()
            # Unknown status values are ignored; the task later defaults to "open".
            if status in ("open", "in_progress", "done", "cancelled"):
                current["status"] = status
            continue
        meta_depends = _META_DEPENDS.match(stripped)
        if meta_depends:
            current["depends_on"] = [
                part.strip() for part in meta_depends.group(1).split(",") if part.strip()
            ]
            continue
        meta_branch = _META_BRANCH.match(stripped)
        if meta_branch:
            current["branch"] = meta_branch.group(1).strip()
            continue
        meta_pr = _META_PR.match(stripped)
        if meta_pr:
            current["pr_url"] = meta_pr.group(1).strip()
            continue

        if stripped.lower().startswith("## "):
            commit_section()
            section_key = stripped[3:].strip().lower()
            section = _LAYER_SECTIONS.get(section_key, section_key)
            continue

        if section == "checklist":
            entry = _parse_checklist_line(stripped)
            if entry:
                current["checklist"].append(entry)
        elif section == "links":
            if stripped.startswith("- "):
                current["links"].append(stripped[2:].strip())
        elif section in ("requirements", "design", "tasks_md", "spec"):
            # Keep the raw line so indentation inside layer text survives.
            section_lines.append(line)

    flush_task()

    if active_from_header and any(t.id == active_from_header for t in store.todos):
        store.active_id = active_from_header

    return store
+ """ + root = Path(workspace).resolve() + target = root / WORKSPACE_META_DIR + with _meta_dir_lock(root): + target.mkdir(parents=True, exist_ok=True) + return target + + +def todos_json_path(workspace: str | Path) -> Path: + return workspace_meta_dir(workspace) / TODOS_FILE + + +def specs_root(workspace: str | Path) -> Path: + return workspace_meta_dir(workspace) / SPECS_DIR + + +def attachments_dir(workspace: str | Path) -> Path: + return workspace_meta_dir(workspace) / ATTACHMENTS_DIR + + +def attachments_prefix() -> str: + return f"{WORKSPACE_META_DIR}/{ATTACHMENTS_DIR}/" diff --git a/cecli/spec/progress.py b/cecli/spec/progress.py new file mode 100644 index 00000000000..0755695b76e --- /dev/null +++ b/cecli/spec/progress.py @@ -0,0 +1,281 @@ +"""Unified implementation progress: checklist, tasks_md, and agent todo rows.""" + +from __future__ import annotations + +import re +import uuid +from dataclasses import dataclass + +from cecli.spec.agent_todos import AgentTodoRow, rows_from_tasks_md, rows_to_tasks_md +from cecli.spec.implement import checklist_step_prefix, step_sort_key +from cecli.spec.todos import ChecklistItem, TodoItem + +_TASK_MD_CHECKBOX = re.compile(r"^(\s*)-\s*\[([ xX])\]\s*(.+)$") + +_VERIFY_RE = re.compile( + r"^\s*[-*]?\s*verify:\s*`([^`]+)`", + re.IGNORECASE, +) + + +@dataclass(frozen=True) +class ImplementationStep: + step_id: str | None + text: str + done: bool + current: bool + verify_cmd: str | None = None + + +def extract_verify_for_step(tasks_md: str, step_prefix: str) -> str | None: + """Find ``verify: `...` `` under a numbered step block in tasks_md.""" + if not tasks_md or not step_prefix: + return None + + lines = tasks_md.splitlines() + in_step = False + step_indent: int | None = None + + for line in lines: + stripped = line.lstrip() + indent = len(line) - len(stripped) + if not in_step: + m = re.match(r"-\s*\[[ xX]\]\s+(" + re.escape(step_prefix) + r")\s", stripped) + if m: + in_step = True + step_indent = indent + continue + elif 
stripped and indent <= step_indent: # type: ignore[operator] + in_step = False + elif in_step: + vm = _VERIFY_RE.match(line) + if vm: + return vm.group(1) + return None + + +def merge_agent_progress_into_tasks_md(tasks_md: str, rows: list[AgentTodoRow]) -> str: + """Patch checkbox marks in rich spec tasks_md without replacing REQ/verify prose.""" + if not (tasks_md or "").strip(): + return tasks_md + + done_by_step: dict[str, bool] = {} + done_by_text: dict[str, bool] = {} + for row in rows: + text = (row.text or "").strip() + if not text: + continue + done_by_text[text] = row.done + step = checklist_step_prefix(text) + if step: + done_by_step[step] = row.done + + out: list[str] = [] + for line in tasks_md.splitlines(): + m = _TASK_MD_CHECKBOX.match(line) + if not m: + out.append(line) + continue + indent, body = m.group(1), m.group(3).strip() + step = checklist_step_prefix(body) + new_done = None + if step and step in done_by_step: + new_done = done_by_step[step] + elif body in done_by_text: + new_done = done_by_text[body] + if new_done is None: + out.append(line) + continue + mark = "x" if new_done else " " + out.append(f"{indent}- [{mark}] {body}") + + merged = "\n".join(out) + if tasks_md.endswith("\n"): + merged += "\n" + return merged + + +def checklist_from_agent_rows( + rows: list[AgentTodoRow], + prior: list[ChecklistItem] | None = None, +) -> list[ChecklistItem]: + """Build checklist from agent rows, reusing stable ids when step/text matches.""" + prior = prior or [] + by_step: dict[str, ChecklistItem] = {} + by_text: dict[str, ChecklistItem] = {} + for entry in prior: + text = entry.text.strip() + by_text[text] = entry + step = checklist_step_prefix(text) + if step: + by_step[step] = entry + + out: list[ChecklistItem] = [] + for row in rows: + text = row.text.strip() + step = checklist_step_prefix(text) + existing = by_step.get(step) if step else None + if existing is None and text in by_text: + existing = by_text[text] + cid = existing.id if existing 
else uuid.uuid4().hex[:8] + out.append(ChecklistItem(id=cid, text=row.text, done=row.done)) + return out + + +def materialize_checklist_from_tasks_md(item: TodoItem) -> list[ChecklistItem]: + """Populate checklist from tasks_md checkbox lines when runtime checklist is missing.""" + parsed = rows_from_tasks_md(item.tasks_md or "") + if not parsed: + return list(item.checklist or []) + rows = [AgentTodoRow(text=row.text, done=row.done, current=row.current) for row in parsed] + return checklist_from_agent_rows(rows, prior=item.checklist or []) + + +def _rows_from_item(item: TodoItem) -> list[AgentTodoRow]: + if item.checklist: + marked_current = False + rows: list[AgentTodoRow] = [] + for entry in item.checklist: + current = not entry.done and not marked_current + if current: + marked_current = True + rows.append(AgentTodoRow(text=entry.text, done=entry.done, current=current)) + return rows + return rows_from_tasks_md(item.tasks_md or "") + + +def implementation_steps(item: TodoItem) -> list[ImplementationStep]: + """Ordered implement steps from checklist (preferred) or tasks_md.""" + tasks_md = item.tasks_md or "" + steps: list[ImplementationStep] = [] + for row in _rows_from_item(item): + step_id = checklist_step_prefix(row.text) + verify = extract_verify_for_step(tasks_md, step_id) if step_id else None + steps.append( + ImplementationStep( + step_id=step_id, + text=row.text.strip(), + done=row.done, + current=row.current, + verify_cmd=verify, + ) + ) + return steps + + +def parse_open_step_ids(item: TodoItem) -> list[str]: + """Open numbered step ids in document order.""" + ids: list[str] = [] + for step in implementation_steps(item): + if step.done or not step.step_id: + continue + ids.append(step.step_id) + return ids + + +def next_open_implementation_step( + item: TodoItem, + after: str | None, +) -> ImplementationStep | None: + """Next open step after ``after`` (or first open when ``after`` is None).""" + open_steps = [s for s in implementation_steps(item) if 
def extract_step_text_from_tasks_md(tasks_md: str, step: str) -> str:
    """Return the first line and indented body of one checkbox step.

    The step is the first ``- [ ] <step> ...`` / ``- [x] <step> ...`` line whose
    text after the checkbox starts with ``step`` followed by whitespace. The
    lines after it are collected until the next non-blank line indented no
    deeper than the step line.

    Args:
        tasks_md: Markdown task list to search.
        step: Literal step prefix to look for (matched verbatim, not as a regex).

    Returns:
        The step text without checkbox and prefix, followed by its body lines
        with leading whitespace removed, joined by newlines. Returns ``""``
        when ``tasks_md`` or ``step`` is empty or the step is not found.
    """
    if not tasks_md or not step:
        return ""

    # Built once: the pattern does not change between lines.
    step_line = re.compile(r"-\s*\[[ xX]\]\s+" + re.escape(step) + r"\s+(.*)")
    step_indent = 0
    collecting = False
    collected: list[str] = []

    for line in tasks_md.splitlines():
        stripped = line.lstrip()
        indent = len(line) - len(stripped)
        if not collecting:
            found = step_line.match(stripped)
            if found:
                collecting = True
                step_indent = indent
                collected.append(found.group(1))
            continue
        if stripped and indent <= step_indent:
            # A non-blank line at the step's own level (or shallower) ends the block.
            break
        collected.append(stripped)
    return "\n".join(collected).strip()
flutter_test_ok: bool | None, + verify_ok: bool | None, +) -> tuple[TodoItem, bool]: + """Mark the focused step done when automation gates pass (#53).""" + from cecli.spec.implement import is_test_related_checklist_text + + focus = (focus_step or "").strip() + if not focus: + return item, False + if verify_ok is False: + return item, False + + steps = implementation_steps(item) + focus_step_row = next((s for s in steps if s.step_id == focus), None) + if focus_step_row is None: + return item, False + if focus_step_row.done: + return item, False + + passed_gate = False + if is_test_related_checklist_text(focus_step_row.text): + passed_gate = flutter_test_ok is True + elif verify_ok is True: + passed_gate = True + + if not passed_gate: + return item, False + + return mark_implementation_step_done(item, focus, done=True), True diff --git a/cecli/spec/pubspec_repair.py b/cecli/spec/pubspec_repair.py new file mode 100644 index 00000000000..0409e25a3b2 --- /dev/null +++ b/cecli/spec/pubspec_repair.py @@ -0,0 +1,193 @@ +"""Detect and repair missing Dart package dependencies in pubspec.yaml.""" + +from __future__ import annotations + +import os +import re +import subprocess +from dataclasses import dataclass +from pathlib import Path + +_DART_PKG_IMPORT = re.compile( + r"""^\s*import\s+['"]package:([a-zA-Z_][\w]*)/""", + re.MULTILINE, +) + +_BUILTIN_PACKAGES = frozenset( + { + "flutter", + "flutter_test", + "integration_test", + "flutter_localizations", + "flutter_web_plugins", + } +) + +_DEP_SECTION = re.compile(r"^(\s*)(dependencies|dev_dependencies):\s*$", re.MULTILINE) + + +@dataclass(frozen=True) +class PubspecRepairResult: + missing: tuple[str, ...] + added: tuple[str, ...] 
def parse_pubspec_dependencies(pubspec_text: str) -> set[str]:
    """Return package names declared under ``dependencies`` / ``dev_dependencies``.

    Only the direct children of those two top-level sections count as
    packages. Keys nested below a package (for example ``path:``, ``git:``,
    ``url:``, ``sdk:``) and keys of any other top-level section are ignored.
    The keys ``flutter`` and ``sdk`` are never reported.

    Args:
        pubspec_text: Raw text of a ``pubspec.yaml`` file.

    Returns:
        The set of declared package names.
    """
    declared: set[str] = set()
    in_section = False
    entry_indent: int | None = None
    for line in pubspec_text.splitlines():
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        indent = len(line) - len(line.lstrip())
        if indent == 0:
            # Every top-level key opens a new section and closes the previous one.
            in_section = stripped in ("dependencies:", "dev_dependencies:")
            entry_indent = None
            continue
        if not in_section:
            continue
        if entry_indent is None:
            # The first entry fixes the indentation of direct children.
            entry_indent = indent
        if indent != entry_indent:
            # Deeper lines describe a package's source, not another package.
            continue
        match = re.match(r"([a-zA-Z_]\w*):", stripped)
        if match and match.group(1) not in ("flutter", "sdk"):
            declared.add(match.group(1))
    return declared
and lines[-1].strip(): + lines.append("") + lines.append("dependencies:") + insert_at = len(lines) + indent = " " + for pkg in packages: + lines.insert(insert_at, f"{indent}{pkg}: any") + insert_at += 1 + return "\n".join(lines) + ("\n" if pubspec_text.endswith("\n") else "") + + +def _run_flutter_pub_add(workspace: Path, packages: list[str]) -> tuple[bool, str]: + from cecli.spec.implement import resolve_flutter_executable + + flutter = resolve_flutter_executable() + if not flutter: + return False, "flutter not found on PATH" + cmd = [flutter, "pub", "add", *packages] + try: + proc = subprocess.run( + cmd, + cwd=str(workspace), + capture_output=True, + text=True, + timeout=120, + env={**os.environ, "PATH": os.environ.get("PATH", "")}, + check=False, + ) + except (subprocess.TimeoutExpired, OSError) as exc: + return False, str(exc) + out = ((proc.stdout or "") + (proc.stderr or "")).strip() + return proc.returncode == 0, out[-2000:] if out else f"exit {proc.returncode}" + + +def repair_pubspec_dependencies( + workspace: str | Path, + packages: list[str] | None = None, + *, + apply: bool = False, +) -> PubspecRepairResult: + """Detect or add missing pub dependencies (flutter pub add when possible).""" + root = Path(workspace).resolve() + pubspec = root / "pubspec.yaml" + if not pubspec.is_file(): + return PubspecRepairResult((), (), False, "pubspec.yaml missing") + + missing = list(packages or find_missing_pubspec_dependencies(root)) + if not missing: + return PubspecRepairResult((), (), False, "No missing package dependencies detected.") + + if not apply: + return PubspecRepairResult( + tuple(missing), + (), + False, + f"Missing dependencies: {', '.join(missing)}. 
Re-run with --apply.", + ) + + ok, output = _run_flutter_pub_add(root, missing) + if ok: + return PubspecRepairResult( + tuple(missing), + tuple(missing), + True, + output or f"Added: {', '.join(missing)}", + ) + + try: + original = pubspec.read_text(encoding="utf-8") + updated = _append_dependencies(original, missing) + pubspec.write_text(updated, encoding="utf-8") + except OSError as exc: + return PubspecRepairResult(tuple(missing), (), False, f"Failed to edit pubspec.yaml: {exc}") + + return PubspecRepairResult( + tuple(missing), + tuple(missing), + True, + f"flutter pub add failed ({output}); appended {', '.join(missing)} under dependencies:", + ) + + +def pubspec_repair_snapshot_lines(workspace: str | Path) -> list[str]: + """Optional implement-snapshot lines when imports lack pubspec entries.""" + missing = find_missing_pubspec_dependencies(workspace) + if not missing: + return [] + preview = ", ".join(f"`{p}`" for p in missing[:6]) + extra = f" (+{len(missing) - 6} more)" if len(missing) > 6 else "" + return [ + f"- **pubspec.yaml** — missing dependencies: {preview}{extra}. " + "Add with **EditText** on `pubspec.yaml` or run " + "`bright-vision-tasks repair-pubspec --apply`." + ] diff --git a/cecli/spec/runtime.py b/cecli/spec/runtime.py new file mode 100644 index 00000000000..b9bc3165276 --- /dev/null +++ b/cecli/spec/runtime.py @@ -0,0 +1,35 @@ +"""Protocols for Vision HTTP session glue (implemented in bright_vision_core).""" + +from __future__ import annotations + +from typing import Any, Iterator, Protocol + + +class SpecTurnRunner(Protocol): + """Headless chat session used for repo-grounded spec generation.""" + + def apply_spec_gen_route(self, routing_text: str) -> None: ... + + def run_message(self, message: str, **kwargs: Any) -> Iterator[dict[str, Any]]: ... + + def run_one_shot( + self, + message: str, + *, + timeout_s: float, + **kwargs: Any, + ) -> str: ... + + def interrupt_turn(self) -> None: ... 
+ + +class AgentCoderBridge(Protocol): + @property + def root(self) -> Any: ... + + def local_agent_folder(self, name: str) -> str: ... + + +class AgentTodoSession(Protocol): + @property + def coder(self) -> AgentCoderBridge: ... diff --git a/cecli/spec/steering.py b/cecli/spec/steering.py new file mode 100644 index 00000000000..0e79886feac --- /dev/null +++ b/cecli/spec/steering.py @@ -0,0 +1,163 @@ +# flake8: noqa: E501 +"""Project steering markdown for spec-focus sessions (Kiro-style).""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +STEERING_MAIN_RELPATH = ".cecli/STEERING.md" +STEERING_FRAGMENTS_DIR_RELPATH = ".cecli/steering" + +DEFAULT_STEERING_TEMPLATE = """\ +# Project steering + +Rules the spec agent and implementation turns should follow across **all** tasks in this repo. + +## Stack & conventions + +- Language / framework: +- Test command: +- Avoid: + +## Spec discipline + +- EARS: ### REQ-NNN with **WHEN** … **THE** system **SHALL** … +- Keep design and tasks_md aligned with every REQ id. +- Do not mark implementation done until requirements pass EARS lint. +""" + +SPEC_FOCUS_INSTRUCTIONS = """\ +## Spec-focus mode (BrightVision) + +You are in **spec-focus**: work on the active task's requirements, design, and implementation tasks only. + +- Prefer editing `.cecli/specs/<task-id>/` layers and related docs; avoid drive-by refactors. +- Use EARS notation: ### REQ-### headings, **WHEN** … **THE** system **SHALL** … +- Keep design and tasks_md aligned with every REQ id; call out gaps explicitly. +- Do not mark implementation done until requirements pass EARS lint (WHEN/SHALL, no duplicate REQ ids). +""" + +SCAFFOLD_HINT = """\ +## Scaffolding (this turn) + +Use **ContextManager create** for **missing** paths **named in the checklist** (add on missing files is upgraded to create). +Then **ReadRange** + **EditText** on **one** target file. 
**No ls.** +Do not edit or ReadRange paths **not named** in the current checklist item. +""" + + +IMPLEMENTATION_TOOL_HINTS = """\ +## Implementation turn (tools) + +- **Empty files:** `ReadRange` once with `@000`/`000@`, then **`EditText`** (replace `@000`–`@000`) or **`ContextManager`** create — do not re-read the same empty file. +- **Before EditText:** always **`ReadRange`** the target file in the same turn (required for new files and after ContextManager create). +- **Scaffolding:** prefer `ContextManager` + `EditText` over repeated `ls` / `Grep` on known paths. +- After a successful read, edit — do not loop on exploration. +- **UpdateTodoList:** mark **only the current** checklist item `done: true` after **EditText** succeeded (and BrightVision **flutter test** passed when applicable) — never on failed edits or skipped verification. +- **Do not** run `flutter test` via Command — BrightVision runs it at end of implement turns. +- When EditText errors, read the error, **ReadRange**, retry one file; do not assume success from assistant prose alone. +""" + + +@dataclass(frozen=True) +class SteeringFileRecord: + relpath: str + size_bytes: int + nonempty: bool + + +@dataclass(frozen=True) +class SteeringFilesSnapshot: + main: SteeringFileRecord | None + fragments: tuple[SteeringFileRecord, ...] 
def _read_steering_text(path: Path) -> str | None:
    """Return the stripped UTF-8 text of ``path``, or ``None`` if it cannot be read.

    ``UnicodeDecodeError`` is caught alongside ``OSError`` because it is a
    ``ValueError`` subclass: with ``except OSError`` alone, a single badly encoded
    steering file would abort the whole scan / preamble build.
    """
    try:
        return path.read_text(encoding="utf-8").strip()
    except (OSError, UnicodeDecodeError):
        return None


def _steering_file_record(root: Path, relpath: str) -> SteeringFileRecord | None:
    """Describe the steering file at ``root / relpath``.

    Returns ``None`` when the path is not a regular file, cannot be stat'ed, or
    cannot be decoded as UTF-8. ``nonempty`` is true when the file has
    non-whitespace text.
    """
    path = root / relpath
    if not path.is_file():
        return None
    try:
        size_bytes = path.stat().st_size
    except OSError:
        return None
    text = _read_steering_text(path)
    if text is None:
        return None
    return SteeringFileRecord(
        relpath=relpath.replace("\\", "/"),
        size_bytes=size_bytes,
        nonempty=bool(text),
    )


def scan_steering_files(workspace: str | Path) -> SteeringFilesSnapshot:
    """List ``.cecli/STEERING.md`` and ``.cecli/steering/*.md`` with sizes.

    Fragments are visited in sorted path order; files that cannot be read are
    left out of the snapshot.
    """
    root = Path(workspace).resolve()
    main = _steering_file_record(root, STEERING_MAIN_RELPATH)
    fragments: list[SteeringFileRecord] = []
    frag_dir = root / ".cecli" / "steering"
    if frag_dir.is_dir():
        for path in sorted(frag_dir.glob("*.md")):
            # Records carry workspace-relative, forward-slash paths.
            rel = str(path.relative_to(root)).replace("\\", "/")
            record = _steering_file_record(root, rel)
            if record is not None:
                fragments.append(record)
    return SteeringFilesSnapshot(main=main, fragments=tuple(fragments))


def scaffold_steering_files(workspace: str | Path) -> list[str]:
    """Create ``.cecli/STEERING.md`` from the default template when it is missing.

    Returns the relative paths that were created (empty when the file already
    exists). An existing file is never overwritten.
    """
    root = Path(workspace).resolve()
    created: list[str] = []
    main_path = root / ".cecli" / "STEERING.md"
    if not main_path.is_file():
        main_path.parent.mkdir(parents=True, exist_ok=True)
        main_path.write_text(DEFAULT_STEERING_TEMPLATE, encoding="utf-8")
        created.append(STEERING_MAIN_RELPATH)
    return created


def load_steering_markdown(workspace: str | Path) -> str:
    """Load ``.cecli/STEERING.md`` and ``.cecli/steering/*.md`` if present.

    The main file comes first, followed by each non-empty fragment under a
    ``### <filename>`` heading, joined by blank lines. Missing, empty, unreadable
    or non-UTF-8 files are skipped, so the result may be an empty string.
    """
    root = Path(workspace).resolve()
    parts: list[str] = []
    main_path = root / ".cecli" / "STEERING.md"
    if main_path.is_file():
        text = _read_steering_text(main_path)
        if text:
            parts.append(text)
    steering_dir = root / ".cecli" / "steering"
    if steering_dir.is_dir():
        for path in sorted(steering_dir.glob("*.md")):
            text = _read_steering_text(path)
            if text:
                parts.append(f"### {path.name}\n{text}")
    return "\n\n".join(parts).strip()


def build_spec_focus_preamble(workspace: str | Path) -> str:
    """Steering files + spec-focus instructions for chat prepend.

    The spec-focus instructions always come first; a ``## Project steering``
    section is appended only when steering markdown was found. The result ends
    with a blank line so it can be prepended directly to a message.
    """
    steering = load_steering_markdown(workspace)
    blocks = [SPEC_FOCUS_INSTRUCTIONS.strip()]
    if steering:
        blocks.append("## Project steering\n" + steering)
    return "\n\n".join(blocks) + "\n\n"
repair_pubspec_dependencies +from cecli.spec.steering import scaffold_steering_files, scan_steering_files +from cecli.spec.todos import WorkspaceTodos + + +def _resolve_item(api: WorkspaceTodos, todo_id: str | None): + store = api.load() + if todo_id: + return api.find(store, todo_id) + if store.active_id: + return api.find(store, store.active_id) + return store.todos[0] if store.todos else None + + +def cmd_materialize(workspace: Path, todo_id: str | None) -> int: + api = WorkspaceTodos(workspace) + item = _resolve_item(api, todo_id) + if item is None: + print("No task found.", file=sys.stderr) + return 1 + checklist = materialize_checklist_from_tasks_md(item) + if not checklist: + print("Nothing to materialize (no numbered steps in tasks_md).", file=sys.stderr) + return 1 + updated, _ = api.update(item.id, checklist=checklist) + print( + json.dumps( + {"todo_id": updated.id, "checklist": [asdict(c) for c in updated.checklist]}, indent=2 + ) + ) + return 0 + + +def cmd_progress(workspace: Path, todo_id: str | None) -> int: + api = WorkspaceTodos(workspace) + item = _resolve_item(api, todo_id) + if item is None: + print("No task found.", file=sys.stderr) + return 1 + steps = implementation_steps(item) + nxt = next_open_implementation_step(item, None) + payload = { + "todo_id": item.id, + "title": item.title, + "steps": [ + { + "step_id": s.step_id, + "text": s.text, + "done": s.done, + "current": s.current, + "verify_cmd": s.verify_cmd, + } + for s in steps + ], + "next_open": ( + { + "step_id": nxt.step_id, + "text": nxt.text, + "verify_cmd": nxt.verify_cmd, + } + if nxt + else None + ), + } + print(json.dumps(payload, indent=2)) + return 0 + + +def cmd_sync_agent(workspace: Path) -> int: + store = import_agent_plan_for_workspace(workspace) + print(json.dumps({"active_id": store.active_id, "todos": len(store.todos)}, indent=2)) + return 0 + + +def _steering_payload(snapshot) -> dict: + return { + "has_content": snapshot.has_content, + "file_count": 
def cmd_steering_scan(workspace: Path) -> int:
    """Print the steering-file snapshot for ``workspace`` as JSON. Always returns 0."""
    snapshot = scan_steering_files(workspace)
    print(json.dumps(_steering_payload(snapshot), indent=2))
    return 0


def cmd_steering_scaffold(workspace: Path) -> int:
    """Create the steering template when missing, then print what exists as JSON.

    The payload lists the newly created relative paths under ``created`` next to
    the snapshot fields. Always returns 0.
    """
    created = scaffold_steering_files(workspace)
    snapshot = scan_steering_files(workspace)
    print(
        json.dumps(
            {"created": created, **_steering_payload(snapshot)},
            indent=2,
        )
    )
    return 0


def cmd_repair_pubspec(workspace: Path, *, apply: bool) -> int:
    """Report (and with ``apply=True`` try to fix) missing pub dependencies as JSON.

    Returns 0 when nothing is missing or the repair was applied, and 1 when
    dependencies are still missing and nothing was applied. The previous
    expression returned 0 on both branches of its conditional, so that state
    could never be signalled through the exit code.
    """
    result = repair_pubspec_dependencies(workspace, apply=apply)
    print(
        json.dumps(
            {
                "missing": list(result.missing),
                "added": list(result.added),
                "applied": result.applied,
                "message": result.message,
            },
            indent=2,
        )
    )
    unresolved = bool(result.missing) and not result.applied
    return 1 if unresolved else 0
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and run the selected sub-command, returning its exit code.

    Returns 1 when ``--workspace`` is not a directory. Unknown commands are
    reported through ``parser.error``.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    workspace = args.workspace.resolve()
    if not workspace.is_dir():
        print(f"Not a directory: {workspace}", file=sys.stderr)
        return 1

    # Lambdas keep per-command argument access lazy (e.g. ``args.apply`` only
    # exists for the sub-command that declares it).
    handlers = {
        "materialize": lambda: cmd_materialize(workspace, args.todo_id),
        "progress": lambda: cmd_progress(workspace, args.todo_id),
        "sync-agent": lambda: cmd_sync_agent(workspace),
        "repair-pubspec": lambda: cmd_repair_pubspec(workspace, apply=args.apply),
    }
    handler = handlers.get(args.command)
    if handler is not None:
        return handler()

    if args.command == "steering":
        if args.steering_cmd == "scan":
            return cmd_steering_scan(workspace)
        if args.steering_cmd == "scaffold":
            return cmd_steering_scaffold(workspace)
        parser.error(f"unknown steering command: {args.steering_cmd}")
    parser.error(f"unknown command: {args.command}")
    return 2


if __name__ == "__main__":
    raise SystemExit(main())
+""" + +from __future__ import annotations + +import json +import os +import shutil +import uuid +from dataclasses import asdict, dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Literal + +from cecli.spec.paths import specs_root, todos_json_path, workspace_meta_dir + +TodoStatus = Literal["open", "in_progress", "done", "cancelled"] + +TODO_TEMPLATES: dict[str, str] = { + "feature": "## Goal\n\n" "## Requirements\n\n" "## Acceptance criteria\n" "- [ ] \n", + "bugfix": ( + "## Problem\n\n" + "## Root cause\n\n" + "## Fix verification\n" + "- [ ] Repro fixed\n" + "- [ ] Tests pass\n" + ), + "refactor": ( + "## Scope\n\n" + "## Non-goals\n\n" + "## Acceptance criteria\n" + "- [ ] Behavior unchanged\n" + "- [ ] \n" + ), +} + +# Kiro-style three-layer spec (v4) +SPEC_LAYER_TEMPLATES: dict[str, dict[str, str]] = { + "spec-driven": { + "requirements": ( + "### REQ-001\n" + "**WHEN** the user …\n" + "**THE** system **SHALL** …\n\n" + "### REQ-002\n" + "**WHEN** …\n" + "**THE** system **SHALL** …\n" + ), + "design": "## Overview\n\n" "## Architecture\n\n" "## Components\n\n" "## Data flow\n\n", + "tasks_md": ( + "## Implementation tasks\n\n" "- [ ] 1. … (depends: none)\n" "- [ ] 2. 
def _now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string with second precision."""
    return datetime.now(timezone.utc).replace(microsecond=0).isoformat()


def apply_template(name: str) -> str:
    """Return the single-body task template for ``name`` (case-insensitive), or ``""``."""
    return TODO_TEMPLATES.get((name or "").strip().lower(), "")


def apply_layer_template(name: str) -> dict[str, str]:
    """Return a copy of the layer template for ``name`` (case-insensitive), or ``{}``."""
    return dict(SPEC_LAYER_TEMPLATES.get((name or "").strip().lower(), {}))


def migrate_todo_layers(item: TodoItem) -> TodoItem:
    """Move legacy single ``spec`` into ``requirements`` when layers are empty.

    Mutates and returns ``item``; ``spec`` itself is left in place.
    """
    if item.spec.strip() and not (
        item.requirements.strip() or item.design.strip() or item.tasks_md.strip()
    ):
        item.requirements = item.spec.strip()
    return item


def _as_list(value: Any) -> list[Any]:
    """Coerce a loosely typed JSON value to a list.

    Lists and tuples are copied, a non-blank string becomes a one-element list
    (instead of being iterated character by character), anything else yields ``[]``.
    """
    if isinstance(value, (list, tuple)):
        return list(value)
    if isinstance(value, str) and value.strip():
        return [value]
    return []


@dataclass
class ChecklistItem:
    """One checklist row of a task."""

    id: str
    text: str
    done: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Return the row as a plain dict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, raw: dict[str, Any]) -> ChecklistItem:
        """Build a row from ``raw``; a missing id gets a fresh 8-character hex id."""
        return cls(
            id=str(raw.get("id") or uuid.uuid4().hex[:8]),
            text=str(raw.get("text") or ""),
            done=bool(raw.get("done")),
        )


@dataclass
class TodoItem:
    """A workspace task with its spec layers, links and checklist."""

    id: str
    title: str
    spec: str = ""
    requirements: str = ""
    design: str = ""
    tasks_md: str = ""
    depends_on: list[str] = field(default_factory=list)
    branch: str = ""
    pr_url: str = ""
    status: TodoStatus = "open"
    links: list[str] = field(default_factory=list)
    checklist: list[ChecklistItem] = field(default_factory=list)
    created_at: str = field(default_factory=_now_iso)
    updated_at: str = field(default_factory=_now_iso)

    def to_dict(self) -> dict[str, Any]:
        """Return the task as a plain dict with snake_case keys."""
        d = asdict(self)
        d["checklist"] = [c.to_dict() for c in self.checklist]
        return d

    @classmethod
    def from_dict(cls, raw: dict[str, Any]) -> TodoItem:
        """Build a task from a loosely typed dict.

        Accepts both snake_case and camelCase spellings for ``depends_on``,
        ``tasks_md`` and ``pr_url``. Unknown statuses fall back to ``"open"``,
        non-dict checklist entries are dropped, dependency ids are stripped, and
        a bare string for ``links`` / ``depends_on`` is treated as one entry.
        Legacy ``spec`` text is migrated via ``migrate_todo_layers``.
        """
        checklist = [
            ChecklistItem.from_dict(c)
            for c in _as_list(raw.get("checklist"))
            if isinstance(c, dict)
        ]
        status = raw.get("status")
        valid = status if status in ("open", "in_progress", "done", "cancelled") else "open"
        deps = _as_list(raw.get("depends_on") or raw.get("dependsOn"))
        item = cls(
            id=str(raw.get("id") or uuid.uuid4().hex),
            title=str(raw.get("title") or "Untitled"),
            spec=str(raw.get("spec") or ""),
            requirements=str(raw.get("requirements") or ""),
            design=str(raw.get("design") or ""),
            tasks_md=str(raw.get("tasks_md") or raw.get("tasksMd") or ""),
            depends_on=[str(d).strip() for d in deps if str(d).strip()],
            branch=str(raw.get("branch") or ""),
            pr_url=str(raw.get("pr_url") or raw.get("prUrl") or ""),
            status=valid,
            links=_as_list(raw.get("links")),
            checklist=checklist,
            created_at=str(raw.get("created_at") or _now_iso()),
            updated_at=str(raw.get("updated_at") or _now_iso()),
        )
        return migrate_todo_layers(item)


@dataclass
class TodoStore:
    """The persisted task list: schema version, active task id, and tasks."""

    version: int = 1
    active_id: str | None = None
    todos: list[TodoItem] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the store as a plain dict (``activeId`` is camelCase on disk)."""
        return {
            "version": self.version,
            "activeId": self.active_id,
            "todos": [t.to_dict() for t in self.todos],
        }

    @classmethod
    def from_dict(cls, raw: dict[str, Any]) -> TodoStore:
        """Build a store from a loosely typed dict.

        Non-dict task entries are skipped and a non-numeric version falls back
        to 1, so one malformed value does not make the whole list unloadable.
        An active id that matches no task is cleared.
        """
        items = [TodoItem.from_dict(t) for t in _as_list(raw.get("todos")) if isinstance(t, dict)]
        active = raw.get("activeId") or raw.get("active_id")
        if active and not any(t.id == active for t in items):
            active = None
        try:
            version = int(raw.get("version") or 1)
        except (TypeError, ValueError):
            version = 1
        return cls(version=version, active_id=active, todos=items)


def _append_checklist_block(lines: list[str], checklist: list[ChecklistItem]) -> None:
    """GFM checklist in a markdown fence so the chat UI renders task list formatting.

    Appends to ``lines`` in place; does nothing for an empty checklist.
    """
    if not checklist:
        return
    lines.extend(["", "## Checklist", "```markdown"])
    for entry in checklist:
        mark = "x" if entry.done else " "
        lines.append(f"- [{mark}] {entry.text}")
    lines.append("```")


def checklist_all_done(item: TodoItem) -> bool:
    """Return True when the checklist is non-empty and every row has text and is done."""
    if not item.checklist:
        return False
    return all(bool(c.text.strip()) and c.done for c in item.checklist)


def _layer_or_placeholder(text: str, placeholder: str) -> str:
    """Return ``text`` stripped, or ``placeholder`` when it is blank."""
    return text.strip() or placeholder
None = None) -> str: + item = migrate_todo_layers(item) + lines = [f"[Active task: {item.title} · id {item.id[:8]}]", ""] + if item.branch.strip(): + lines.append(f"**Git branch:** {item.branch.strip()}") + if item.pr_url.strip(): + lines.append(f"**Pull request:** {item.pr_url.strip()}") + if item.branch.strip() or item.pr_url.strip(): + lines.append("") + if item.depends_on and store: + pending = [] + for dep_id in item.depends_on: + dep = next( + (t for t in store.todos if t.id == dep_id or t.id.startswith(dep_id)), + None, + ) + if dep and dep.status != "done": + pending.append(f"{dep.title} ({dep.id[:8]})") + if pending: + lines += ["**Blocked by:** " + ", ".join(pending), ""] + lines += [ + "## Requirements", + _layer_or_placeholder(item.requirements, "(No requirements yet.)"), + "", + "## Design", + _layer_or_placeholder(item.design, "(No design yet.)"), + "", + "## Implementation tasks", + _layer_or_placeholder(item.tasks_md, "(No implementation tasks yet.)"), + ] + if item.spec.strip() and item.spec.strip() != item.requirements.strip(): + lines += ["", "## Legacy specification", item.spec.strip()] + if item.checklist: + _append_checklist_block(lines, item.checklist) + lines += ["", "---", ""] + return "\n".join(lines) + + +def format_todo_context_light(item: TodoItem, *, store: TodoStore | None = None) -> str: + """Checklist-first task inject — no empty Requirements/Design layers.""" + item = migrate_todo_layers(item) + lines = [f"[Active task: {item.title} · id {item.id[:8]}]", ""] + if item.branch.strip(): + lines.append(f"**Git branch:** {item.branch.strip()}") + if item.pr_url.strip(): + lines.append(f"**Pull request:** {item.pr_url.strip()}") + if item.branch.strip() or item.pr_url.strip(): + lines.append("") + if item.depends_on and store: + pending = [] + for dep_id in item.depends_on: + dep = next( + (t for t in store.todos if t.id == dep_id or t.id.startswith(dep_id)), + None, + ) + if dep and dep.status != "done": + pending.append(f"{dep.title} 
({dep.id[:8]})") + if pending: + lines += ["**Blocked by:** " + ", ".join(pending), ""] + if item.checklist: + _append_checklist_block(lines, item.checklist) + elif item.tasks_md.strip() and item.tasks_md.strip() not in _SPEC_LAYER_PLACEHOLDERS: + lines += ["## Tasks", item.tasks_md.strip()] + lines += ["", "---", ""] + return "\n".join(lines) + + +_IMPLEMENT_DESIGN_MAX_CHARS = int(os.environ.get("BV_IMPLEMENT_DESIGN_MAX_CHARS", "4000")) +_IMPLEMENT_TASKS_MAX_OPEN = int(os.environ.get("BV_IMPLEMENT_TASKS_MAX_OPEN", "12")) + + +def _implementation_tasks_for_inject(item: TodoItem, *, max_open: int | None = None) -> str: + """Open checklist steps only — full tasks_md can be 20k+ tokens on local models.""" + from cecli.spec.progress import implementation_steps + + limit = max_open if max_open is not None else _IMPLEMENT_TASKS_MAX_OPEN + steps = implementation_steps(item) + open_steps = [s for s in steps if not s.done and s.step_id] + if not open_steps: + return _layer_or_placeholder(item.tasks_md, "(No implementation tasks yet.)") + shown = open_steps[: max(1, limit)] + lines: list[str] = [] + for step in shown: + cur = " **(current)**" if step.current else "" + lines.append(f"- [ ] {step.step_id} {step.text.strip()}{cur}") + hidden = len(open_steps) - len(shown) + if hidden: + lines.append( + f"\n… {hidden} more open implementation step(s) — full list in Tasks / `.cecli/specs/`." + ) + return "\n".join(lines) + + +def _truncate_spec_layer(text: str, *, max_chars: int, label: str) -> str: + trimmed = text.strip() + if not trimmed or trimmed in _SPEC_LAYER_PLACEHOLDERS: + return _layer_or_placeholder(trimmed, f"(No {label} yet.)") + if len(trimmed) <= max_chars: + return trimmed + cut = trimmed[:max_chars] + # If we cut inside a fenced code block, close it so downstream markdown/mermaid + # parsers don't receive an unterminated fence. 
+ open_fences = 0 + for line in cut.splitlines(): + stripped = line.strip() + if stripped.startswith("```"): + # Toggle: opening fence or closing fence + if open_fences > 0 and stripped == "```": + open_fences -= 1 + else: + open_fences += 1 + suffix = f"\n… ({label} truncated — full text in chat history or `.cecli/specs/`)" + if open_fences > 0: + # Close the open fence before the truncation notice + suffix = "\n```" + suffix + return cut + suffix + + +def _requirements_summary_for_implement(requirements: str) -> str: + """REQ headings only — keeps implement turns lean on local models.""" + headings = [ + line.strip() for line in requirements.splitlines() if line.strip().startswith("### REQ-") + ] + if headings: + return "\n".join(headings) + return _truncate_spec_layer(requirements, max_chars=1500, label="requirements") + + +def format_todo_context_implement(item: TodoItem, *, store: TodoStore | None = None) -> str: + """Lean inject for Start work / Implement step — tasks + truncated design, REQ headings only.""" + item = migrate_todo_layers(item) + lines = [f"[Active task: {item.title} · id {item.id[:8]}]", ""] + if item.branch.strip(): + lines.append(f"**Git branch:** {item.branch.strip()}") + if item.pr_url.strip(): + lines.append(f"**Pull request:** {item.pr_url.strip()}") + if item.branch.strip() or item.pr_url.strip(): + lines.append("") + if item.depends_on and store: + pending = [] + for dep_id in item.depends_on: + dep = next( + (t for t in store.todos if t.id == dep_id or t.id.startswith(dep_id)), + None, + ) + if dep and dep.status != "done": + pending.append(f"{dep.title} ({dep.id[:8]})") + if pending: + lines += ["**Blocked by:** " + ", ".join(pending), ""] + lines += [ + "## Requirements (summary)", + _requirements_summary_for_implement(item.requirements), + "", + "## Design", + _truncate_spec_layer(item.design, max_chars=_IMPLEMENT_DESIGN_MAX_CHARS, label="design"), + "", + "## Implementation tasks", + _implementation_tasks_for_inject(item), + ] + 
# Placeholder strings emitted for empty spec layers; text equal to one of these
# is treated as "no content".
_SPEC_LAYER_PLACEHOLDERS = frozenset(
    {
        "(No requirements yet.)",
        "(No design yet.)",
        "(No implementation tasks yet.)",
    }
)


class WorkspaceTodos:
    """Read/write API for a workspace's task list and its per-task spec folders.

    Every mutating method loads the store from disk, changes it, and saves it
    again; nothing is cached between calls.
    """

    def __init__(self, workspace_dir: str | Path):
        self.root = Path(workspace_dir).resolve()
        workspace_meta_dir(self.root)
        self.path = todos_json_path(self.root)
        self.specs_root = specs_root(self.root)

    def repair_spec_folders(self) -> tuple[int, list[str]]:
        """Create missing ``.cecli/specs/{id}/`` dirs and sync markdown from todos.json.

        Returns ``(count, ids)`` for the folders that did not exist before.
        Existing folders are left untouched.
        """
        store = self.load()
        created: list[str] = []
        for item in store.todos:
            folder = self.specs_root / item.id
            if not folder.is_dir():
                created.append(item.id)
                self.sync_spec_files(item)
        return len(created), created

    def prune_orphan_spec_folders(self) -> tuple[int, list[str]]:
        """Remove ``.cecli/specs/{id}/`` dirs with no matching task in todos.json.

        Dot-prefixed directories and plain files are skipped. Returns
        ``(count, names)`` of the removed folders.
        """
        store = self.load()
        known = {item.id for item in store.todos}
        removed: list[str] = []
        if not self.specs_root.is_dir():
            return 0, removed
        for entry in sorted(self.specs_root.iterdir()):
            if not entry.is_dir() or entry.name.startswith("."):
                continue
            if entry.name in known:
                continue
            shutil.rmtree(entry)
            removed.append(entry.name)
        return len(removed), removed

    def sync_spec_files(self, item: TodoItem) -> None:
        """Write three-layer markdown under ``.cecli/specs/{id}/`` for external editing."""
        item = migrate_todo_layers(item)
        folder = self.specs_root / item.id
        folder.mkdir(parents=True, exist_ok=True)
        (folder / "requirements.md").write_text(item.requirements or "", encoding="utf-8")
        (folder / "design.md").write_text(item.design or "", encoding="utf-8")
        (folder / "tasks.md").write_text(item.tasks_md or "", encoding="utf-8")

    def resolve_spec_folder(self, todo_id: str) -> Path | None:
        """``.cecli/specs/{id}/`` or short-id folder (first 8 chars).

        Returns ``None`` when neither directory exists.
        """
        tid = todo_id.strip()
        candidates = [tid]
        if len(tid) > 8:
            candidates.append(tid[:8])
        for name in candidates:
            folder = self.specs_root / name
            if folder.is_dir():
                return folder
        return None

    def maybe_import_spec_from_disk(self, item: TodoItem) -> TodoItem:
        """Pull spec layers from disk when ``todos.json`` layers are still empty.

        Returns ``item`` unchanged when it already has spec content, has no spec
        folder, or every spec file on disk is blank.
        """
        from cecli.spec.focus import todo_has_spec_content

        item = migrate_todo_layers(item)
        if todo_has_spec_content(item):
            return item
        folder = self.resolve_spec_folder(item.id)
        if folder is None:
            return item
        for filename in ("requirements.md", "design.md", "tasks.md"):
            path = folder / filename
            if path.is_file() and path.read_text(encoding="utf-8").strip():
                return self.import_spec_files(item.id)
        return item

    def import_spec_files(self, todo_id: str) -> TodoItem:
        """Load ``requirements.md`` / ``design.md`` / ``tasks.md`` from disk into the task.

        Layers whose file is absent keep their current value.

        Raises:
            ValueError: unknown task, no spec folder, or a folder with none of
                the three files.
        """
        item = self.get(todo_id)
        folder = self.resolve_spec_folder(todo_id)
        if folder is None:
            raise ValueError(f"No spec folder for task: {todo_id}")
        layers: dict[str, str] = {}
        for filename, key in (
            ("requirements.md", "requirements"),
            ("design.md", "design"),
            ("tasks.md", "tasks_md"),
        ):
            path = folder / filename
            if path.is_file():
                layers[key] = path.read_text(encoding="utf-8")
        if not layers:
            raise ValueError(f"Spec folder is empty: {folder}")
        item, _ = self.update(
            todo_id,
            requirements=layers.get("requirements", item.requirements),
            design=layers.get("design", item.design),
            tasks_md=layers.get("tasks_md", item.tasks_md),
        )
        return item

    def load(self) -> TodoStore:
        """Read the store from disk.

        Returns an empty ``TodoStore`` when the file is missing, unreadable, not
        valid UTF-8 JSON, or not a JSON object.
        """
        if not self.path.is_file():
            return TodoStore()
        try:
            data = json.loads(self.path.read_text(encoding="utf-8"))
        except (OSError, UnicodeDecodeError, json.JSONDecodeError):
            return TodoStore()
        if not isinstance(data, dict):
            return TodoStore()
        return TodoStore.from_dict(data)

    def save(self, store: TodoStore) -> None:
        """Write the store to disk atomically.

        The payload goes to a sibling temp file that is then moved over the real
        one. ``load`` answers a half-written file with an empty store, so an
        interrupted in-place write could otherwise wipe the list on the next save.
        """
        self.path.parent.mkdir(parents=True, exist_ok=True)
        payload = json.dumps(store.to_dict(), indent=2, ensure_ascii=False)
        tmp_path = self.path.with_name(self.path.name + ".tmp")
        try:
            tmp_path.write_text(payload + "\n", encoding="utf-8")
            os.replace(tmp_path, self.path)
        except OSError:
            tmp_path.unlink(missing_ok=True)
            raise

    def find(self, store: TodoStore, token: str) -> TodoItem | None:
        """Find a task by exact id, then id prefix, then case-insensitive title.

        An exact id match always wins over a prefix match, even when a task
        whose id merely starts with ``token`` comes earlier in the list. Returns
        ``None`` for a blank token or when nothing matches.
        """
        token = token.strip()
        if not token:
            return None
        prefix_hit: TodoItem | None = None
        for item in store.todos:
            if item.id == token:
                return item
            if prefix_hit is None and item.id.startswith(token):
                prefix_hit = item
        if prefix_hit is not None:
            return prefix_hit
        lower = token.lower()
        for item in store.todos:
            if item.title.lower() == lower:
                return item
        return None

    def get(self, todo_id: str) -> TodoItem:
        """Return the task matching ``todo_id`` (see ``find``) or raise ``ValueError``."""
        store = self.load()
        item = self.find(store, todo_id)
        if not item:
            raise ValueError(f"Unknown task: {todo_id}")
        return item

    def add(self, title: str, spec: str = "", *, template: str | None = None) -> TodoItem:
        """Create a task at the top of the list, save it, and write its spec files.

        A layer template fills requirements/design/tasks; otherwise the body is
        ``spec`` or, when that is blank, the named single-body template.
        """
        store = self.load()
        tkey = (template or "").strip().lower()
        layers = apply_layer_template(tkey)
        if layers:
            item = TodoItem(
                id=uuid.uuid4().hex,
                title=title.strip() or "Untitled",
                requirements=layers.get("requirements", ""),
                design=layers.get("design", ""),
                tasks_md=layers.get("tasks_md", ""),
            )
        else:
            body = spec.strip() or apply_template(tkey)
            item = TodoItem(id=uuid.uuid4().hex, title=title.strip() or "Untitled", spec=body)
        # No-op for layered tasks (their ``spec`` is empty).
        migrate_todo_layers(item)
        store.todos.insert(0, item)
        self.save(store)
        self.sync_spec_files(item)
        return item

    def update(
        self,
        todo_id: str,
        *,
        title: str | None = None,
        spec: str | None = None,
        requirements: str | None = None,
        design: str | None = None,
        tasks_md: str | None = None,
        depends_on: list[str] | None = None,
        branch: str | None = None,
        pr_url: str | None = None,
        status: TodoStatus | None = None,
        links: list[str] | None = None,
        checklist: list[ChecklistItem] | None = None,
        auto_complete_checklist: bool = True,
    ) -> tuple[TodoItem, bool]:
        """Apply the given field changes and persist them.

        Only arguments that are not ``None`` are applied. Setting ``tasks_md`` on
        a task whose checklist has no text rows materializes a checklist from it.
        Passing a ``checklist`` that is entirely done marks the task ``done``
        (unless ``auto_complete_checklist`` is false or the task is already
        done/cancelled). A task that becomes done stops being the active task.

        Returns ``(item, auto_completed)``.

        Raises:
            ValueError: no task matches ``todo_id``.
        """
        store = self.load()
        item = self.find(store, todo_id)
        if not item:
            raise ValueError(f"Unknown task: {todo_id}")
        auto_completed = False
        if title is not None:
            item.title = title.strip() or "Untitled"
        if spec is not None:
            item.spec = spec
        if requirements is not None:
            item.requirements = requirements
        if design is not None:
            item.design = design
        if tasks_md is not None:
            item.tasks_md = tasks_md
            if tasks_md.strip() and not any(c.text.strip() for c in item.checklist):
                from cecli.spec.progress import materialize_checklist_from_tasks_md

                item.checklist = materialize_checklist_from_tasks_md(item)
        if depends_on is not None:
            # Convert before stripping so the value matches what the filter tested.
            item.depends_on = [str(d).strip() for d in depends_on if str(d).strip()]
        if branch is not None:
            item.branch = branch.strip()
        if pr_url is not None:
            item.pr_url = pr_url.strip()
        if status is not None:
            item.status = status
        if links is not None:
            item.links = list(links)
        if checklist is not None:
            item.checklist = checklist
        if (
            auto_complete_checklist
            and checklist is not None
            and checklist_all_done(item)
            and item.status not in ("done", "cancelled")
        ):
            item.status = "done"
            auto_completed = True
            if store.active_id == item.id:
                store.active_id = None
        item.updated_at = _now_iso()
        if status == "done" and store.active_id == item.id:
            store.active_id = None
        migrate_todo_layers(item)
        self.save(store)
        self.sync_spec_files(item)
        return item, auto_completed

    def import_markdown(self, text: str, *, merge: bool = False) -> TodoStore:
        """Import tasks from markdown, save the store, then write spec files.

        With ``merge`` the current store is passed to the importer; otherwise the
        importer starts from nothing. The store is saved before spec folders are
        written, matching ``add`` and ``update``.
        """
        from cecli.spec.markdown import import_markdown

        store = import_markdown(text, self.load() if merge else None, merge=merge)
        for item in store.todos:
            migrate_todo_layers(item)
        self.save(store)
        for item in store.todos:
            self.sync_spec_files(item)
        return store

    def export_markdown(self) -> str:
        """Return the current store rendered as markdown."""
        from cecli.spec.markdown import export_markdown

        return export_markdown(self.load())

    def move(self, todo_id: str, direction: str) -> TodoStore:
        """Move a task up/down in list order (``direction``: ``up`` | ``down``).

        ``todo_id`` must be the exact id. Any direction other than ``"up"`` moves
        down. A move past either end is a no-op that is not saved.

        Raises:
            ValueError: no task has that exact id.
        """
        store = self.load()
        idx = next((i for i, t in enumerate(store.todos) if t.id == todo_id), None)
        if idx is None:
            raise ValueError(f"Unknown task: {todo_id}")
        delta = -1 if direction == "up" else 1
        new_idx = idx + delta
        if new_idx < 0 or new_idx >= len(store.todos):
            return store
        store.todos[idx], store.todos[new_idx] = store.todos[new_idx], store.todos[idx]
        self.save(store)
        return store

    def delete(self, todo_id: str) -> None:
        """Delete a task (exact id), its spec folder, and its linked agent todo file.

        The agent file path comes from the task's links, so it is only removed
        when it stays inside the workspace root after normalization. Absolute or
        ``..``-escaping paths are ignored.

        Raises:
            ValueError: no task has that exact id.
        """
        from cecli.spec.agent_todos import parse_agent_todo_link

        store = self.load()
        item = next((t for t in store.todos if t.id == todo_id), None)
        if item is None:
            raise ValueError(f"Unknown task: {todo_id}")
        agent_relpath = parse_agent_todo_link(item.links)
        store.todos = [t for t in store.todos if t.id != todo_id]
        if store.active_id == todo_id:
            store.active_id = None
        self.save(store)
        spec_folder = self.specs_root / todo_id
        if spec_folder.is_dir():
            shutil.rmtree(spec_folder)
        if agent_relpath:
            # Lexical normalization only, so a symlinked agent folder still works.
            agent_path = Path(os.path.normpath(self.root / agent_relpath))
            if agent_path.is_relative_to(self.root) and agent_path.is_file():
                agent_path.unlink()

    def set_active(self, todo_id: str | None) -> TodoStore:
        """Make a task active (an ``open`` task becomes ``in_progress``) or clear it.

        A falsy ``todo_id`` clears the active task.

        Raises:
            ValueError: ``todo_id`` is given but matches no task.
        """
        store = self.load()
        if todo_id:
            item = self.find(store, todo_id)
            if not item:
                raise ValueError(f"Unknown task id: {todo_id}")
            store.active_id = item.id
            if item.status == "open":
                item.status = "in_progress"
            item.updated_at = _now_iso()
        else:
            store.active_id = None
        self.save(store)
        return store

    def mark_done(self, token: str) -> TodoItem:
        """Mark the matching task ``done`` and deactivate it if it was active.

        Raises:
            ValueError: ``token`` matches no task.
        """
        store = self.load()
        item = self.find(store, token)
        if not item:
            raise ValueError(f"Unknown task: {token}")
        item.status = "done"
        item.updated_at = _now_iso()
        if store.active_id == item.id:
            store.active_id = None
        self.save(store)
        return item

    def append_links(self, links: list[str], *, todo_id: str | None = None) -> None:
        """Append new, non-blank links to a task (default: the active task).

        Links already on the task are skipped. Does nothing when ``links`` is
        empty, there is no target task, or the target cannot be found.
        """
        if not links:
            return
        store = self.load()
        target = todo_id or store.active_id
        if not target:
            return
        item = self.find(store, target)
        if not item:
            return
        seen = set(item.links)
        for link in links:
            s = str(link).strip()
            if s and s not in seen:
                item.links.append(s)
                seen.add(s)
        item.updated_at = _now_iso()
        self.save(store)
sys.path.insert(0, str(_SPEC_DIR)) diff --git a/tests/spec/helpers/spec_layer_assertions.py b/tests/spec/helpers/spec_layer_assertions.py new file mode 100644 index 00000000000..697e54d0ce1 --- /dev/null +++ b/tests/spec/helpers/spec_layer_assertions.py @@ -0,0 +1,54 @@ +"""Shared assertions for three-layer spec generation (pytest + LLM e2e).""" + +from __future__ import annotations + +from cecli.spec.layers import ( + assess_generated_spec_layers, + design_references_requirements, +) + +__all__ = [ + "SAMPLE_GENERATED_MARKDOWN", + "assess_generated_spec_layers", + "design_references_requirements", +] + +SAMPLE_GENERATED_MARKDOWN = """\ +## Requirements +### Introduction +A ping counter API exposes a health check and an increment endpoint for dogfooding. + +### REQ-001: Health check +**User Story:** As a client, I want a health endpoint, so that I can confirm the API is reachable. + +**Acceptance Criteria** +1. **WHEN** a client sends `GET /health` **THE** system **SHALL** respond with HTTP 200 and a JSON status. +2. **IF** the core is still starting **THEN THE** system **SHALL** respond with HTTP 503. + +### REQ-002: Increment counter +**User Story:** As a client, I want to increment a counter, so that I can verify state changes. + +**Acceptance Criteria** +1. **WHEN** a client sends `POST /count` **THE** system **SHALL** increment and return the new value. +2. **WHILE** the process is running **THE** system **SHALL** persist the count in memory. + +## Design +### Overview +REQ-001 maps to HTTP routes; REQ-002 uses an in-process store. +### Architecture +A FastAPI app routes /health and /count to handlers backed by a singleton counter. +### Components and Interfaces +- `health()` returns the status payload — REQ-001. +- `increment()` and the `Counter` store — REQ-002. +### Data Models +A Counter value with an integer "value" field, held in memory. +### Error Handling +Return HTTP 503 while starting (REQ-001); reject unknown methods with 405. 
+### Testing Strategy +Unit tests for the store plus HTTP tests for REQ-001 and REQ-002. + +## Implementation tasks +- [ ] 1. Add route for REQ-001 health check — _Requirements: REQ-001_ (depends: none) +- [ ] 2. Wire counter store and route for REQ-002 — _Requirements: REQ-002_ (depends: 1) +- [ ] 3. Add HTTP tests for REQ-001 and REQ-002 (depends: 2) +""" diff --git a/tests/spec/test_agent_todos.py b/tests/spec/test_agent_todos.py new file mode 100644 index 00000000000..9d85dec191a --- /dev/null +++ b/tests/spec/test_agent_todos.py @@ -0,0 +1,325 @@ +"""Cecli agent todo.txt → workspace Tasks bridge.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from cecli.spec.agent_todos import ( + AGENT_PLAN_TITLE, + AgentTodoRow, + AgentTodoSanitizeContext, + _recover_char_split_agent_rows, + agent_todo_link_for, + current_agent_todo_row, + export_todo_item_to_agent, + format_agent_todo_txt, + import_agent_plan_for_workspace, + load_agent_todo_rows, + parse_agent_todo_txt, + plan_title_from_rows, + rows_from_todo_item, + rows_to_tasks_md, + sanitize_agent_todo_rows, + sync_session_agent_todos, +) +from cecli.spec.todos import ChecklistItem, TodoItem, WorkspaceTodos, _now_iso + + +def test_parse_agent_todo_txt(): + raw = """Done: +✓ First done + +Remaining: +→ Current task +○ Next task +""" + rows = parse_agent_todo_txt(raw) + assert len(rows) == 3 + assert rows[0].done and rows[0].text == "First done" + assert rows[1].current and not rows[1].done + assert rows[2].text == "Next task" + + +def test_parse_agent_todo_txt_preserves_space_only_task_lines(): + # Char-split corruption uses ``○ {ch}``; a space task is ``○ `` (prefix + space). 
+ rows = parse_agent_todo_txt("Remaining:\n○ \n○ x\n") + assert len(rows) == 2 + assert rows[0].text == " " + assert rows[1].text == "x" + + +def test_plan_title_skips_char_split_debris(): + broken = [AgentTodoRow(text=c, done=False, current=(c == "[")) for c in "[{"] + assert plan_title_from_rows(broken) == AGENT_PLAN_TITLE + + +def test_plan_title_uses_recovered_current_task(): + rows = [ + AgentTodoRow(text="Explore the codebase", done=False, current=True), + AgentTodoRow(text="Draft roadmap", done=False, current=False), + ] + assert plan_title_from_rows(rows) == "Explore the codebase" + + +def test_recover_char_split_agent_rows(): + json_text = ( + '[{"task": "Explore the codebase", "done": false, "current": true},' + '{"task": "Draft roadmap", "done": false}]' + ) + broken = [AgentTodoRow(text=c, done=False, current=False) for c in json_text] + rows = _recover_char_split_agent_rows(broken) + assert len(rows) == 2 + assert rows[0].text == "Explore the codebase" + assert rows[0].current + assert rows[1].text == "Draft roadmap" + + +def test_import_agent_plan_into_workspace(tmp_path: Path): + agents = tmp_path / ".cecli" / "agents" / "2026-05-27" / "abc" + agents.mkdir(parents=True) + (agents / "todo.txt").write_text( + "Remaining:\n→ Ship feature\n○ Write tests\n", + encoding="utf-8", + ) + store = import_agent_plan_for_workspace(tmp_path) + assert len(store.todos) == 1 + item = store.todos[0] + assert item.title == "Ship feature" + assert len(item.checklist) == 2 + assert store.active_id == item.id + assert item.status == "in_progress" + + # Second import updates same task + (agents / "todo.txt").write_text( + "Remaining:\n→ Ship feature\n✓ Write tests\n", + encoding="utf-8", + ) + store2 = import_agent_plan_for_workspace(tmp_path) + assert len(store2.todos) == 1 + assert store2.todos[0].checklist[1].done is True + + +def test_import_agent_plan_missing_file(tmp_path: Path): + with pytest.raises(FileNotFoundError): + import_agent_plan_for_workspace(tmp_path) + + 
+def test_try_import_agent_plan_returns_none_when_missing(tmp_path: Path): + from cecli.spec.agent_todos import try_import_agent_plan_for_workspace + + assert try_import_agent_plan_for_workspace(tmp_path) is None + + +def test_import_merges_into_active_task(tmp_path: Path): + api = WorkspaceTodos(tmp_path) + now = _now_iso() + user_task = TodoItem( + id="user1", + title="My feature", + tasks_md="", + status="in_progress", + links=[], + checklist=[], + created_at=now, + updated_at=now, + ) + store = api.load() + store.todos.append(user_task) + store.active_id = user_task.id + api.save(store) + + agents = tmp_path / ".cecli" / "agents" / "2026-05-27" / "sess" + agents.mkdir(parents=True) + rel = ".cecli/agents/2026-05-27/sess/todo.txt" + (agents / "todo.txt").write_text("Remaining:\n→ Step A\n○ Step B\n", encoding="utf-8") + + store2 = import_agent_plan_for_workspace(tmp_path, agent_todo_relpath=rel) + assert len(store2.todos) == 1 + item = store2.todos[0] + assert item.id == "user1" + assert item.title == "My feature" + assert len(item.checklist) == 2 + assert agent_todo_link_for(rel) in item.links + + +def test_import_agent_plan_preserves_spec_tasks_md(tmp_path: Path): + from cecli.spec.agent_todos import preserve_spec_tasks_md_on_agent_import + + spec_tasks = ( + "- [ ] 1. Wire generate-spec API for REQ-001 (depends: none)\n" + "- [ ] 2. 
Add tests for REQ-002 (depends: 1)\n" + ) + agent_tasks = rows_to_tasks_md( + [ + AgentTodoRow(text="Step A", done=False, current=True), + AgentTodoRow(text="Step B", done=False, current=False), + ] + ) + item = TodoItem( + id="user1", + title="My feature", + tasks_md=spec_tasks, + status="in_progress", + links=[], + checklist=[], + created_at=_now_iso(), + updated_at=_now_iso(), + ) + assert preserve_spec_tasks_md_on_agent_import(item, agent_tasks) is True + + api = WorkspaceTodos(tmp_path) + store = api.load() + store.todos.append(item) + store.active_id = item.id + api.save(store) + + agents = tmp_path / ".cecli" / "agents" / "2026-05-27" / "sess" + agents.mkdir(parents=True) + rel = ".cecli/agents/2026-05-27/sess/todo.txt" + (agents / "todo.txt").write_text("Remaining:\n→ Step A\n○ Step B\n", encoding="utf-8") + + store2 = import_agent_plan_for_workspace(tmp_path, agent_todo_relpath=rel) + merged = store2.todos[0] + assert merged.tasks_md == spec_tasks + assert len(merged.checklist) == 2 + + +def test_export_roundtrip(tmp_path: Path): + rows = [ + AgentTodoRow(text="Done step", done=True, current=False), + AgentTodoRow(text="Now", done=False, current=True), + AgentTodoRow(text="Later", done=False, current=False), + ] + rel = ".cecli/agents/x/todo.txt" + item = TodoItem( + id="t1", + title="Plan", + tasks_md="", + status="in_progress", + links=[agent_todo_link_for(rel)], + checklist=[ + ChecklistItem(id="a", text="Done step", done=True), + ChecklistItem(id="b", text="Now", done=False), + ChecklistItem(id="c", text="Later", done=False), + ], + created_at=_now_iso(), + updated_at=_now_iso(), + ) + assert rows_from_todo_item(item) == rows + export_todo_item_to_agent(tmp_path, rel, item) + path = tmp_path / rel + assert path.is_file() + parsed = parse_agent_todo_txt(path.read_text(encoding="utf-8")) + assert [r.text for r in parsed] == [r.text for r in rows] + assert format_agent_todo_txt(rows) in path.read_text(encoding="utf-8") + + +def 
test_sanitize_reverts_premature_done_beyond_focus(): + rows = [ + AgentTodoRow(text="1.3 Write unit tests", done=True, current=False), + AgentTodoRow(text="2.1 Create entities", done=True, current=False), + ] + ctx = AgentTodoSanitizeContext(focus_step="1.3", flutter_test_ok=None) + sanitized, warnings = sanitize_agent_todo_rows( + rows, + ctx=ctx, + prior_done_texts=frozenset(), + ) + assert sanitized[0].done is True + assert sanitized[1].done is False + assert warnings + + +def test_sanitize_reverts_test_done_without_flutter_pass(): + rows = [ + AgentTodoRow(text="1.3 Write unit tests for NetworkInterceptor", done=True, current=False) + ] + ctx = AgentTodoSanitizeContext(focus_step="1.3", flutter_test_ok=False) + sanitized, warnings = sanitize_agent_todo_rows( + rows, + ctx=ctx, + prior_done_texts=frozenset(), + ) + assert sanitized[0].done is False + assert warnings + + +def test_current_agent_todo_row_prefers_marked_current(tmp_path: Path): + rows = [ + AgentTodoRow(text="Done step", done=True, current=False), + AgentTodoRow(text="3.1 Encrypted storage", done=False, current=True), + AgentTodoRow(text="3.2 Later", done=False, current=False), + ] + row = current_agent_todo_row(rows) + assert row is not None + assert row.text.startswith("3.1") + + +def test_load_agent_todo_rows_from_latest(tmp_path: Path): + agents = tmp_path / ".cecli" / "agents" / "2026-06-07" / "abc" + agents.mkdir(parents=True) + (agents / "todo.txt").write_text( + "Remaining:\n→ 3.1 Develop EncryptedStorageRepository\n", + encoding="utf-8", + ) + rows = load_agent_todo_rows(tmp_path) + assert len(rows) == 1 + assert rows[0].current + assert "3.1" in rows[0].text + + +def test_sync_session_pull_prefers_linked_agent_todo_over_stale_session_copy(tmp_path: Path): + """Pre-session push must not revert a later workspace import from the linked todo.txt.""" + spec_tasks = ( + "## Implementation tasks\n\n" + "- [ ] 1. Wire generate-spec API for REQ-001 (depends: none)\n" + "- [ ] 2. 
Add tests for REQ-002 (depends: 1)\n" + ) + api = WorkspaceTodos(tmp_path) + store = api.load() + item = TodoItem( + id="user1", + title="My feature", + tasks_md=spec_tasks, + status="in_progress", + links=[], + checklist=[], + created_at=_now_iso(), + updated_at=_now_iso(), + ) + store.todos.append(item) + store.active_id = item.id + api.save(store) + + class FakeCoder: + def __init__(self, root: Path): + self.root = root + + def local_agent_folder(self, name: str) -> str: + return f".cecli/agents/2026-06-03/session-a/{name}" + + class FakeSession: + def __init__(self, root: Path): + self.coder = FakeCoder(root) + + session_rel = ".cecli/agents/2026-06-03/session-a/todo.txt" + export_todo_item_to_agent(tmp_path, session_rel, item) + + linked_rel = ".cecli/agents/2026-05-27/imported/todo.txt" + linked = tmp_path / linked_rel + linked.parent.mkdir(parents=True) + linked.write_text( + "Done:\n" + "✓ 1. Wire generate-spec API for REQ-001 (depends: none)\n\n" + "Remaining:\n" + "→ 2. Add tests for REQ-002 (depends: 1)\n", + encoding="utf-8", + ) + import_agent_plan_for_workspace(tmp_path, agent_todo_relpath=linked_rel) + + store2, _ = sync_session_agent_todos(FakeSession(tmp_path), pull=True, push_active=True) + merged = store2.todos[0] + assert "- [x] 1. 
Wire generate-spec" in merged.tasks_md + assert merged.checklist[0].done is True diff --git a/tests/spec/test_ears_index.py b/tests/spec/test_ears_index.py new file mode 100644 index 00000000000..b6a57bc8b2c --- /dev/null +++ b/tests/spec/test_ears_index.py @@ -0,0 +1,46 @@ +"""EARS spec index (roadmap #22 / E3).""" + +from __future__ import annotations + +import tempfile +import unittest +from pathlib import Path + +from cecli.spec.ears import build_spec_index + + +class TestEarsSpecIndex(unittest.TestCase): + def test_orphan_folder_and_global_dup(self): + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + specs = root / ".cecli" / "specs" + for tid, req in ( + ("task-a", "### REQ-001\n**WHEN** a\n**THE** system **SHALL** do A.\n"), + ("task-b", "### REQ-001\n**WHEN** b\n**THE** system **SHALL** do B.\n"), + ("orphan", "### REQ-002\n**WHEN** c\n**THE** system **SHALL** do C.\n"), + ): + folder = specs / tid + folder.mkdir(parents=True) + (folder / "requirements.md").write_text(req, encoding="utf-8") + + result = build_spec_index(root, task_ids=["task-a", "task-b"]) + codes = {i.code for i in result.issues} + self.assertIn("SPEC_ORPHAN_FOLDER", codes) + self.assertIn("SPEC_REQ_ID_GLOBAL_DUP", codes) + self.assertFalse(result.ok) + + def test_missing_folder_info(self): + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + result = build_spec_index(root, task_ids=["only-json"]) + self.assertTrue(any(i.code == "SPEC_MISSING_FOLDER" for i in result.issues)) + + def test_to_dict_serializable(self): + with tempfile.TemporaryDirectory() as tmp: + d = build_spec_index(tmp, task_ids=[]).to_dict() + self.assertIn("folders", d) + self.assertIn("issues", d) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_ears_lint.py b/tests/spec/test_ears_lint.py new file mode 100644 index 00000000000..bc5ac5e7a73 --- /dev/null +++ b/tests/spec/test_ears_lint.py @@ -0,0 +1,96 @@ +"""EARS module — deterministic requirements lint.""" + 
+from __future__ import annotations + +import unittest + +from cecli.spec.ears import analyze_requirements + +GOOD = """\ +### REQ-001 +**WHEN** the user opens Tasks +**THE** system **SHALL** show the active task chip. + +### REQ-002 +**WHEN** the user saves a requirement +**THE** system **SHALL** sync to `.cecli/specs/{id}/requirements.md`. +""" + +BAD_NO_SHALL = """\ +### REQ-001 +**WHEN** the user opens Tasks +**THE** system shows the active task chip. +""" + +DUP_ID = """\ +### REQ-001 +**WHEN** a +**THE** system **SHALL** do A. + +### REQ-001 +**WHEN** b +**THE** system **SHALL** do B. +""" + +# Kiro-style: one titled requirement with a User Story and several acceptance criteria. +KIRO_MULTI_AC = """\ +### Introduction +The feature exposes a health endpoint. + +### REQ-001: Health check +**User Story:** As a client, I want a health endpoint, so that I can confirm the API is up. + +**Acceptance Criteria** +1. **WHEN** a client sends `GET /health` **THE** system **SHALL** respond with HTTP 200. +2. **IF** the core is starting **THEN THE** system **SHALL** respond with HTTP 503. +3. **WHILE** running **THE** system **SHALL** report uptime. 
+""" + + +class TestEarsLint(unittest.TestCase): + def test_good_requirements_ok(self): + r = analyze_requirements(GOOD) + self.assertTrue(r.ok) + self.assertGreaterEqual(len(r.clauses), 2) + self.assertFalse(any(i.code == "EARS_NO_SHALL" for i in r.issues)) + + def test_missing_shall_errors(self): + r = analyze_requirements(BAD_NO_SHALL) + self.assertFalse(r.ok) + self.assertTrue(any(i.code == "EARS_NO_SHALL" for i in r.issues)) + + def test_duplicate_req_id(self): + r = analyze_requirements(DUP_ID) + self.assertFalse(r.ok) + self.assertTrue(any(i.code == "EARS_DUP_ID" for i in r.issues)) + + def test_kiro_multi_acceptance_criteria_ok(self): + """One titled requirement with several ACs must not trip EARS_DUP_ID.""" + r = analyze_requirements(KIRO_MULTI_AC) + self.assertTrue(r.ok, [i.to_dict() for i in r.issues]) + self.assertFalse(any(i.code == "EARS_DUP_ID" for i in r.issues)) + # Three acceptance criteria → three clauses, all under REQ-001. + self.assertEqual(len([c for c in r.clauses if c.req_id == "REQ-001"]), 3) + # The descriptive User Story line is not linted as a normative clause. + self.assertFalse(any("User Story" in c.text for c in r.clauses)) + + def test_titled_heading_carries_req_id(self): + r = analyze_requirements(KIRO_MULTI_AC) + self.assertTrue(all(c.req_id == "REQ-001" for c in r.clauses)) + + def test_kiro_user_story_with_if_while_not_clauses(self): + """User Story prose must not lint as EARS (common words if/while).""" + text = """\ +### REQ-002: Cloud sync +**User Story:** As a user, I want sync while traveling, so that I can access logs if I lose signal. + +**Acceptance Criteria** +1. **WHERE** sync is enabled **THEN THE** system **SHALL** encrypt data. 
+""" + r = analyze_requirements(text) + self.assertTrue(r.ok, [i.to_dict() for i in r.issues]) + self.assertFalse(any("User Story" in c.text for c in r.clauses)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_ears_prompt.py b/tests/spec/test_ears_prompt.py new file mode 100644 index 00000000000..5d582602b78 --- /dev/null +++ b/tests/spec/test_ears_prompt.py @@ -0,0 +1,52 @@ +"""EARS + trace prompt context for generate/refine (E5).""" + +from __future__ import annotations + +import unittest + +from cecli.spec.ears.prompt import ( + format_spec_quality_for_prompt, + requirements_pass_ears, +) + + +class TestEarsPrompt(unittest.TestCase): + def test_format_spec_quality_includes_lint_trace_and_depth(self): + req = "### REQ-001\n**WHEN** x\n**THE** system **SHALL** y.\n" + design = "## Overview\nShort." + tasks = "- [ ] 1. Step (depends: none)" + block = format_spec_quality_for_prompt(req, design, tasks) + self.assertIn("Current spec quality", block) + self.assertIn("EARS:", block) + self.assertIn("Trace:", block) + self.assertIn("Deepen the spec", block) + self.assertIn("REQ-###", block) + + def test_format_spec_quality_requirements_only(self): + req = "### REQ-001\n**WHEN** x\n**THE** system **SHALL** y.\n" + block = format_spec_quality_for_prompt(req, "", "") + self.assertIn("EARS:", block) + self.assertNotIn("Trace: no REQ", block) + + def test_requirements_pass_ears_blocks_errors_only(self): + ok, issues = requirements_pass_ears( + "### REQ-001\n**WHEN** x\n**THE** system **SHALL** y.\n" + ) + self.assertTrue(ok) + self.assertEqual(issues, []) + + bad, issues = requirements_pass_ears("### REQ-001\n**WHEN** x\n**THE** system shows y.\n") + self.assertFalse(bad) + self.assertTrue(any(i["code"] == "EARS_NO_SHALL" for i in issues)) + self.assertTrue(all(i["severity"] == "error" for i in issues)) + + def test_requirements_pass_ears_returns_errors_only(self): + """Gate issues list contains severity=error entries only.""" + bad, issues = 
requirements_pass_ears("### REQ-001\n**WHEN** x\n**THE** system shows y.\n") + self.assertFalse(bad) + self.assertTrue(issues) + self.assertTrue(all(i["severity"] == "error" for i in issues)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_ears_repair.py b/tests/spec/test_ears_repair.py new file mode 100644 index 00000000000..f9cc0fa23b5 --- /dev/null +++ b/tests/spec/test_ears_repair.py @@ -0,0 +1,58 @@ +"""EARS repair helper for compact LLM spec generation.""" + +from __future__ import annotations + +import unittest + +from cecli.spec.ears.lint import analyze_requirements +from cecli.spec.ears.repair import repair_requirements_missing_shall + + +class TestEarsRepair(unittest.TestCase): + def test_repairs_when_only_clauses(self): + raw = """\ +### REQ-001: A +**Acceptance Criteria** +1. **WHEN** a client calls the API +2. **WHEN** the core is idle +""" + fixed = repair_requirements_missing_shall(raw) + result = analyze_requirements(fixed) + self.assertTrue(result.ok, result.issues) + + def test_repairs_numbered_bullet_without_shall(self): + raw = """\ +### REQ-001: Health +1. **WHEN** a client calls GET /health +### REQ-002: Payload +2. Response body includes a status field +""" + fixed = repair_requirements_missing_shall(raw) + result = analyze_requirements(fixed) + self.assertTrue(result.ok, result.issues) + self.assertIn("SHALL", fixed) + + def test_repairs_if_then_prose_without_shall(self): + raw = """\ +### REQ-001: Health +1. **WHEN** a client calls GET /health **THE** system **SHALL** respond with HTTP 200. +### REQ-002: Payload +**IF** the status is ok **THEN** include the literal value `ok` in the JSON body. 
+""" + fixed = repair_requirements_missing_shall(raw) + result = analyze_requirements(fixed) + self.assertTrue(result.ok, result.issues) + self.assertIn("SHALL", fixed.split("REQ-002")[1]) + + def test_repairs_where_prose_without_shall(self): + raw = """\ +### REQ-002: Auth +**WHERE** the client is unauthenticated the API returns HTTP 401. +""" + fixed = repair_requirements_missing_shall(raw) + result = analyze_requirements(fixed) + self.assertTrue(result.ok, result.issues) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_ears_report.py b/tests/spec/test_ears_report.py new file mode 100644 index 00000000000..0415e175cd4 --- /dev/null +++ b/tests/spec/test_ears_report.py @@ -0,0 +1,64 @@ +"""EARS report formatters for UI, logs, and LLM prompt context.""" + +from __future__ import annotations + +import tempfile +import unittest +from pathlib import Path + +from cecli.spec.ears import analyze_requirements, analyze_traceability, build_spec_index +from cecli.spec.ears.report import ( + format_lint_summary, + format_spec_index_summary, + format_trace_summary, +) + + +class TestEarsReport(unittest.TestCase): + def test_format_lint_summary_ok(self): + result = analyze_requirements("### REQ-001\n**WHEN** x\n**THE** system **SHALL** y.\n") + summary = format_lint_summary(result) + self.assertIn("no issues", summary.lower()) + + def test_format_lint_summary_errors(self): + result = analyze_requirements("### REQ-001\n**WHEN** x\n**THE** system shows y.\n") + summary = format_lint_summary(result) + self.assertIn("error", summary.lower()) + self.assertIn("EARS_NO_SHALL", summary) + + def test_format_trace_summary_counts_coverage(self): + req = "### REQ-001\n**WHEN** x\n**THE** system **SHALL** y.\n" + design = "## Overview\nImplements REQ-001.\n" + tasks = "- [ ] 1. 
Step (depends: none) — REQ-001\n" + trace = analyze_traceability(req, design, tasks) + summary = format_trace_summary(trace) + self.assertIn("Trace:", summary) + self.assertIn("1/1", summary) + + def test_format_trace_summary_no_req_ids(self): + trace = analyze_traceability("", "design", "tasks") + summary = format_trace_summary(trace) + self.assertIn("no REQ", summary) + + def test_format_spec_index_summary_empty_workspace(self): + with tempfile.TemporaryDirectory() as tmp: + result = build_spec_index(tmp, task_ids=[]) + summary = format_spec_index_summary(result) + self.assertIn("Spec index:", summary) + self.assertIn("OK", summary) + + def test_format_spec_index_summary_orphan_folder(self): + with tempfile.TemporaryDirectory() as tmp: + orphan = Path(tmp) / ".cecli" / "specs" / "orphan-task" + orphan.mkdir(parents=True) + (orphan / "requirements.md").write_text( + "### REQ-001\n**WHEN** x\n**THE** system **SHALL** y.\n", + encoding="utf-8", + ) + result = build_spec_index(tmp, task_ids=["known-task"]) + summary = format_spec_index_summary(result) + self.assertIn("warning", summary.lower()) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_ears_trace.py b/tests/spec/test_ears_trace.py new file mode 100644 index 00000000000..8b57bfda48d --- /dev/null +++ b/tests/spec/test_ears_trace.py @@ -0,0 +1,52 @@ +"""EARS traceability (E4).""" + +from __future__ import annotations + +import unittest + +from cecli.spec.ears import analyze_traceability + +REQ = """\ +### REQ-001 +**WHEN** the user saves +**THE** system **SHALL** persist specs. + +### REQ-002 +**WHEN** the user traces +**THE** system **SHALL** report gaps. +""" + +DESIGN = """\ +## Overview +Covers REQ-001 in the persistence layer. +""" + +TASKS = """\ +- [ ] 1. Wire HTTP for REQ-002 (depends: none) +- [x] 2. 
Add tests (depends: 1) +""" + + +class TestEarsTrace(unittest.TestCase): + def test_links_and_uncovered_warning(self): + r = analyze_traceability(REQ, DESIGN, TASKS) + by_id = {link.req_id: link for link in r.links} + self.assertTrue(by_id["REQ-001"].in_design) + self.assertIn(1, by_id["REQ-002"].task_steps) + self.assertEqual(len(r.steps), 2) + + bare = analyze_traceability(REQ, "", "") + self.assertTrue(any(i.code == "TRACE_REQ_UNCOVERED" for i in bare.issues)) + + def test_unknown_req_in_tasks_errors(self): + r = analyze_traceability( + REQ, + DESIGN, + "- [ ] 1. Fix REQ-999 (depends: none)\n", + ) + self.assertFalse(r.ok) + self.assertTrue(any(i.code == "TRACE_REQ_UNKNOWN" for i in r.issues)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_generate_spec_parse.py b/tests/spec/test_generate_spec_parse.py new file mode 100644 index 00000000000..773422076cb --- /dev/null +++ b/tests/spec/test_generate_spec_parse.py @@ -0,0 +1,117 @@ +"""Parse + sanity checks for generate/refine spec output (no LLM).""" + +from __future__ import annotations + +import unittest + +from helpers.spec_layer_assertions import ( + SAMPLE_GENERATED_MARKDOWN, + assess_generated_spec_layers, +) + +from cecli.spec.generate import parse_generated_layers +from cecli.spec.layers import ( + assess_spec_richness, + normalize_spec_layer_traceability, + normalize_tasks_md_numbering, +) + + +class TestGenerateSpecParse(unittest.TestCase): + def test_parse_three_sections(self): + layers = parse_generated_layers(SAMPLE_GENERATED_MARKDOWN) + self.assertIn("REQ-001", layers.get("requirements", "")) + self.assertIn("Overview", layers.get("design", "")) + self.assertRegex(layers.get("tasks_md", ""), r"1\.\s+Add route") + + def test_sample_passes_sanity(self): + layers = parse_generated_layers(SAMPLE_GENERATED_MARKDOWN) + ok, issues = assess_generated_spec_layers( + layers.get("requirements", ""), + layers.get("design", ""), + layers.get("tasks_md", ""), + ) + 
self.assertTrue(ok, issues) + + def test_normalize_adds_design_traceability(self): + layers = { + "requirements": "### REQ-001\n**WHEN** x\n**THE** system **SHALL** a.\n", + "design": "## Overview\nHTTP API only.", + "tasks_md": "- [ ] 1. Step (depends: none)", + } + out = normalize_spec_layer_traceability(layers) + self.assertIn("REQ-001", out["design"]) + ok, issues = assess_generated_spec_layers( + out["requirements"], + out["design"], + out["tasks_md"], + ) + self.assertTrue(ok, issues) + + def test_sample_is_kiro_rich(self): + """The shared fixture should now read as a rich, Kiro-grade spec.""" + layers = parse_generated_layers(SAMPLE_GENERATED_MARKDOWN) + rich, suggestions = assess_spec_richness( + layers.get("requirements", ""), + layers.get("design", ""), + layers.get("tasks_md", ""), + ) + self.assertTrue(rich, suggestions) + + def test_richness_flags_thin_spec(self): + rich, suggestions = assess_spec_richness( + requirements="### REQ-001\n**WHEN** x\n**THE** system **SHALL** y.\n", + design="## Overview\nshort", + tasks_md="- [ ] 1. 
Do it (depends: none)", + ) + self.assertFalse(rich) + joined = " ".join(suggestions) + self.assertIn("User Story", joined) + self.assertIn("design:", joined) + self.assertIn("tasks:", joined) + + def test_normalize_after_merge_for_phased_design(self): + """Phased design parse omits requirements; merge must precede normalize.""" + parsed_only = { + "requirements": "", + "design": "## Overview\nHTTP API only.", + "tasks_md": "", + } + self.assertNotIn("REQ-001", normalize_spec_layer_traceability(parsed_only)["design"]) + merged = { + "requirements": "### REQ-001\n**WHEN** x\n**THE** system **SHALL** a.\n", + "design": "## Overview\nHTTP API only.", + "tasks_md": "", + } + out = normalize_spec_layer_traceability(merged) + self.assertIn("REQ-001", out["design"]) + + def test_normalize_numbered_tasks_from_plain_bullets(self): + tasks = "- [ ] Implement health route\n- [ ] Add HTTP test\n" + out = normalize_tasks_md_numbering(tasks) + self.assertRegex(out, r"1\.\s+Implement") + self.assertRegex(out, r"2\.\s+Add HTTP") + ok, issues = assess_generated_spec_layers( + "### REQ-001\n**WHEN** x\n**THE** system **SHALL** a.\n", + "## Overview\nREQ-001", + out, + ) + self.assertTrue(ok, issues) + + def test_normalize_tasks_via_traceability_helper(self): + layers = { + "requirements": "### REQ-001\n**WHEN** x\n**THE** system **SHALL** a.\n", + "design": "## Overview\nHTTP API only.", + "tasks_md": "- Implement endpoint\n- Add test\n", + } + out = normalize_spec_layer_traceability(layers) + ok, issues = assess_generated_spec_layers( + out["requirements"], + out["design"], + out["tasks_md"], + ) + self.assertTrue(ok, issues) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_implement_workspace.py b/tests/spec/test_implement_workspace.py new file mode 100644 index 00000000000..ac980a021e9 --- /dev/null +++ b/tests/spec/test_implement_workspace.py @@ -0,0 +1,244 @@ +"""Tests for implement workspace snapshot injection.""" + +from __future__ import 
annotations + +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from cecli.spec.agent_todos import AgentTodoRow +from cecli.spec.implement import ( + build_implement_workspace_block, + build_workspace_snapshot_lines, + checklist_step_prefix, + dart_test_paths_for_focus, + deliverable_paths_exist, + focus_checklist_item, + is_step_after, + paths_from_checklist_text, + resolve_flutter_executable, + resolve_implement_focus, +) +from cecli.spec.todos import ChecklistItem + + +class TestImplementWorkspace(unittest.TestCase): + def test_paths_from_checklist_text(self): + text = "1.2 Implement NetworkInterceptor in lib/core/network/" + assert paths_from_checklist_text(text) == ["lib/core/network"] + nested = "1. Scaffold `client/package.json` and root `package.json`" + assert paths_from_checklist_text(nested) == ["client/package.json", "package.json"] + + def test_deliverable_paths_exist(self): + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + net = root / "lib" / "core" / "network" + net.mkdir(parents=True) + (net / "interceptor.dart").write_text("// x", encoding="utf-8") + self.assertTrue(deliverable_paths_exist(root, ["lib/core/network"])) + + def test_focus_prefers_active_task_title_over_first_open(self): + checklist = [ + ChecklistItem( + id="c1", text="1.2 Implement NetworkInterceptor in lib/core/network/", done=False + ), + ChecklistItem(id="c2", text="1.3 Write unit tests for NetworkInterceptor", done=False), + ] + focus = focus_checklist_item( + checklist, + message="Implement the active task per the injected requirements.", + active_task_title="1.3 Write unit tests for NetworkInterceptor", + ) + self.assertEqual(focus.text, checklist[1].text) + + def test_focus_from_implement_step_message(self): + checklist = [ + ChecklistItem(id="c1", text="1.1 Scaffold lib/", done=True), + ChecklistItem(id="c2", text="1.3 Write unit tests for NetworkInterceptor", done=False), + ] + focus = focus_checklist_item( + 
checklist, + message="/agent Implement only implementation task 1.3: Write unit tests for NetworkInterceptor.", + ) + self.assertEqual(focus.text, checklist[1].text) + + def test_step_ordering(self): + self.assertTrue(is_step_after("2.1", "1.3")) + self.assertFalse(is_step_after("1.2", "1.3")) + + def test_focus_prefers_active_task_title_even_when_done(self): + checklist = [ + ChecklistItem(id="c1", text="1.3 Write unit tests for NetworkInterceptor", done=True), + ChecklistItem(id="c2", text="2.2 Define abstract repository interfaces", done=False), + ] + focus = focus_checklist_item( + checklist, + message="/agent Continue the active task from where you stopped.", + active_task_title="1.3 Write unit tests for NetworkInterceptor", + ) + self.assertEqual(focus.text, checklist[0].text) + + def test_test_paths_for_focus_requires_named_path(self): + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "pubspec.yaml").write_text("name: x\n", encoding="utf-8") + test_path = root / "test" / "core" / "network" / "network_interceptor_test.dart" + test_path.parent.mkdir(parents=True) + test_path.write_text("", encoding="utf-8") + focus = ChecklistItem( + id="c1", + text="1.3 Write unit tests in `test/core/network/network_interceptor_test.dart`", + done=False, + ) + paths = dart_test_paths_for_focus(root, focus) + self.assertEqual(paths, ["test/core/network/network_interceptor_test.dart"]) + + def test_test_paths_for_focus_ignores_unnamed_files(self): + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "pubspec.yaml").write_text("name: x\n", encoding="utf-8") + test_dir = root / "test" / "core" / "network" + test_dir.mkdir(parents=True) + (test_dir / "network_interceptor_test.dart").write_text("", encoding="utf-8") + focus = ChecklistItem( + id="c1", text="1.3 Write unit tests for NetworkInterceptor", done=False + ) + self.assertEqual(dart_test_paths_for_focus(root, focus), []) + + def test_snapshot_lists_top_level_only(self): + with 
tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "pubspec.yaml").write_text("name: x\n", encoding="utf-8") + lib = root / "lib" / "core" / "network" + lib.mkdir(parents=True) + (lib / "a.dart").write_text("", encoding="utf-8") + test = root / "test" / "core" / "network" + test.mkdir(parents=True) + (test / "a_test.dart").write_text("", encoding="utf-8") + checklist = [ + ChecklistItem( + id="c1", + text="1.3 Write unit tests in `test/core/network/a_test.dart`", + done=False, + ), + ] + block = build_implement_workspace_block( + root, + checklist, + resume=True, + active_task_title="1.3 Write unit tests in `test/core/network/a_test.dart`", + ) + self.assertIn("Workspace snapshot", block) + self.assertIn("`lib/`", block) + self.assertIn("`test/`", block) + self.assertNotIn("lib/core/network/a.dart", block) + self.assertIn("test/core/network/a_test.dart", block) + self.assertIn("flutter test", block) + + def test_continuation_block_is_trimmed(self): + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "pubspec.yaml").write_text("name: x\n", encoding="utf-8") + block = build_implement_workspace_block( + root, + [], + resume=True, + agent_continuation=True, + ) + self.assertIn("Continue (trimmed", block) + + def test_resolve_focus_from_agent_todo_when_checklist_all_done(self): + checklist = [ + ChecklistItem(id="c1", text="2.2 Define abstract repository interfaces", done=True), + ChecklistItem(id="c2", text="2.3 Write unit tests mocking repositories", done=True), + ] + agent_rows = [ + AgentTodoRow( + text="3.1 Develop EncryptedStorageRepository for local encrypted data", + done=False, + current=True, + ), + ] + focus, from_agent = resolve_implement_focus( + checklist, + message="/agent Continue the active task.", + active_task_title="Agent session plan", + agent_todo_rows=agent_rows, + ) + self.assertTrue(from_agent) + self.assertIn("3.1 Develop EncryptedStorageRepository", focus.text) + + def 
test_build_block_uses_agent_todo_when_checklist_done(self): + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "pubspec.yaml").write_text("name: x\n", encoding="utf-8") + agents = root / ".cecli" / "agents" / "2026-06-07" / "abc" + agents.mkdir(parents=True) + (agents / "todo.txt").write_text( + "Remaining:\n→ 3.1 Develop EncryptedStorageRepository for local encrypted data\n", + encoding="utf-8", + ) + checklist = [ + ChecklistItem(id="c1", text="2.2 Define abstract repository interfaces", done=True), + ] + block = build_implement_workspace_block( + root, + checklist, + resume=True, + active_task_title="Agent session plan", + ) + self.assertIn("Agent todo", block) + self.assertIn("3.1 Develop EncryptedStorageRepository", block) + self.assertNotIn("All checklist items are marked done", block) + + def test_checklist_step_prefix(self): + self.assertEqual(checklist_step_prefix("1.3 Write unit tests"), "1.3") + + @patch("cecli.spec.implement.shutil.which", return_value="/opt/flutter/bin/flutter") + def test_resolve_flutter_executable(self, _which): + self.assertEqual(resolve_flutter_executable(), "/opt/flutter/bin/flutter") + + def test_snapshot_top_level_is_factual(self): + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "alpha").mkdir() + (root / "notes.txt").write_text("hello", encoding="utf-8") + lines = build_workspace_snapshot_lines(root) + blob = "\n".join(lines) + self.assertIn("Top level", blob) + self.assertIn("`alpha/`", blob) + self.assertIn("`notes.txt`", blob) + + def test_scaffold_step_uses_checklist_paths(self): + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + checklist = [ + ChecklistItem( + id="c1", + text="1. 
Scaffold the workspace (`package.json`)", + done=False, + ), + ] + block = build_implement_workspace_block(root, checklist, resume=False) + self.assertIn("package.json", block) + self.assertIn("ContextManager create", block) + + def test_no_path_checklist_points_at_implementation_tasks(self): + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + checklist = [ + ChecklistItem( + id="c1", + text="1. Scaffold the monorepo workspace and shared tooling", + done=False, + ), + ] + block = build_implement_workspace_block(root, checklist, resume=False) + self.assertIn("names **no file paths**", block) + self.assertIn("Implementation tasks", block) + self.assertIn("orientation only", block) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_progress.py b/tests/spec/test_progress.py new file mode 100644 index 00000000000..554184be96c --- /dev/null +++ b/tests/spec/test_progress.py @@ -0,0 +1,195 @@ +"""Tests for unified spec implementation progress (checklist ↔ tasks_md ↔ agent).""" + +from __future__ import annotations + +from pathlib import Path + +from cecli.spec.agent_todos import ( + AgentTodoRow, + import_agent_plan_store, +) +from cecli.spec.progress import ( + checklist_from_agent_rows, + implementation_steps, + mark_implementation_step_done, + materialize_checklist_from_tasks_md, + merge_agent_progress_into_tasks_md, + next_open_implementation_step, + try_mark_focus_step_complete, +) +from cecli.spec.todos import ChecklistItem, TodoItem, TodoStore, _now_iso + + +def _item(*, tasks_md: str = "", checklist: list[ChecklistItem] | None = None) -> TodoItem: + return TodoItem( + id="t1", + title="Feature", + tasks_md=tasks_md, + checklist=checklist or [], + created_at=_now_iso(), + updated_at=_now_iso(), + ) + + +def test_merge_agent_progress_into_tasks_md_preserves_rich_text(): + tasks_md = ( + "## Implementation tasks\n\n" + "- [ ] 1. Wire API for REQ-001 (depends: none)\n" + ' - verify: `python -c "import api"`\n' + "- [ ] 2. 
Add tests for REQ-002 (depends: 1)\n" + ) + rows = [ + AgentTodoRow(text="1. Wire API for REQ-001 (depends: none)", done=True, current=False), + AgentTodoRow(text="2. Add tests for REQ-002 (depends: 1)", done=False, current=True), + ] + merged = merge_agent_progress_into_tasks_md(tasks_md, rows) + assert "- [x] 1. Wire API" in merged + assert "verify: `python" in merged + assert "- [ ] 2. Add tests" in merged + + +def test_checklist_from_agent_rows_reuses_stable_ids(): + prior = [ + ChecklistItem(id="keep-me", text="1. First step", done=False), + ChecklistItem(id="also-keep", text="2. Second step", done=False), + ] + rows = [ + AgentTodoRow(text="1. First step", done=True, current=False), + AgentTodoRow(text="2. Second step", done=False, current=True), + ] + out = checklist_from_agent_rows(rows, prior=prior) + assert [c.id for c in out] == ["keep-me", "also-keep"] + assert out[0].done is True + + +def test_materialize_checklist_from_tasks_md(): + item = _item( + tasks_md="- [ ] 1. Scaffold lib/ (depends: none)\n- [x] 2. Add tests\n", + ) + checklist = materialize_checklist_from_tasks_md(item) + assert len(checklist) == 2 + assert checklist[0].done is False + assert checklist[1].done is True + + +def test_implementation_steps_prefers_checklist(): + item = _item( + tasks_md="- [ ] 9. Ignored when checklist present\n", + checklist=[ChecklistItem(id="a", text="1. Real step", done=False)], + ) + steps = implementation_steps(item) + assert len(steps) == 1 + assert steps[0].step_id == "1" + + +def test_next_open_implementation_step_after_completed(): + item = _item( + tasks_md=("- [x] 1. Done\n" "- [ ] 2. Next\n" "- [ ] 3. Later\n"), + ) + nxt = next_open_implementation_step(item, after="1") + assert nxt is not None + assert nxt.step_id == "2" + + +def test_mark_implementation_step_done_updates_both_layers(): + item = _item( + tasks_md="- [ ] 1. Wire module\n- [ ] 2. Add tests\n", + checklist=[ + ChecklistItem(id="a", text="1. 
Wire module", done=False), + ChecklistItem(id="b", text="2. Add tests", done=False), + ], + ) + updated = mark_implementation_step_done(item, "1", done=True) + assert updated.checklist[0].done is True + assert "- [x] 1. Wire module" in updated.tasks_md + assert "- [ ] 2. Add tests" in updated.tasks_md + + +def test_try_mark_focus_step_complete_on_verify_pass(): + item = _item( + tasks_md="- [ ] 1. Run lint\n - verify: `true`\n", + checklist=[ChecklistItem(id="a", text="1. Run lint", done=False)], + ) + updated, changed = try_mark_focus_step_complete( + item, + "1", + flutter_test_ok=None, + verify_ok=True, + ) + assert changed + assert updated.checklist[0].done is True + + +def test_try_mark_focus_step_complete_requires_flutter_for_test_step(): + item = _item( + checklist=[ChecklistItem(id="a", text="1.3 Write unit tests", done=False)], + ) + _, changed = try_mark_focus_step_complete( + item, + "1.3", + flutter_test_ok=False, + verify_ok=True, + ) + assert changed is False + + +def test_import_agent_plan_merges_done_into_preserved_tasks_md(): + spec_tasks = ( + "## Implementation tasks\n\n" + "- [ ] 1. Wire generate-spec API for REQ-001 (depends: none)\n" + "- [ ] 2. Add tests for REQ-002 (depends: 1)\n" + ) + store = TodoStore( + todos=[ + TodoItem( + id="user1", + title="My feature", + tasks_md=spec_tasks, + status="in_progress", + links=[], + checklist=[], + created_at=_now_iso(), + updated_at=_now_iso(), + ) + ], + active_id="user1", + ) + rows = [ + AgentTodoRow( + text="1. Wire generate-spec API for REQ-001 (depends: none)", + done=True, + current=False, + ), + AgentTodoRow(text="2. Add tests for REQ-002 (depends: 1)", done=False, current=True), + ] + out = import_agent_plan_store(store, rows, target_todo_id="user1") + item = out.todos[0] + assert "- [x] 1. 
Wire generate-spec" in item.tasks_md + assert "REQ-001" in item.tasks_md + assert item.checklist[0].done is True + assert len(item.checklist) == 2 + + +def test_workspace_todos_update_materializes_checklist(tmp_path: Path): + from cecli.spec.todos import WorkspaceTodos + + api = WorkspaceTodos(tmp_path) + store = api.load() + item = TodoItem( + id="t1", + title="Feature", + tasks_md="", + checklist=[], + created_at=_now_iso(), + updated_at=_now_iso(), + ) + store.todos.append(item) + api.save(store) + + updated, _ = api.update( + item.id, + tasks_md="- [ ] 1. Scaffold lib/ (depends: none)\n- [x] 2. Add tests\n", + ) + assert len(updated.checklist) == 2 + assert updated.checklist[0].done is False + assert updated.checklist[1].done is True diff --git a/tests/spec/test_pubspec_repair.py b/tests/spec/test_pubspec_repair.py new file mode 100644 index 00000000000..6704e33f183 --- /dev/null +++ b/tests/spec/test_pubspec_repair.py @@ -0,0 +1,68 @@ +"""Tests for pubspec.yaml dependency repair.""" + +from __future__ import annotations + +from pathlib import Path + +from cecli.spec.pubspec_repair import ( + find_missing_pubspec_dependencies, + parse_pubspec_dependencies, + repair_pubspec_dependencies, +) + + +def test_parse_pubspec_dependencies(): + text = """ +name: demo +dependencies: + flutter: + sdk: flutter + http: ^1.0.0 +dev_dependencies: + flutter_test: + sdk: flutter +""" + deps = parse_pubspec_dependencies(text) + assert "http" in deps + assert "flutter" not in deps + + +def test_find_missing_from_dart_imports(tmp_path: Path): + (tmp_path / "pubspec.yaml").write_text( + "name: demo\ndependencies:\n flutter:\n sdk: flutter\n", + encoding="utf-8", + ) + lib = tmp_path / "lib" + lib.mkdir() + (lib / "main.dart").write_text( + "import 'package:http/http.dart' as http;\nimport 'package:flutter/material.dart';\n", + encoding="utf-8", + ) + missing = find_missing_pubspec_dependencies(tmp_path) + assert missing == ["http"] + + +def 
test_repair_dry_run_lists_missing(tmp_path: Path): + (tmp_path / "pubspec.yaml").write_text( + "name: demo\ndependencies:\n flutter:\n sdk: flutter\n", + encoding="utf-8", + ) + (tmp_path / "lib").mkdir() + (tmp_path / "lib" / "a.dart").write_text( + "import 'package:provider/provider.dart';\n", encoding="utf-8" + ) + result = repair_pubspec_dependencies(tmp_path, apply=False) + assert "provider" in result.missing + assert result.applied is False + + +def test_repair_apply_appends_to_pubspec(tmp_path: Path): + pubspec = tmp_path / "pubspec.yaml" + pubspec.write_text( + "name: demo\ndependencies:\n flutter:\n sdk: flutter\n", + encoding="utf-8", + ) + result = repair_pubspec_dependencies(tmp_path, packages=["collection"], apply=True) + assert result.applied is True + text = pubspec.read_text(encoding="utf-8") + assert "collection:" in text diff --git a/tests/spec/test_spec_focus.py b/tests/spec/test_spec_focus.py new file mode 100644 index 00000000000..7efef0c3685 --- /dev/null +++ b/tests/spec/test_spec_focus.py @@ -0,0 +1,280 @@ +"""Spec-focus gating: preamble only with active task + spec layers.""" + +from __future__ import annotations + +import tempfile +import unittest + +from cecli.spec.focus import ( + build_user_message_with_spec_context, + should_inject_task_context, + spec_focus_preamble_applies, + spec_focus_requested, + todo_has_spec_content, +) +from cecli.spec.todos import ChecklistItem, TodoItem, TodoStore, migrate_todo_layers + + +def _item( + *, + requirements: str = "", + design: str = "", + tasks_md: str = "", +) -> TodoItem: + now = "2026-01-01T00:00:00Z" + return migrate_todo_layers( + TodoItem( + id="task-1", + title="Git tab", + spec="", + requirements=requirements, + design=design, + tasks_md=tasks_md, + depends_on=[], + branch="", + pr_url="", + status="open", + links=[], + checklist=[], + created_at=now, + updated_at=now, + ) + ) + + +class TestSpecFocusGating(unittest.TestCase): + def test_spec_focus_requested_flags(self): + 
self.assertTrue( + spec_focus_requested( + message_spec_focus=True, + session_spec_focus=False, + session_mode="vibe", + ) + ) + self.assertTrue( + spec_focus_requested( + message_spec_focus=False, + session_spec_focus=False, + session_mode="spec", + ) + ) + + def test_empty_layers_not_spec_content(self): + item = _item() + self.assertFalse(todo_has_spec_content(item)) + self.assertFalse(spec_focus_preamble_applies(focus_requested=True, item=item)) + + def test_tasks_md_alone_not_spec_content(self): + item = _item(tasks_md="- [ ] Explore project structure\n- [ ] Ship feature") + self.assertFalse(todo_has_spec_content(item)) + self.assertFalse(spec_focus_preamble_applies(focus_requested=True, item=item)) + + def test_layers_with_requirements_is_spec_content(self): + item = _item(requirements="### REQ-001\n**WHEN** x **THE** system **SHALL** y") + self.assertTrue(todo_has_spec_content(item)) + self.assertTrue(spec_focus_preamble_applies(focus_requested=True, item=item)) + + def test_no_preamble_without_active_task(self): + with tempfile.TemporaryDirectory() as tmp: + text, active, tid = build_user_message_with_spec_context( + tmp, + "Add revert in Git tab", + item=None, + store=None, + focus_requested=True, + inject_todo_spec=False, + ) + self.assertFalse(active) + self.assertIsNone(tid) + self.assertEqual(text, "Add revert in Git tab") + self.assertNotIn("Spec-focus mode", text) + + def test_preamble_without_full_reinject_on_followup_turn(self): + with tempfile.TemporaryDirectory() as tmp: + item = _item(requirements="### REQ-001\n**WHEN** open **THE** UI **SHALL** show revert") + store = TodoStore(version=1, active_id=item.id, todos=[item]) + text, active, tid = build_user_message_with_spec_context( + tmp, + "continue scaffolding", + item=item, + store=store, + focus_requested=True, + inject_todo_spec=False, + ) + self.assertTrue(active) + self.assertIsNone(tid) + self.assertIn("Spec-focus mode", text) + self.assertNotIn("REQ-001", text) + 
self.assertTrue(text.endswith("continue scaffolding")) + + def test_full_inject_when_inject_todo_spec_true(self): + with tempfile.TemporaryDirectory() as tmp: + item = _item(requirements="### REQ-001\n**WHEN** open **THE** UI **SHALL** show revert") + store = TodoStore(version=1, active_id=item.id, todos=[item]) + text, active, tid = build_user_message_with_spec_context( + tmp, + "Implement REQ-001", + item=item, + store=store, + focus_requested=True, + inject_todo_spec=True, + ) + self.assertTrue(active) + self.assertIn("REQ-001", text) + + def test_implement_inject_uses_lean_context(self): + with tempfile.TemporaryDirectory() as tmp: + req = "### REQ-001: Auth\n**WHEN** x **THE** system **SHALL** y\n" + ("detail " * 400) + design = "Overview\n" + ("architecture " * 500) + tasks = "- [ ] 1. Scaffold lib/ (depends: none)" + item = _item(requirements=req, design=design, tasks_md=tasks) + store = TodoStore(version=1, active_id=item.id, todos=[item]) + text, _, _ = build_user_message_with_spec_context( + tmp, + "Implement the active task per the injected requirements, design, and implementation tasks.", + item=item, + store=store, + focus_requested=True, + inject_todo_spec=True, + ) + self.assertIn("Requirements (summary)", text) + self.assertIn("### REQ-001", text) + self.assertNotIn("detail detail detail", text) + self.assertIn("Implementation tasks", text) + self.assertIn("Scaffold lib/", text) + self.assertIn("Implementation turn (tools)", text) + self.assertIn("EditText", text) + self.assertIn("Workspace snapshot", text) + + def test_implement_turn_detects_agent_prefix(self): + from cecli.spec.focus import is_implement_turn_message + + self.assertTrue( + is_implement_turn_message("/agent Implement only implementation task 1: Scaffold lib/.") + ) + self.assertTrue( + is_implement_turn_message("/agent Continue the active task from where you stopped.") + ) + + def test_agent_continuation_skips_full_spec_preamble(self): + with tempfile.TemporaryDirectory() as tmp: + 
req = "### REQ-001: Auth\n**WHEN** x **THE** system **SHALL** y\n" + item = _item(requirements=req, design="Overview", tasks_md="- [ ] 1. Scaffold") + store = TodoStore(version=1, active_id=item.id, todos=[item]) + text, _, _ = build_user_message_with_spec_context( + tmp, + "/agent Continue the active task from where you stopped.", + item=item, + store=store, + focus_requested=True, + inject_todo_spec=False, + agent_continuation=True, + ) + self.assertIn("Workspace snapshot", text) + self.assertIn("Continue (trimmed", text) + self.assertNotIn("Spec-focus mode (BrightVision)", text) + self.assertNotIn("Implementation turn (tools)", text) + + def test_agent_continuation_skips_full_task_inject(self): + item = _item(requirements="### REQ-001\n**WHEN** open **THE** UI **SHALL** show revert") + self.assertFalse( + should_inject_task_context( + focus_requested=True, + item=item, + inject_todo_spec=True, + agent_continuation=True, + ) + ) + + def test_resume_implement_skips_full_task_inject(self): + item = _item(requirements="### REQ-001\n**WHEN** x **THE** y **SHALL** z") + self.assertFalse( + should_inject_task_context( + focus_requested=True, + item=item, + inject_todo_spec=True, + message="/agent Continue the active task from where you stopped.", + ) + ) + + def test_resume_implement_injects_open_tasks_excerpt(self): + with tempfile.TemporaryDirectory() as tmp: + tasks = ( + "- [ ] 1. Scaffold workspace (`package.json`)\n" + "- [ ] 2. 
Add domain (`packages/domain/src/index.ts`)\n" + ) + item = _item( + requirements="### REQ-001\n**WHEN** x **THE** y **SHALL** z", tasks_md=tasks + ) + store = TodoStore(version=1, active_id=item.id, todos=[item]) + text, _, _ = build_user_message_with_spec_context( + tmp, + "/agent Continue the active task from where you stopped.", + item=item, + store=store, + focus_requested=True, + inject_todo_spec=False, + ) + self.assertIn("Open implementation tasks (resume)", text) + self.assertIn("package.json", text) + self.assertNotIn("Requirements (summary)", text) + + def test_inject_without_preamble_when_layers_empty(self): + with tempfile.TemporaryDirectory() as tmp: + item = _item() + store = TodoStore(version=1, active_id=item.id, todos=[item]) + text, active, tid = build_user_message_with_spec_context( + tmp, + "Seed requirements", + item=item, + store=store, + focus_requested=True, + inject_todo_spec=True, + ) + self.assertFalse(active) + self.assertEqual(tid, item.id) + self.assertIn("[Active task:", text) + self.assertNotIn("Spec-focus mode", text) + self.assertNotIn("(No requirements yet.)", text) + + def test_light_inject_for_checklist_task(self): + with tempfile.TemporaryDirectory() as tmp: + now = "2026-01-01T00:00:00Z" + item = migrate_todo_layers( + TodoItem( + id="task-2", + title="Explore repo", + spec="", + requirements="", + design="", + tasks_md="", + depends_on=[], + branch="", + pr_url="", + status="open", + links=[], + checklist=[ + ChecklistItem(id="c1", text="List crates", done=False), + ], + created_at=now, + updated_at=now, + ) + ) + store = TodoStore(version=1, active_id=item.id, todos=[item]) + text, active, tid = build_user_message_with_spec_context( + tmp, + "/agent go", + item=item, + store=store, + focus_requested=False, + inject_todo_spec=True, + ) + self.assertEqual(tid, item.id) + self.assertIn("## Checklist", text) + self.assertIn("```markdown", text) + self.assertIn("List crates", text) + self.assertNotIn("Requirements", text) + + 
+if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_spec_gen_agent.py b/tests/spec/test_spec_gen_agent.py new file mode 100644 index 00000000000..a0807bb11cd --- /dev/null +++ b/tests/spec/test_spec_gen_agent.py @@ -0,0 +1,170 @@ +"""Spec generation agent (repo map + explore + richness deepen).""" + +from __future__ import annotations + +import os +import unittest +from unittest.mock import MagicMock, patch + +from cecli.spec.gen_agent import ( + build_deepen_message_for_workspace, + build_spec_explore_message, + spec_gen_agent_enabled, + spec_gen_richness_gate_enabled, + wrap_spec_generate_message, +) +from cecli.spec.todos import TodoItem + + +class TestSpecGenAgent(unittest.TestCase): + def test_explore_message_is_read_only_agent(self): + item = TodoItem(id="a", title="Complex Patient") + msg = build_spec_explore_message( + prompt="iOS journaling app", + section="requirements", + item=item, + ) + self.assertTrue(msg.startswith("/agent")) + self.assertIn("Do NOT create", msg) + self.assertIn("Complex Patient", msg) + + def test_wrap_includes_steering_and_exploration(self): + with patch( + "cecli.spec.gen_agent.build_spec_focus_preamble", + return_value="## Project steering\nUse SwiftUI.\n", + ): + out = wrap_spec_generate_message( + "/tmp/ws", + "## Requirements\nWrite specs.\n", + exploration="- `Sources/App.swift` exists\n", + ) + self.assertIn("Project steering", out) + self.assertIn("Repository exploration", out) + self.assertIn("App.swift", out) + + def test_compact_disables_agent_and_richness_gate(self): + prev = os.environ.get("BV_COMPACT_SPEC_GEN") + os.environ["BV_COMPACT_SPEC_GEN"] = "1" + try: + self.assertFalse(spec_gen_agent_enabled()) + self.assertFalse(spec_gen_richness_gate_enabled()) + finally: + if prev is None: + os.environ.pop("BV_COMPACT_SPEC_GEN", None) + else: + os.environ["BV_COMPACT_SPEC_GEN"] = prev + + def test_compact_write_timeout_uses_full_turn_budget(self): + from cecli.spec.gen_agent import 
spec_gen_write_timeout_s + + prev = os.environ.get("BV_COMPACT_SPEC_GEN") + os.environ["BV_COMPACT_SPEC_GEN"] = "1" + try: + self.assertEqual(spec_gen_write_timeout_s(1800.0), 1740.0) + self.assertEqual(spec_gen_write_timeout_s(600.0), 540.0) + finally: + if prev is None: + os.environ.pop("BV_COMPACT_SPEC_GEN", None) + else: + os.environ["BV_COMPACT_SPEC_GEN"] = prev + + def test_deepen_message_carries_suggestions(self): + item = TodoItem( + id="a", + title="T", + requirements="### REQ-001\n**WHEN** a\n**THE** system **SHALL** b.\n", + ) + msg = build_deepen_message_for_workspace( + workspace="/tmp/ws", + prompt="Feature X", + item=item, + section="requirements", + suggestions=["requirements: add more acceptance criteria"], + ) + self.assertIn("Deepen the spec", msg) + self.assertIn("acceptance criteria", msg) + + def test_run_spec_layer_llm_one_shot_when_agent_disabled(self): + from cecli.spec.gen_agent import run_spec_layer_llm + + item = TodoItem(id="a", title="T") + runner = MagicMock() + runner.apply_spec_gen_route = MagicMock() + runner.run_one_shot.return_value = ( + "## Requirements\n### REQ-001\n**WHEN** a\n**THE** system **SHALL** b.\n" + ) + + with patch("cecli.spec.gen_agent.spec_gen_agent_enabled", return_value=False): + with patch("cecli.spec.gen_agent.spec_gen_richness_gate_enabled", return_value=False): + raw = run_spec_layer_llm( + runner, + workspace="/tmp/ws", + prompt="Build it", + item=item, + section="requirements", + mode="generate", + todo_id="a", + total_turn_timeout_s=600.0, + ) + self.assertIn("REQ-001", raw) + runner.run_one_shot.assert_called_once() + + def test_run_spec_layer_llm_deepens_when_richness_gate_fails(self): + from cecli.spec.gen_agent import run_spec_layer_llm + + item = TodoItem(id="a", title="T") + runner = MagicMock() + runner.apply_spec_gen_route = MagicMock() + thin = "## Requirements\n### REQ-001\n**WHEN** a\n**THE** system **SHALL** b.\n" + deep = ( + "## Requirements\n### REQ-001\n**WHEN** a\n**THE** system 
**SHALL** b.\n" + "### REQ-002\n**WHEN** c\n**THE** system **SHALL** d.\n" + ) + runner.run_one_shot.side_effect = [thin, deep] + + with patch("cecli.spec.gen_agent.spec_gen_agent_enabled", return_value=False): + with patch("cecli.spec.gen_agent.spec_gen_richness_gate_enabled", return_value=True): + raw = run_spec_layer_llm( + runner, + workspace="/tmp/ws", + prompt="Build it", + item=item, + section="requirements", + mode="generate", + todo_id="a", + total_turn_timeout_s=600.0, + ) + self.assertEqual(runner.run_one_shot.call_count, 2) + self.assertIn("REQ-002", raw) + + def test_run_spec_layer_llm_explore_when_agent_enabled(self): + from cecli.spec.gen_agent import run_spec_layer_llm + + item = TodoItem(id="a", title="T") + runner = MagicMock() + runner.apply_spec_gen_route = MagicMock() + runner.run_message.return_value = iter( + [{"type": "done", "assistant_text": "- `src/main.py` exists\n"}] + ) + runner.run_one_shot.return_value = ( + "## Requirements\n### REQ-001\n**WHEN** a\n**THE** system **SHALL** b.\n" + ) + + with patch("cecli.spec.gen_agent.spec_gen_agent_enabled", return_value=True): + with patch("cecli.spec.gen_agent.spec_gen_richness_gate_enabled", return_value=False): + raw = run_spec_layer_llm( + runner, + workspace="/tmp/ws", + prompt="Build it", + item=item, + section="requirements", + mode="generate", + todo_id="a", + total_turn_timeout_s=600.0, + ) + runner.run_message.assert_called_once() + self.assertIn("REQ-001", raw) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_spec_job_debug.py b/tests/spec/test_spec_job_debug.py new file mode 100644 index 00000000000..c80660d4d8f --- /dev/null +++ b/tests/spec/test_spec_job_debug.py @@ -0,0 +1,33 @@ +"""Spec job debug export (unit tests — HTTP routes stay in BrightVision tests/core).""" + +from __future__ import annotations + +import unittest + +from cecli.spec.job_debug import build_spec_job_debug_export +from cecli.spec.jobs import SpecGenerationJob + + +class 
TestSpecJobDebug(unittest.TestCase): + def test_build_spec_job_debug_export_shape(self): + job = SpecGenerationJob( + job_id="abc123", + workspace="/tmp/ws", + todo_id="todo-1", + prompt="Build modules", + mode="generate", + section="requirements", + model="gpt-4o", + status="running", + recent_io_events=[{"type": "progress", "label": "LLM", "message": "Waiting…"}], + ) + payload = build_spec_job_debug_export(job) + self.assertEqual(payload["format"], "brightvision-spec-job-debug-v1") + self.assertEqual(payload["job_id"], "abc123") + self.assertEqual(payload["job"]["status"], "running") + self.assertEqual(payload["job"]["section"], "requirements") + self.assertEqual(len(payload["recent_io_events"]), 1) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_spec_jobs.py b/tests/spec/test_spec_jobs.py new file mode 100644 index 00000000000..1b64811f2b3 --- /dev/null +++ b/tests/spec/test_spec_jobs.py @@ -0,0 +1,80 @@ +"""Background spec-generation job types and timeout helpers.""" + +from __future__ import annotations + +import os +import unittest + +from cecli.spec.jobs import ( + SpecGenerationJob, + job_turn_timeout_s, + job_wall_timeout_s, + spec_gen_section_wait_s, + spec_gen_timeout_s, + spec_gen_turn_timeout_s, +) + + +class TestSpecJobs(unittest.TestCase): + def test_spec_gen_timeout_s_env_override(self): + prev = os.environ.get("LLM_SPEC_GEN_TIMEOUT_S") + os.environ["LLM_SPEC_GEN_TIMEOUT_S"] = "900" + try: + self.assertEqual(spec_gen_timeout_s(), 900.0) + finally: + if prev is None: + os.environ.pop("LLM_SPEC_GEN_TIMEOUT_S", None) + else: + os.environ["LLM_SPEC_GEN_TIMEOUT_S"] = prev + + def test_spec_gen_timeout_s_invalid_env_falls_back(self): + prev = os.environ.get("LLM_SPEC_GEN_TIMEOUT_S") + os.environ["LLM_SPEC_GEN_TIMEOUT_S"] = "not-a-number" + try: + self.assertGreaterEqual(spec_gen_timeout_s(), 60.0) + finally: + if prev is None: + os.environ.pop("LLM_SPEC_GEN_TIMEOUT_S", None) + else: + os.environ["LLM_SPEC_GEN_TIMEOUT_S"] = 
prev + + def test_job_wall_timeout_prefers_job_override(self): + job = SpecGenerationJob( + job_id="j", + workspace="/tmp", + todo_id="t", + wall_timeout_s=180.0, + ) + self.assertEqual(job_wall_timeout_s(job), 180.0) + + def test_job_turn_timeout_prefers_job_override(self): + job = SpecGenerationJob( + job_id="j", + workspace="/tmp", + todo_id="t", + turn_timeout_s=90.0, + ) + self.assertEqual(job_turn_timeout_s(job), 90.0) + + def test_section_wait_bounded_by_job_cap(self): + prev_job = os.environ.get("LLM_SPEC_GEN_TIMEOUT_S") + prev_turn = os.environ.get("LLM_SPEC_GEN_TURN_TIMEOUT_S") + os.environ["LLM_SPEC_GEN_TIMEOUT_S"] = "300" + os.environ["LLM_SPEC_GEN_TURN_TIMEOUT_S"] = "200" + try: + wait = spec_gen_section_wait_s() + self.assertLessEqual(wait, spec_gen_timeout_s()) + self.assertGreaterEqual(wait, spec_gen_turn_timeout_s()) + finally: + for key, prev in ( + ("LLM_SPEC_GEN_TIMEOUT_S", prev_job), + ("LLM_SPEC_GEN_TURN_TIMEOUT_S", prev_turn), + ): + if prev is None: + os.environ.pop(key, None) + else: + os.environ[key] = prev + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_spec_package.py b/tests/spec/test_spec_package.py new file mode 100644 index 00000000000..c263a6157e2 --- /dev/null +++ b/tests/spec/test_spec_package.py @@ -0,0 +1,73 @@ +"""Package boundary: cecli.spec is self-contained and importable without BrightVision.""" + +from __future__ import annotations + +import ast +import unittest +from pathlib import Path + +import cecli.spec +import cecli.spec.ears as ears_pkg +from cecli.spec import ( + SpecGenerationJob, + analyze_requirements, + analyze_traceability, + build_spec_index, +) +from cecli.spec.ears.model import EarsLintResult +from cecli.spec.jobs import spec_gen_timeout_s +from cecli.spec.runtime import AgentTodoSession, SpecTurnRunner + +_FORBIDDEN_PREFIXES = ( + "bright_vision_core", + "fastapi", + "bright_vision", +) + + +def _spec_root() -> Path: + return Path(cecli.spec.__file__).resolve().parent + + 
+class TestSpecPackage(unittest.TestCase): + def test_public_api_imports(self): + self.assertTrue(callable(analyze_requirements)) + self.assertTrue(callable(analyze_traceability)) + self.assertTrue(callable(build_spec_index)) + self.assertTrue(callable(spec_gen_timeout_s)) + job = SpecGenerationJob(job_id="j1", workspace="/tmp", todo_id="t1") + self.assertEqual(job.status, "pending") + + def test_ears_subpackage_exports(self): + self.assertTrue(hasattr(ears_pkg, "analyze_requirements")) + self.assertTrue(hasattr(ears_pkg, "build_spec_index")) + + def test_runtime_protocols_are_importable(self): + # Structural typing — no runtime isinstance checks required. + self.assertTrue(hasattr(SpecTurnRunner, "apply_spec_gen_route")) + self.assertTrue(hasattr(AgentTodoSession, "coder")) + + def test_no_forbidden_imports_in_spec_tree(self): + violations: list[str] = [] + for path in sorted(_spec_root().rglob("*.py")): + tree = ast.parse(path.read_text(encoding="utf-8"), filename=str(path)) + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + if any(alias.name.startswith(p) for p in _FORBIDDEN_PREFIXES): + violations.append(f"{path.name}: import {alias.name}") + elif isinstance(node, ast.ImportFrom): + if node.module and any(node.module.startswith(p) for p in _FORBIDDEN_PREFIXES): + violations.append(f"{path.name}: from {node.module}") + self.assertEqual(violations, [], "\n".join(violations)) + + def test_ears_lint_result_serializes(self): + result = analyze_requirements("### REQ-001\n**WHEN** x\n**THE** system **SHALL** y.\n") + self.assertIsInstance(result, EarsLintResult) + payload = result.to_dict() + self.assertIn("ok", payload) + self.assertIn("issues", payload) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_spec_steering.py b/tests/spec/test_spec_steering.py new file mode 100644 index 00000000000..82d2e25bb87 --- /dev/null +++ b/tests/spec/test_spec_steering.py @@ -0,0 +1,66 @@ +"""Spec-focus 
steering loader.""" + +from __future__ import annotations + +import tempfile +import unittest +from pathlib import Path + +from cecli.spec.steering import ( + STEERING_MAIN_RELPATH, + build_spec_focus_preamble, + load_steering_markdown, + scaffold_steering_files, + scan_steering_files, +) + + +class TestSpecSteering(unittest.TestCase): + def test_load_steering_files(self): + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + (root / ".cecli").mkdir() + (root / ".cecli" / "STEERING.md").write_text( + "Use TypeScript strict mode.", encoding="utf-8" + ) + steering = root / ".cecli" / "steering" + steering.mkdir() + (steering / "security.md").write_text("No secrets in repo.", encoding="utf-8") + text = load_steering_markdown(root) + self.assertIn("strict mode", text) + self.assertIn("security.md", text) + + def test_preamble_includes_spec_focus(self): + with tempfile.TemporaryDirectory() as tmp: + pre = build_spec_focus_preamble(tmp) + self.assertIn("Spec-focus mode", pre) + + def test_scan_steering_files(self): + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + (root / ".cecli").mkdir() + (root / ".cecli" / "STEERING.md").write_text("Rules here.", encoding="utf-8") + steering = root / ".cecli" / "steering" + steering.mkdir() + (steering / "security.md").write_text("", encoding="utf-8") + (steering / "style.md").write_text("Tabs not spaces.", encoding="utf-8") + snapshot = scan_steering_files(root) + self.assertTrue(snapshot.has_content) + self.assertEqual(snapshot.file_count, 2) + self.assertIsNotNone(snapshot.main) + self.assertTrue(snapshot.main.nonempty) + self.assertEqual(len(snapshot.fragments), 2) + self.assertFalse(snapshot.fragments[0].nonempty) + + def test_scaffold_steering_creates_main_once(self): + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + created = scaffold_steering_files(root) + self.assertEqual(created, [STEERING_MAIN_RELPATH]) + self.assertTrue((root / ".cecli" / "STEERING.md").is_file()) + again = 
scaffold_steering_files(root) + self.assertEqual(again, []) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_tasks_cli.py b/tests/spec/test_tasks_cli.py new file mode 100644 index 00000000000..2964f4d6c83 --- /dev/null +++ b/tests/spec/test_tasks_cli.py @@ -0,0 +1,30 @@ +"""CLI tests for cecli.spec.tasks_cli.""" + +from __future__ import annotations + +from pathlib import Path + +from cecli.spec.tasks_cli import main +from cecli.spec.todos import TodoItem, WorkspaceTodos, _now_iso + + +def test_cli_materialize_and_progress(tmp_path: Path): + api = WorkspaceTodos(tmp_path) + store = api.load() + item = TodoItem( + id="t1", + title="Feature", + tasks_md="- [ ] 1.1 Wire (depends: none)\n- [x] 1.2 Test\n", + checklist=[], + created_at=_now_iso(), + updated_at=_now_iso(), + ) + store.todos.append(item) + store.active_id = item.id + api.save(store) + + assert main(["--workspace", str(tmp_path), "materialize", "--todo-id", "t1"]) == 0 + assert main(["--workspace", str(tmp_path), "progress", "--todo-id", "t1"]) == 0 + + updated = api.load().todos[0] + assert len(updated.checklist) == 2 diff --git a/tests/spec/test_todo_markdown.py b/tests/spec/test_todo_markdown.py new file mode 100644 index 00000000000..2123d007892 --- /dev/null +++ b/tests/spec/test_todo_markdown.py @@ -0,0 +1,60 @@ +"""Markdown import/export for workspace tasks.""" + +from __future__ import annotations + +import unittest + +from cecli.spec.markdown import export_markdown, import_markdown +from cecli.spec.todos import ChecklistItem, TodoItem, TodoStore + + +class TestTodoMarkdown(unittest.TestCase): + def test_export_import_roundtrip(self): + store = TodoStore( + todos=[ + TodoItem( + id="task-1", + title="Auth flow", + status="in_progress", + requirements="### REQ-001\n**WHEN** login\n**THE** system **SHALL** auth.\n", + design="## Overview\nOAuth.", + tasks_md="- [ ] 1. 
Add route (depends: none)", + checklist=[ChecklistItem(id="c1", text="Wire UI", done=False)], + depends_on=["task-0"], + branch="feature/auth", + ) + ], + active_id="task-1", + ) + md = export_markdown(store) + self.assertIn("# Auth flow", md) + self.assertIn("activeId: task-1", md) + self.assertIn("## Requirements", md) + self.assertIn("REQ-001", md) + + imported = import_markdown(md) + self.assertEqual(len(imported.todos), 1) + item = imported.todos[0] + self.assertEqual(item.title, "Auth flow") + self.assertEqual(item.id, "task-1") + self.assertIn("REQ-001", item.requirements) + self.assertEqual(imported.active_id, "task-1") + self.assertEqual(len(item.checklist), 1) + self.assertEqual(item.checklist[0].text, "Wire UI") + + def test_import_legacy_specification_section(self): + md = """\ +# Legacy task +id: legacy-1 +status: todo + +## Specification +Single-layer spec body. +""" + store = import_markdown(md) + self.assertEqual(len(store.todos), 1) + self.assertIn("Single-layer", store.todos[0].spec) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_todo_spec_ears.py b/tests/spec/test_todo_spec_ears.py new file mode 100644 index 00000000000..b30f88020d3 --- /dev/null +++ b/tests/spec/test_todo_spec_ears.py @@ -0,0 +1,40 @@ +"""E5: EARS context in spec generate and apply gate.""" + +from __future__ import annotations + +import unittest + +from cecli.spec.ears.prompt import ( + format_spec_quality_for_prompt, + requirements_pass_ears, +) +from cecli.spec.generate import build_generate_message +from cecli.spec.todos import TodoItem + + +class TestTodoSpecEars(unittest.TestCase): + def test_refine_prompt_includes_ears_section(self): + item = TodoItem( + id="t1", + title="Auth", + requirements="### REQ-001\n**WHEN** x\n**THE** system shows y.\n", + design="", + tasks_md="", + ) + msg = build_generate_message("fix", mode="refine", item=item) + self.assertIn("Current spec quality", msg) + self.assertIn("EARS_NO_SHALL", msg) + + def 
test_requirements_pass_ears(self): + ok, issues = requirements_pass_ears( + "### REQ-001\n**WHEN** a\n**THE** system **SHALL** b.\n" + ) + self.assertTrue(ok) + self.assertEqual(issues, []) + + def test_format_spec_quality_empty(self): + self.assertEqual(format_spec_quality_for_prompt("", "", ""), "") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_todo_spec_phased.py b/tests/spec/test_todo_spec_phased.py new file mode 100644 index 00000000000..8e8bfe860cf --- /dev/null +++ b/tests/spec/test_todo_spec_phased.py @@ -0,0 +1,146 @@ +"""Phased spec section prompts and merge (no LLM).""" + +from __future__ import annotations + +import os +import unittest + +from cecli.spec.generate import ( + build_generate_message, + merge_generated_layers, + parse_generated_layers, + validate_section_prerequisites, +) +from cecli.spec.todos import TodoItem + + +class TestTodoSpecPhased(unittest.TestCase): + def _item(self) -> TodoItem: + return TodoItem( + id="abc", + title="Moon base", + requirements="### REQ-001\n**WHEN** launch\n**THE** system **SHALL** land.\n", + design="## Overview\nREQ-001", + tasks_md="- [ ] 1. 
Step (depends: none)", + ) + + def test_requirements_prompt_includes_partial_draft(self): + item = self._item() + item.requirements = "### REQ-001\nDraft only" + msg = build_generate_message("Expand coverage", item=item, section="requirements") + self.assertIn("Existing requirements draft", msg) + self.assertIn("Draft only", msg) + self.assertIn("## Requirements", msg) + self.assertNotIn("## Design", msg) + + def test_design_prompt_includes_requirements_and_partial_design(self): + item = self._item() + item.design = "## Draft\nPartial" + msg = build_generate_message("Add modules", item=item, section="design") + self.assertIn("REQ-001", msg) + self.assertIn("Existing design draft", msg) + self.assertIn("Partial", msg) + self.assertNotIn("Current spec quality", msg) + + def test_tasks_prompt_omits_ears_quality_block(self): + item = self._item() + msg = build_generate_message("Break down work", item=item, section="tasks_md") + self.assertNotIn("Current spec quality", msg) + + def test_tasks_prompt_includes_req_and_design(self): + item = self._item() + item.tasks_md = "" + msg = build_generate_message("Break down work", item=item, section="tasks_md") + self.assertIn("REQ-001", msg) + self.assertIn("## Overview", msg) + self.assertIn("## Implementation tasks", msg) + + def test_merge_design_keeps_requirements(self): + item = self._item() + parsed = {"requirements": "", "design": "## New design\nREQ-001", "tasks_md": ""} + merged = merge_generated_layers(item, parsed, section="design") + self.assertIn("REQ-001", merged["requirements"]) + self.assertIn("New design", merged["design"]) + self.assertIn("Step", merged["tasks_md"]) + + def test_validate_prerequisites(self): + item = self._item() + item.requirements = "" + with self.assertRaises(ValueError): + validate_section_prerequisites(item, "design") + item.requirements = "req" + item.design = "" + with self.assertRaises(ValueError): + validate_section_prerequisites(item, "tasks_md") + + def test_parse_design_only(self): 
+ text = "## Design\n## Overview\nHandles REQ-001.\n" + layers = parse_generated_layers(text, section="design") + self.assertIn("Overview", layers["design"]) + + def test_parse_tasks_header_alias(self): + text = "## Requirements\n### REQ-001\nWHEN x THE system SHALL y.\n\n## Tasks\n- [ ] 1. Step (depends: none)\n" + layers = parse_generated_layers(text, section="tasks_md") + self.assertIn("1. Step", layers["tasks_md"]) + + def test_parse_deepen_pass_tail(self): + text = ( + "## Implementation tasks\n- [ ] 1. Thin step (depends: none)\n\n" + "--- deepen pass ---\n\n" + "## Implementation tasks\n" + "- [ ] 1. Wire API for REQ-001 (depends: none)\n" + "- [ ] 2. Add tests for REQ-001 (depends: 1)\n" + ) + layers = parse_generated_layers(text, section="tasks_md") + self.assertIn("Wire API", layers["tasks_md"]) + self.assertIn("Add tests", layers["tasks_md"]) + + def test_requirements_prompt_uses_kiro_structure(self): + item = self._item() + msg = build_generate_message("New feature", item=item, section="requirements") + self.assertIn("User Story", msg) + self.assertIn("Acceptance Criteria", msg) + self.assertIn("### Introduction", msg) + + def test_design_prompt_requests_full_subsections(self): + item = self._item() + msg = build_generate_message("Design it", item=item, section="design") + for label in ( + "Architecture", + "Components and Interfaces", + "Data Models", + "Error Handling", + "Testing Strategy", + ): + self.assertIn(label, msg) + + def test_compact_design_prompt_omits_kiro_subsections(self): + item = self._item() + prev = os.environ.get("BV_COMPACT_SPEC_GEN") + os.environ["BV_COMPACT_SPEC_GEN"] = "1" + try: + msg = build_generate_message("Design it", item=item, section="design") + self.assertIn("under 35 lines", msg) + self.assertNotIn("### Data Models", msg) + finally: + if prev is None: + os.environ.pop("BV_COMPACT_SPEC_GEN", None) + else: + os.environ["BV_COMPACT_SPEC_GEN"] = prev + + def test_tasks_prompt_requests_requirement_traceability(self): + 
item = self._item() + msg = build_generate_message("Plan it", item=item, section="tasks_md") + self.assertIn("_Requirements:", msg) + self.assertIn("depends:", msg) + + def test_all_layers_prompt_keeps_three_headings(self): + msg = build_generate_message("Build a thing", section="all") + self.assertIn("## Requirements", msg) + self.assertIn("## Design", msg) + self.assertIn("## Implementation tasks", msg) + self.assertIn("User Story", msg) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/spec/test_workspace_paths.py b/tests/spec/test_workspace_paths.py new file mode 100644 index 00000000000..eb24bc8f300 --- /dev/null +++ b/tests/spec/test_workspace_paths.py @@ -0,0 +1,45 @@ +"""Workspace metadata paths under ``.cecli/``.""" + +from __future__ import annotations + +from pathlib import Path + +from cecli.spec.paths import ( + WORKSPACE_META_DIR, + attachments_dir, + todos_json_path, + workspace_meta_dir, +) +from cecli.spec.todos import WorkspaceTodos + + +def test_workspace_meta_dir_creates_cecli(tmp_path: Path): + meta = workspace_meta_dir(tmp_path) + assert meta.name == WORKSPACE_META_DIR == ".cecli" + assert meta.is_dir() + + +def test_existing_cecli_agents_preserved(tmp_path: Path): + cecli = tmp_path / ".cecli" + (cecli / "agents").mkdir(parents=True) + meta = workspace_meta_dir(tmp_path) + assert (cecli / "agents").is_dir() + assert meta == cecli + + +def test_workspace_todos_uses_cecli(tmp_path: Path): + api = WorkspaceTodos(tmp_path) + assert str(api.path).endswith(".cecli/todos.json") + assert str(api.specs_root).endswith(".cecli/specs") + + +def test_attachments_dir_under_cecli(tmp_path: Path): + path = attachments_dir(tmp_path) + assert path == tmp_path / ".cecli" / "attachments" + + +def test_concurrent_meta_dir_creation(tmp_path: Path): + """Parallel calls must not raise.""" + workspace_meta_dir(tmp_path) + workspace_meta_dir(tmp_path) + assert todos_json_path(tmp_path).parent.is_dir() diff --git a/tests/spec/test_workspace_todos.py 
b/tests/spec/test_workspace_todos.py new file mode 100644 index 00000000000..3eb176d840b --- /dev/null +++ b/tests/spec/test_workspace_todos.py @@ -0,0 +1,159 @@ +import unittest +from pathlib import Path + +from cecli.spec.todos import WorkspaceTodos +from cecli.utils import GitTemporaryDirectory, make_repo + + +class TestWorkspaceTodos(unittest.TestCase): + def test_roundtrip(self): + with GitTemporaryDirectory() as temp_dir: + root = Path(temp_dir) + make_repo(root) + api = WorkspaceTodos(root) + item = api.add("Ship feature", template="feature") + self.assertIn("## Goal", item.spec) + self.assertTrue(api.path.is_file()) + store = api.load() + self.assertEqual(len(store.todos), 1) + self.assertEqual(store.todos[0].title, "Ship feature") + api.set_active(item.id) + store = api.load() + self.assertEqual(store.active_id, item.id) + api.append_links(["src/foo.ts", "commit:abc123"]) + store = api.load() + self.assertIn("src/foo.ts", store.todos[0].links) + api.mark_done(item.id) + store = api.load() + self.assertEqual(store.todos[0].status, "done") + self.assertIsNone(store.active_id) + + def test_move_reorders(self): + with GitTemporaryDirectory() as temp_dir: + root = Path(temp_dir) + make_repo(root) + api = WorkspaceTodos(root) + a = api.add("First") + b = api.add("Second") + store = api.load() + self.assertEqual(store.todos[0].id, b.id) + api.move(b.id, "down") + store = api.load() + self.assertEqual(store.todos[0].id, a.id) + self.assertEqual(store.todos[1].id, b.id) + + def test_import_spec_files(self): + with GitTemporaryDirectory() as temp_dir: + root = Path(temp_dir) + make_repo(root) + api = WorkspaceTodos(root) + item = api.add("Spec task", template="spec-driven") + api.sync_spec_files(item) + spec_dir = api.specs_root / item.id + (spec_dir / "requirements.md").write_text("### REQ-1\nUpdated", encoding="utf-8") + loaded = api.import_spec_files(item.id) + self.assertIn("Updated", loaded.requirements) + + def test_import_spec_files_short_folder_id(self): + 
with GitTemporaryDirectory() as temp_dir: + root = Path(temp_dir) + make_repo(root) + api = WorkspaceTodos(root) + item = api.add("Spec task") + full_dir = api.specs_root / item.id + if full_dir.is_dir(): + import shutil + + shutil.rmtree(full_dir) + short = item.id[:8] + spec_dir = api.specs_root / short + spec_dir.mkdir(parents=True, exist_ok=True) + (spec_dir / "requirements.md").write_text("### REQ-1\nFrom disk", encoding="utf-8") + loaded = api.import_spec_files(item.id) + self.assertIn("From disk", loaded.requirements) + + def test_maybe_import_spec_from_disk_when_layers_empty(self): + with GitTemporaryDirectory() as temp_dir: + root = Path(temp_dir) + make_repo(root) + api = WorkspaceTodos(root) + item = api.add("Spec task") + full_dir = api.specs_root / item.id + if full_dir.is_dir(): + import shutil + + shutil.rmtree(full_dir) + short = item.id[:8] + spec_dir = api.specs_root / short + spec_dir.mkdir(parents=True, exist_ok=True) + (spec_dir / "requirements.md").write_text("### REQ-1\nAuto", encoding="utf-8") + loaded = api.maybe_import_spec_from_disk(item) + self.assertIn("Auto", loaded.requirements) + + def test_delete_removes_spec_folder(self): + with GitTemporaryDirectory() as temp_dir: + root = Path(temp_dir) + make_repo(root) + api = WorkspaceTodos(root) + item = api.add("Gone", template="spec-driven") + api.sync_spec_files(item) + spec_dir = api.specs_root / item.id + self.assertTrue(spec_dir.is_dir()) + api.delete(item.id) + self.assertFalse(spec_dir.is_dir()) + + def test_prune_orphan_spec_folders(self): + with GitTemporaryDirectory() as temp_dir: + root = Path(temp_dir) + make_repo(root) + api = WorkspaceTodos(root) + item = api.add("Keep") + orphan = api.specs_root / "deleted-task-id" + orphan.mkdir(parents=True) + (orphan / "requirements.md").write_text("orphan", encoding="utf-8") + count, ids = api.prune_orphan_spec_folders() + self.assertEqual(count, 1) + self.assertEqual(ids, ["deleted-task-id"]) + self.assertFalse(orphan.is_dir()) + 
self.assertTrue( + (api.specs_root / item.id).is_dir() or not (api.specs_root / item.id).exists() + ) + + def test_sync_spec_files_writes_layers(self): + with GitTemporaryDirectory() as temp_dir: + root = Path(temp_dir) + make_repo(root) + api = WorkspaceTodos(root) + item = api.add("Export task", template="spec-driven") + item, _ = api.update(item.id, requirements="### REQ-1\nFrom json") + api.sync_spec_files(item) + spec_dir = api.specs_root / item.id + self.assertIn("From json", (spec_dir / "requirements.md").read_text(encoding="utf-8")) + + def test_delete_removes_linked_agent_todo_txt(self): + from cecli.spec.agent_todos import ( + AgentTodoRow, + format_agent_todo_txt, + import_agent_plan_for_workspace, + ) + + with GitTemporaryDirectory() as temp_dir: + root = Path(temp_dir) + make_repo(root) + agent_path = root / ".cecli" / "agents" / "default" / "todo.txt" + agent_path.parent.mkdir(parents=True, exist_ok=True) + agent_path.write_text( + format_agent_todo_txt([AgentTodoRow(text="Ship it", done=False, current=True)]), + encoding="utf-8", + ) + store = import_agent_plan_for_workspace(root) + todo_id = store.todos[0].id + api = WorkspaceTodos(root) + api.delete(todo_id) + store = api.load() + self.assertEqual(len(store.todos), 0) + self.assertFalse(agent_path.is_file()) + + +if __name__ == "__main__": + unittest.main()