diff --git a/skills/typescript-client/SKILL.md b/skills/typescript-client/SKILL.md
index 22c2c3d9ed6..a78ef9a15bb 100644
--- a/skills/typescript-client/SKILL.md
+++ b/skills/typescript-client/SKILL.md
@@ -106,3 +106,10 @@ conn.db.user.onInsert((ctx, user) => console.log('Joined:', user.name));
 conn.db.user.onDelete((ctx, user) => console.log('Left:', user.name));
 conn.db.user.onUpdate((ctx, oldUser, newUser) => console.log('Updated:', newUser.name));
 ```
+
+## Gotchas
+
+- **`useTable` rows are `readonly`.** Copy before sorting/mutating, or it fails to type-check:
+  `const [rows] = useTable(tables.message); const sorted = [...rows].sort(...)`.
+- **bigint in JSX.** ids/counts from `t.u64()`/`t.i64()` columns are `bigint`, which React
+  cannot render. Wrap it: `{String(row.id)}` (lossless), or `{Number(count)}` if it stays below 2^53.
diff --git a/skills/typescript-server/SKILL.md b/skills/typescript-server/SKILL.md
index e71e17a1290..b965ceca72e 100644
--- a/skills/typescript-server/SKILL.md
+++ b/skills/typescript-server/SKILL.md
@@ -83,6 +83,14 @@ const spacetimedb = schema({ entity, record });  // ONE object, not spread args
 export default spacetimedb;
 ```
 
+The published module's **entry file must export the schema as default**. If you split tables
+(`schema.ts`) from reducers/lifecycle (`index.ts`), re-export it from the entry:
+
+```typescript
+// index.ts
+export { default } from './schema';   // re-export the schema for the module entry
+```
+
 ## Reducers
 
 Export name becomes the reducer name:
@@ -131,6 +139,10 @@ export const onDisconnect = spacetimedb.clientDisconnected((ctx) => { ... });
 // Auth: ctx.sender is the caller's Identity
 if (!row.owner.equals(ctx.sender)) throw new SenderError('unauthorized');
 
+// ctx.connectionId: the per-connection id, NULLABLE (ConnectionId | null) — null-check before use.
+// One Identity can hold several connections (multiple tabs/devices).
+if (ctx.connectionId) { /* ... */ }
+
 // Server timestamp (deterministic per reducer call)
 ctx.db.item.insert({ id: 0n, createdAt: ctx.timestamp });
 
@@ -161,6 +173,8 @@ export const tick = spacetimedb.reducer(
 
 // One-time: ScheduleAt.time(ctx.timestamp.microsSinceUnixEpoch + delayMicros)
 // Repeating: ScheduleAt.interval(60_000_000n)
+// Read time back from a scheduleAt value (tagged union):
+//   const micros = at.tag === 'time' ? at.value : at.value.microsSinceUnixEpoch;  // bigint
 ```
 
 ## Custom Types
@@ -183,6 +197,10 @@ const Shape = t.enum('Shape', {
 
 ## Views
 
+A client subscribing to a view receives only the rows the view returns. Use a per-user view
+(keyed on `ctx.sender`) for per-viewer access control: deleting a row the view depends on
+(e.g. a membership row) automatically removes the rows it was exposing from that client.
+
 ```typescript
 // Anonymous view (same for all clients):
 export const activeUsers = spacetimedb.anonymousView(
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/base_mongodb.md b/tools/llm-oneshot/apps/chat-app/prompts/base_mongodb.md
new file mode 100644
index 00000000000..a1bc00d65d8
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/base_mongodb.md
@@ -0,0 +1,37 @@
+# MongoDB Chat App - Base Prompt
+
+Create me a **real-time chat app** using **MongoDB as the backend**.
+
+Project root is:
+
+```
+apps/chat-app/
+```
+
+Create the project under a **timestamped folder**:
+
+```
+apps/chat-app/mongodb/chat-app-YYYYMMDD-HHMMSS/
+```
+
+Use `chat-app` as the **database name** for MongoDB.
+
+## Constraints
+
+- Work **entirely inside** your timestamped folder. Do not touch any other existing code.
+- Only create/modify code under:
+  - `apps/chat-app/mongodb/chat-app-YYYYMMDD-HHMMSS/server/` (server-side TypeScript)
+  - `apps/chat-app/mongodb/chat-app-YYYYMMDD-HHMMSS/client/` (client-side TypeScript/React)
+- Keep it minimal and readable.
+
+## UI Requirements
+
+- Dark theme with consistent color palette
+- Clear visual hierarchy — active states, hover effects, focus indicators
+- Responsive layout that works on desktop (mobile optional)
+- Loading and empty states for all data-dependent views
+- Visual feedback for user actions (button states, success/error indicators)
+
+## Features
+
+<!-- Include feature files below this line -->
diff --git a/tools/llm-oneshot/apps/chat-app/prompts/language/typescript-mongodb.md b/tools/llm-oneshot/apps/chat-app/prompts/language/typescript-mongodb.md
new file mode 100644
index 00000000000..39686100bd5
--- /dev/null
+++ b/tools/llm-oneshot/apps/chat-app/prompts/language/typescript-mongodb.md
@@ -0,0 +1,44 @@
+# Language: TypeScript + MongoDB
+
+Create this app using **MongoDB as the backend** with **TypeScript**.
+
+## Project Setup
+
+```
+apps/chat-app/staging/typescript/<LLM_MODEL>/mongodb/chat-app-YYYYMMDD-HHMMSS/
+```
+
+Database name: `chat-app`
+
+## Architecture
+
+**Backend:** Node.js + Express + Mongoose + Socket.io
+**Client:** React + Vite + TypeScript
+
+## Constraints
+
+- Only create/modify code under:
+  - `.../server/` (server-side TypeScript)
+  - `.../client/` (client-side TypeScript/React)
+- Keep it minimal and readable.
+
+## Branding & Styling
+
+- App title: **"MongoDB Chat"**
+- Dark theme using official MongoDB brand colors:
+  - Primary: `#00ED64` (MongoDB green)
+  - Primary hover: `#00C957` (darker green)
+  - Secondary: `#00684A` (MongoDB forest green)
+  - Background: `#001E2B` (MongoDB dark slate)
+  - Surface: `#023430` (deep green-slate)
+  - Border: `#1C2D38` (muted slate border)
+  - Text: `#E8EDEB` (light gray)
+  - Text muted: `#889397` (MongoDB gray)
+  - Accent: `#00ED64` (MongoDB green)
+  - Success: `#00ED64` (green for online indicators)
+  - Warning: `#FFC010` (MongoDB amber)
+  - Danger: `#FF4F4F` (MongoDB red)
+
+## Output
+
+Return only code blocks with file headers for the files you create.
diff --git a/tools/llm-sequential-upgrade/.gitattributes b/tools/llm-sequential-upgrade/.gitattributes
new file mode 100644
index 00000000000..a829a52574a
--- /dev/null
+++ b/tools/llm-sequential-upgrade/.gitattributes
@@ -0,0 +1,3 @@
+# Shell scripts here are run under bash (git-bash on Windows). Force LF so they
+# don't get CRLF-converted on checkout and break under stricter bash (WSL/CI).
+*.sh text eol=lf
diff --git a/tools/llm-sequential-upgrade/.gitignore b/tools/llm-sequential-upgrade/.gitignore
index 84ae31595e1..14aa619a63d 100644
--- a/tools/llm-sequential-upgrade/.gitignore
+++ b/tools/llm-sequential-upgrade/.gitignore
@@ -4,15 +4,12 @@
 **/results/**/.vite/
 **/results/**/drizzle/
 
+# Local env files inside generated apps (not committed)
+**/results/**/.env
+
 # Telemetry backup files
 **/telemetry/*.jsonl.bak
 
-
-# Playwright
-**/playwright/node_modules/
-**/playwright/test-results/
-**/playwright/playwright-report/
-
 # Isolation git repos inside generated apps (created by run.sh, cleaned up after)
 **/results/**/.git/
 # OTel collector live dump - not tracked
@@ -21,3 +18,13 @@ telemetry/metrics.jsonl
 
 # Raw telemetry contains PII (email, account IDs) - store privately
 **/telemetry/**/raw-telemetry.jsonl
+# Full Claude Code session transcript (large; contains absolute paths/PII) - store privately
+**/telemetry/**/session-transcript.jsonl
+# Verbose run transcripts (large, regenerable) - not tracked
+**/telemetry/**/run.log
+# Local absolute app paths (machine-specific)
+**/telemetry/**/app-dir.txt
+**/telemetry/**/metadata.json
+
+# Sequential-upgrade run output lives in the external spacetimedb-ai-test-results repo
+sequential-upgrade/sequential-upgrade-*/
diff --git a/tools/llm-sequential-upgrade/CLAUDE.md b/tools/llm-sequential-upgrade/CLAUDE.md
index 69e38a4c42c..b83f19f48a9 100644
--- a/tools/llm-sequential-upgrade/CLAUDE.md
+++ b/tools/llm-sequential-upgrade/CLAUDE.md
@@ -1,8 +1,6 @@
-# Sequential Upgrade: LLM Cost-to-Done Benchmark
+# Chat App: Build Instructions
 
-You are running an automated benchmark that measures the **total cost to build a fully working chat app** — comparing SpacetimeDB vs PostgreSQL.
-
-Your job is to **generate, build, deploy, and fix** the app. Grading happens in a separate manual session — you do NOT test in the browser.
+Your job is to **generate, build, deploy, and fix** a fully working chat app. Verification happens in a separate session — you do NOT test in the browser.
 
 ---
 
@@ -30,10 +28,18 @@ Depending on the mode passed in the launch prompt:
 
 ---
 
+## Shell Syntax
+
+Windows host with both a Bash and a PowerShell tool — don't mix syntax. In the Bash tool use
+POSIX: `mkdir -p` not `New-Item`, `sleep` not `Start-Sleep`, `2>/dev/null` not `2>$null`,
+`VAR=x` not `$VAR=x`. PowerShell cmdlets in bash fail with "command not found".
+
+---
+
 ## Anti-Contamination
 
 Do NOT read any files under:
-- `../llm-oneshot/apps/chat-app/typescript/` (graded reference implementations)
+- `../llm-oneshot/apps/chat-app/typescript/` (reference implementations)
 - `../llm-oneshot/apps/chat-app/staging/`
 - Any other AI-generated app code in this workspace
 
@@ -46,7 +52,7 @@ Only read files you created, the backend instructions, and the feature prompts.
 1. Read `backends/<backend>.md` for pre-flight checks, phases, and deploy steps
 2. Read the language setup: `../llm-oneshot/apps/chat-app/prompts/language/typescript-<backend>.md`
 3. Read the feature prompt: `../llm-oneshot/apps/chat-app/prompts/composed/<NN>_<name>.md`
-4. Follow the phases in the backend file (generate backend → bindings → client → verify → deploy)
+4. Follow the phases in the backend file, in order
 5. Output `DEPLOY_COMPLETE` when the dev server is confirmed running
 
 For **upgrade**: only add the NEW features from the target level. Do not rewrite existing working features.
@@ -62,8 +68,6 @@ For **upgrade**: only add the NEW features from the target level. Do not rewrite
 5. Append to `ITERATION_LOG.md` (see format below)
 6. Output `FIX_COMPLETE`
 
-Do NOT do browser testing — that happens in the grading session.
-
 ---
 
 ## ITERATION_LOG.md
@@ -85,6 +89,6 @@ Append to this file after every fix. Never overwrite.
 
 ---
 
-## Cost Tracking
+## Telemetry
 
-Cost is tracked automatically via OpenTelemetry — do NOT estimate tokens or produce a COST_REPORT.md. That is generated automatically after the session ends.
+Do NOT estimate tokens or produce a COST_REPORT.md — that's captured automatically after the session ends.
diff --git a/tools/llm-sequential-upgrade/DEVELOP.md b/tools/llm-sequential-upgrade/DEVELOP.md
index f77bc520dc7..18251388bb0 100644
--- a/tools/llm-sequential-upgrade/DEVELOP.md
+++ b/tools/llm-sequential-upgrade/DEVELOP.md
@@ -263,20 +263,20 @@ llm-sequential-upgrade/
   DEVELOP.md                       # This file (for humans)
   run.sh                           # Code Agent launcher (generate/fix/upgrade)
   grade.sh                         # Grade Agent launcher (interactive Chrome MCP)
-  grade-playwright.sh              # Grade via Playwright (optional, deterministic)
+  templates/                       # BUG_REPORT.md / ITERATION_LOG.md formats
   docker-compose.otel.yaml         # OTel Collector container
   otel-collector-config.yaml       # Collector config (OTLP → JSON files)
   parse-telemetry.mjs              # Telemetry → COST_REPORT.md
   backends/
     spacetime.md                   # SpacetimeDB-specific phases
-    spacetime-sdk-rules.md         # SpacetimeDB SDK patterns
     spacetime-templates.md         # Code templates
+    # SDK reference = the official skills/typescript-{server,client}/SKILL.md
     postgres.md                    # PostgreSQL-specific phases
+    mongodb.md                     # MongoDB-specific phases
   test-plans/
     feature-01-basic-chat.md       # Per-feature browser test scripts
     ...
     feature-15-anonymous-migration.md
-    playwright/                    # Optional Playwright test suite
   telemetry/                       # Shared OTel Collector output
   sequential-upgrade/              # Sequential upgrade test variant
     sequential-upgrade-YYYYMMDD/   # Dated run with results, telemetry, inputs
diff --git a/tools/llm-sequential-upgrade/GRADING.md b/tools/llm-sequential-upgrade/GRADING.md
index 0a0e02cf68e..d92afe2f85e 100644
--- a/tools/llm-sequential-upgrade/GRADING.md
+++ b/tools/llm-sequential-upgrade/GRADING.md
@@ -11,6 +11,7 @@ You need TWO Chrome browser profiles so each user gets completely separate ident
 1. **Browser A (default profile):** Navigate to the app URL and register as "Alice"
    - SpacetimeDB: `http://localhost:6173`
    - PostgreSQL: `http://localhost:6273`
+   - MongoDB: `http://localhost:6373`
 
 2. **Switch to Browser B:** Use `switch_browser` to switch to the second Chrome profile
 
diff --git a/tools/llm-sequential-upgrade/GRADING_WORKFLOW.md b/tools/llm-sequential-upgrade/GRADING_WORKFLOW.md
index 0088bac2a5a..1162597cbfc 100644
--- a/tools/llm-sequential-upgrade/GRADING_WORKFLOW.md
+++ b/tools/llm-sequential-upgrade/GRADING_WORKFLOW.md
@@ -25,10 +25,12 @@ Code generation and fix iterations are token-tracked (the benchmark metric). Gra
 ```
 
 After generation, apps are running at:
-- **SpacetimeDB**: `http://localhost:5173` (run-index 0)
-- **PostgreSQL**: `http://localhost:5274` (run-index 1)
+- **SpacetimeDB**: `http://localhost:6173`
+- **PostgreSQL**: `http://localhost:6273`
+- **MongoDB**: `http://localhost:6373`
 
-Port offsets for parallel runs: run-index N uses ports `5173 + N*100` (spacetime) and `5174 + N*100` (postgres).
+Port offsets for parallel runs: run-index N adds N to the base port —
+`6173 + N` (spacetime), `6273 + N` (postgres), `6373 + N` (mongodb).
 
 ---
 
diff --git a/tools/llm-sequential-upgrade/README.md b/tools/llm-sequential-upgrade/README.md
index 0f2bd36ee72..1d6278e1260 100644
--- a/tools/llm-sequential-upgrade/README.md
+++ b/tools/llm-sequential-upgrade/README.md
@@ -21,14 +21,14 @@ Side-by-side results give a direct comparison of AI-generation cost across backe
 ## Directory contents
 
 - `run.sh`: orchestrates generation, upgrade, and fix sessions. Supports `--upgrade`, `--fix`, `--composed-prompt`, `--resume-session`.
-- `grade.sh` / `grade-agents.sh` / `grade-playwright.sh`: grading harnesses (manual + automated)
+- `grade.sh`: interactive grading harness (manual, Chrome MCP)
+- `templates/`: canonical `BUG_REPORT.md` / `ITERATION_LOG.md` formats for grading
 - `benchmark.sh` / `run-loop.sh`: batch runners for parallel or sequential benchmark execution
 - `cleanup.sh` / `reset-app.sh`: dev utilities
 - `benchmark-viewer.html`: local viewer for METRICS_DATA.json files (open in browser, drop JSON)
 - `generate-report.mjs`: aggregate per-session cost-summary.json into a markdown report
 - `parse-telemetry.mjs`: parse OTel log stream into per-session cost-summary.json
-- `parse-playwright-results.mjs`: convert Playwright JSON output to grading markdown
-- `docker-compose.otel.yaml` / `otel-collector-config.yaml`: OTel collector + PostgreSQL
+- `docker-compose.otel.yaml` / `otel-collector-config.yaml`: OTel collector + PostgreSQL + MongoDB
 - `backends/`: per-backend setup / SDK reference documents given to the AI
 - `perf-benchmark/`: runtime throughput benchmark (msgs/sec) for the AI-generated apps
 - `CLAUDE.md` / `DEVELOP.md` / `GRADING.md` / `GRADING_WORKFLOW.md`: process documentation
diff --git a/tools/llm-sequential-upgrade/RUNBOOK.md b/tools/llm-sequential-upgrade/RUNBOOK.md
new file mode 100644
index 00000000000..f811366f090
--- /dev/null
+++ b/tools/llm-sequential-upgrade/RUNBOOK.md
@@ -0,0 +1,263 @@
+# Sequential Upgrade Benchmark — Operator Runbook
+
+A practical reference for running the LLM cost-to-done benchmark end to end.
+Backend-agnostic, with MongoDB as the working example. Pair it with SpacetimeDB
+for the head-to-head.
+
+> What it measures: the total LLM cost (tokens + $), bug rate, and fix iterations
+> to build the *same* chat app on each backend, graded against the same feature
+> spec. Lower cost / fewer bugs / fewer iterations = easier to build on.
+
+---
+
+## 0. Backends, ports, layout (reference)
+
+| Backend | Vite (client) | API | Database | Title |
+|---|---|---|---|---|
+| `spacetime` | 6173 | (module on STDB :3000) | SpacetimeDB | "SpacetimeDB Chat" |
+| `postgres` | 6273 | Express :6001 | Postgres :6432 | "PostgreSQL Chat" |
+| `mongodb` | 6373 | Express :6001 | Mongo :6437 | "MongoDB Chat" |
+
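+Parallel runs: `--run-index N` adds N to each port (6373+N, 6001+N, …) and uses a
+per-run database (`chat-app_runN`). `postgres` and `mongodb` share API port 6001 — give them different run indexes if they run concurrently.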
+
+Run output lands here (gitignored, published separately to the results repo):
+```
+sequential-upgrade/sequential-upgrade-YYYYMMDD/
+  <backend>/
+    results/chat-app-<ts>/        # the generated app
+      backend/ | server/  + client/
+      .benchmark-backend          # marker used by fix/upgrade/grade detection
+      level-1/ … level-11/        # snapshots taken before each upgrade
+      BUG_REPORT.md               # you write this when grading finds bugs
+      ITERATION_LOG.md            # fix history (appended each iteration)
+      GRADING_RESULTS.md          # your per-feature scores
+    telemetry/<run-id>/
+      cost-summary.json | COST_REPORT.md | metadata.json
+    inputs/                       # frozen prompt snapshot (reproducibility)
+  BENCHMARK_REPORT.md             # generated by generate-report.mjs
+```
+
+Level → features graded (each feature scored 0–3):
+
+| Level | Features | Max | Level | Features | Max |
+|---|---|---|---|---|---|
+| 1 | 1–4 | 12 | 7 | 1–10 | 30 |
+| 2 | 1–5 | 15 | 8 | 1–11 | 33 |
+| 3 | 1–6 | 18 | 9 | 1–12 | 36 |
+| 4 | 1–7 | 21 | 10 | 1–13 | 39 |
+| 5 | 1–8 | 24 | 11 | 1–14 | 42 |
+| 6 | 1–9 | 27 | 12 | 1–15 | 45 |
+
+---
+
+## 1. One-time setup
+
+- **Docker running.** Brings up OTel collector + Postgres + Mongo:
+  ```bash
+  cd tools/llm-sequential-upgrade
+  docker compose -f docker-compose.otel.yaml up -d
+  ```
+- **SpacetimeDB** (only if running the spacetime backend):
+  ```bash
+  spacetime start          # in its own terminal
+  ```
+- **Claude CLI** on PATH (or `npx @anthropic-ai/claude-code`), and **Node.js**.
+- **Run under git-bash** on Windows (the scripts assume it; `.gitattributes` keeps
+  them LF so they also work under WSL/CI).
+
+---
+
+## 2. Pre-run checklist (every session)
+
+```bash
+cd tools/llm-sequential-upgrade
+docker compose -f docker-compose.otel.yaml up -d        # idempotent
+docker exec llm-sequential-upgrade-mongodb-1 mongosh --quiet --eval "db.runCommand({ping:1})"
+# spacetime server ping local   # only for spacetime backend
+```
+All green → ready.
+
+---
+
+## 3. Pin the model (IMPORTANT for parity)
+
+Pin the **same** model for every backend and every level. The published runs used
+**Claude Sonnet 4.6**. Two equivalent ways:
+
+```bash
+./run.sh --model claude-sonnet-4-6 --level 1 --backend mongodb   # per-run flag
+# or set it for the shell (also covers batch runs via run-loop/benchmark):
+export ANTHROPIC_MODEL=claude-sonnet-4-6
+```
+`--model` overrides the env var. The chosen model is recorded in each run's
+`metadata.json` and printed in the run header — it's the single biggest
+comparability lever, so keep it identical across the whole comparison.
+
+---
+
+## 4. The core loop (per level, L1 → L12)
+
+For each backend you're testing (`mongodb`, and `spacetime` if pairing):
+
+### 4a. Generate L1 (or upgrade to level N)
+```bash
+# Level 1 from scratch:
+./run.sh --level 1 --backend mongodb
+
+# Upgrade an existing app to the next level (default = incremental feature file;
+# add --composed-prompt to use the full cumulative spec like the L1–L11 canon):
+./run.sh --upgrade <app-dir> --level 2
+```
+Output ends with `DEPLOY_COMPLETE` and prints the **app dir** and **COST_REPORT** path.
+Backend is auto-detected for `--upgrade`/`--fix` via the `.benchmark-backend` marker,
+so you don't pass `--backend` again.
+
+### 4b. Grade (manual — see §5)
+Test every feature in the browser, score 0–3, write `GRADING_RESULTS.md`. If any
+feature fails, write `BUG_REPORT.md` (template in `templates/`).
+
+### 4c. Fix (if bugs)
+```bash
+./run.sh --fix <app-dir>
+```
+Reads `BUG_REPORT.md`, fixes, redeploys, appends to `ITERATION_LOG.md`. Re-grade.
+Repeat 4b–4c until all features pass (or you hit your iteration cap). **Delete
+`BUG_REPORT.md`** once everything passes — the harness keys `--fix` on its presence.
+
+### 4d. Next level
+Back to 4a with `--upgrade <app-dir> --level N+1`. The app is snapshotted to
+`level-<N>` before each upgrade.
+
+> Tip: `run-loop.sh --backend mongodb --variant sequential-upgrade --level 12`
+> automates the generate→grade→fix→upgrade cycle, but grading still happens in an
+> interactive Claude session (Chrome MCP). For full manual control, drive 4a–4d
+> yourself.
+
+---
+
+## 5. Grading (manual)
+
+Grade **only from observed browser behavior**, never from source. Rubric:
+
+| Score | Meaning |
+|---|---|
+| 3 | Fully working as specified |
+| 2 | Mostly working; minor bugs / missing edge cases |
+| 1 | Partial; major issues |
+| 0 | Missing or broken |
+
+Hard rules: **JS console errors during a feature cap it at 2/3**; **real-time
+features that only work after a refresh cap at 1/3**; untestable → 0; when in
+doubt, score lower.
+
+### Two identities (needed for typing / read receipts / unread / presence)
+The app keys identity off `localStorage`, so two tabs in one profile = the same
+user. To get a real second user, either:
+- **Incognito window** as the second user (separate storage), or
+- **A scripted second socket** — register a second user via the API and open a
+  separate `socket.io` connection (the method used in validation). Drive user 1
+  through the real UI; trigger user 2's `typing` / `markRead` / messages via the
+  socket; grade user 1's UI reaction.
+
+Open the app at the backend's port (mongodb: `http://localhost:6373`).
+
+### Record results
+Write `GRADING_RESULTS.md` in the app dir (format in `GRADING.md`), one block per
+feature, plus a summary table ending in `| **TOTAL** | **X/Y** | |` —
+`generate-report.mjs` parses that line into the report's feature score.
+
+### File bugs
+Copy `templates/BUG_REPORT.template.md` → `<app-dir>/BUG_REPORT.md`. One `## Bug N`
+per issue, behavioral description, expected vs actual. That's the fix agent's input.
+
+---
+
+## 6. Reset DB between grading passes
+
+To grade from a clean slate (no leftover users/rooms/messages):
+```bash
+./reset-app.sh <app-dir>
+```
+Mongo: drops the database (Mongoose recreates collections on next write). Postgres:
+drops tables + `drizzle-kit push`. SpacetimeDB: publishes a fresh module.
+
+---
+
+## 7. Generate the comparison report
+
+After a run (one or both backends in the same dated dir):
+```bash
+node generate-report.mjs sequential-upgrade/sequential-upgrade-YYYYMMDD
+```
+Writes `BENCHMARK_REPORT.md` (cost, calls, tokens, duration, LOC; feature score if
+parseable). Aggregates per-backend cost from each session's `cost-summary.json`.
+
+> The investor-facing `METRICS_DATA.json` + public viewer have **no in-tree
+> generator** — that aggregation is a separate manual/out-of-tree step. Treat
+> `BENCHMARK_REPORT.md` as the local summary.
+
+---
+
+## 8. Cost tracking
+
+Cost is captured automatically via OpenTelemetry — **do not estimate tokens**.
+Per session: `telemetry/<run-id>/COST_REPORT.md` (human) + `cost-summary.json`
+(structured). The dollar figure comes straight from Claude Code's `cost_usd`, so
+it's apples-to-apples across backends *as long as the model is pinned the same*.
+
+Each run prepends a unique run-id to the prompt to bust the server-side prompt
+cache, so every run is a cold, fair measurement.
+
+---
+
+## 9. Teardown / start fresh
+
+Smoke-test or aborted-run cleanup (all of this is regenerable / gitignored):
+```bash
+cd tools/llm-sequential-upgrade
+npx kill-port 6001 6373                                          # stop dev servers
+docker exec llm-sequential-upgrade-mongodb-1 mongosh chat-app --quiet --eval "db.dropDatabase()"
+rm -rf sequential-upgrade/sequential-upgrade-YYYYMMDD            # the run dir
+: > telemetry/logs.jsonl && : > telemetry/metrics.jsonl         # shared telemetry
+```
+Leave the Mongo container + OTel collector **up** — the next run needs them.
+`cleanup.sh <app-dir>` (or `--all`) strips `node_modules`/`dist`/`.git` from an app
+dir without deleting the run.
+
+---
+
+## 10. Troubleshooting
+
+| Symptom | Check |
+|---|---|
+| `run.sh` exits at pre-flight | Is the DB container up? `docker compose … up -d`; for spacetime, `spacetime start`. |
+| Port already in use | `npx kill-port <port>` (6001/6373 for mongodb). |
+| Fix mode targets wrong port | Fixed — `run.sh` recomputes the Vite port after backend detection. |
+| Mongo app misdetected as Postgres | The `.benchmark-backend` marker disambiguates; confirm it exists in the app dir. |
+| OTel not capturing cost | `docker compose … logs otel-collector`; confirm `telemetry/logs.jsonl` is growing. |
+| Report finds no telemetry | Point `generate-report.mjs` at the **dated run dir**, not a backend subdir. |
+| Scripts fail under WSL/CI | Should be LF now (`.gitattributes`); re-checkout if you see `\r` errors. |
+| Session ran out of context | Lower the level, or resume: `./run.sh --upgrade <dir> --level N --resume-session`. |
+
+---
+
+## Quick reference — a full MongoDB pass
+
+```bash
+cd tools/llm-sequential-upgrade
+docker compose -f docker-compose.otel.yaml up -d
+export ANTHROPIC_MODEL=claude-sonnet-4-6
+
+./run.sh --level 1 --backend mongodb          # then set APP=<app dir printed by run.sh>
+#   grade at http://localhost:6373 → write GRADING_RESULTS.md (+ BUG_REPORT.md)
+./run.sh --fix "$APP"                          # repeat with grading until clean
+./reset-app.sh "$APP"                          # clean DB before re-grading
+
+for L in 2 3 4 5 6 7 8 9 10 11 12; do
+  ./run.sh --upgrade "$APP" --level $L
+  #   grade → fix loop at each level
+done
+
+node generate-report.mjs sequential-upgrade/sequential-upgrade-$(date +%Y%m%d)   # substitute the run dir's date if it isn't today's
+```
diff --git a/tools/llm-sequential-upgrade/backends/mongodb.md b/tools/llm-sequential-upgrade/backends/mongodb.md
new file mode 100644
index 00000000000..f842aea3d6e
--- /dev/null
+++ b/tools/llm-sequential-upgrade/backends/mongodb.md
@@ -0,0 +1,273 @@
+# Backend: MongoDB
+
+Standard Node.js/TypeScript backend — you only need this file from `backends/`.
+
+---
+
+## Architecture
+
+- **Server:** Node.js + Express + Mongoose (ODM) + Socket.io
+- **Client:** React + Vite + TypeScript + Socket.io-client
+- **Database:** MongoDB (running in Docker)
+
+The server handles:
+- REST API endpoints for CRUD operations
+- Socket.io for real-time events (messages, typing, presence, etc.)
+- Mongoose models/queries for the database
+- Session/identity management
+
+**Real-time:** Use Socket.io to broadcast changes (messages, typing, presence) to
+connected clients. Do NOT use MongoDB change streams — the database runs as a single
+standalone node (change streams need a replica set), and the real-time layer is the
+application's responsibility (same model as a standard MERN-stack app).
+
+---
+
+## MongoDB Connection
+
+MongoDB is already running in a Docker container.
+
+| Parameter | Value |
+|-----------|-------|
+| Host | `localhost` |
+| Port | `6437` (mapped from container 27017) |
+| Database | `chat-app` |
+| Container | `llm-sequential-upgrade-mongodb-1` |
+| Connection URL | `mongodb://localhost:6437/chat-app` |
+
+---
+
+## Pre-flight Check
+
+```bash
+docker exec llm-sequential-upgrade-mongodb-1 mongosh --quiet --eval "db.runCommand({ping:1})"
+```
+
+If MongoDB is not reachable, STOP and report the error.
+
+---
+
+## Directory Structure
+
+```
+<app-dir>/
+  server/
+    package.json
+    tsconfig.json
+    .env
+    src/
+      models.ts      # Mongoose schema/model definitions
+      index.ts       # Express server + Socket.io + routes
+  client/
+    package.json
+    vite.config.ts
+    tsconfig.json
+    index.html
+    src/
+      main.tsx       # React entry point
+      App.tsx        # Main application component
+      styles.css     # Dark theme styling
+```
+
+---
+
+## Phase 1: Generate Server
+
+Create the Express + Socket.io server:
+
+- `server/package.json`:
+  ```json
+  {
+    "name": "chat-server",
+    "type": "module",
+    "scripts": {
+      "dev": "tsx watch src/index.ts",
+      "start": "tsx src/index.ts"
+    },
+    "dependencies": {
+      "express": "^4.18.2",
+      "@types/express": "^4.17.21",
+      "mongoose": "^8.9.0",
+      "socket.io": "^4.7.4",
+      "cors": "^2.8.5",
+      "@types/cors": "^2.8.17",
+      "dotenv": "^16.4.5",
+      "tsx": "^4.19.0",
+      "typescript": "^5.4.0"
+    }
+  }
+  ```
+
+- `server/tsconfig.json`:
+  ```json
+  {
+    "compilerOptions": {
+      "target": "ES2022",
+      "module": "ES2022",
+      "moduleResolution": "bundler",
+      "esModuleInterop": true,
+      "strict": true,
+      "outDir": "dist",
+      "rootDir": "src",
+      "skipLibCheck": true
+    },
+    "include": ["src/**/*"]
+  }
+  ```
+
+- `server/.env`:
+  ```
+  DATABASE_URL=mongodb://localhost:6437/chat-app
+  PORT=6001
+  ```
+
+- `server/src/models.ts` — Mongoose schemas/models for all features
+- `server/src/index.ts` — Express server with:
+  - CORS configured for `http://localhost:6373`
+  - Socket.io with CORS
+  - REST endpoints for the app's resources (per the feature spec)
+  - Socket.io events for real-time updates (per the feature spec)
+  - Database access via Mongoose (`mongoose.connect(process.env.DATABASE_URL!)` — the `!` is needed because env vars are `string | undefined` under `strict`)
+
+Install:
+```bash
+cd <server-dir> && npm install
+```
+
+MongoDB is schemaless and Mongoose creates collections/indexes on first use — there
+is **no migration / schema-push step**. (If you declare indexes on a schema, they are
+built automatically when the model is first used.)
+
+---
+
+## Phase 2: (No bindings step)
+
+Skip — MongoDB has no binding generation. The client calls REST/Socket.io APIs directly.
+
+---
+
+## Phase 3: Generate Client
+
+- `client/package.json`:
+  ```json
+  {
+    "name": "chat-client",
+    "type": "module",
+    "scripts": {
+      "dev": "vite",
+      "build": "tsc -b && vite build"
+    },
+    "dependencies": {
+      "react": "^18.3.1",
+      "react-dom": "^18.3.1",
+      "socket.io-client": "^4.7.4"
+    },
+    "devDependencies": {
+      "@types/react": "^18.3.12",
+      "@types/react-dom": "^18.3.1",
+      "@vitejs/plugin-react": "^4.3.4",
+      "typescript": "^5.4.0",
+      "vite": "^6.0.0"
+    }
+  }
+  ```
+
+- `client/vite.config.ts` — port **6373** (do not use 6173 or 6273 — they may be in use), proxy `/api` and `/socket.io` to `http://localhost:6001`
+  ```typescript
+  import { defineConfig } from 'vite';
+  import react from '@vitejs/plugin-react';
+
+  export default defineConfig({
+    plugins: [react()],
+    server: {
+      port: 6373,
+      proxy: {
+        '/api': 'http://localhost:6001',
+        '/socket.io': {
+          target: 'http://localhost:6001',
+          ws: true,
+        },
+      },
+    },
+  });
+  ```
+
+- `client/tsconfig.json`
+- `client/index.html`
+- `client/src/main.tsx` — React entry point
+- `client/src/App.tsx` — Main component using `fetch('/api/...')` + Socket.io client
+- `client/src/styles.css` — Dark theme styling
+
+**The client connects to the server via the Vite proxy** — no hardcoded localhost:6001 in client code.
+
+**Critical:** Initialize the socket.io client without a hardcoded URL so it routes through the Vite proxy (e.g. `io()` or `io({ path: '/socket.io' })`). Hardcoding `http://localhost:6001` bypasses the proxy and breaks WebSocket upgrades.
+
+---
+
+## Phase 4: Verify
+
+```bash
+# Server
+cd <server-dir> && npm install && npx tsc --noEmit
+
+# Client
+cd <client-dir> && npm install && npx tsc --noEmit && npm run build
+```
+
+Both must pass. If either fails:
+1. Read the error
+2. Fix the code
+3. Retry (up to 3 attempts)
+4. Each fix counts as a **reprompt** — log it
+
+---
+
+## Phase 5: Deploy
+
+```bash
+# Kill any existing servers
+npx kill-port 6373 2>/dev/null || true
+npx kill-port 6001 2>/dev/null || true
+
+# Start the API server in background
+cd <server-dir> && npx tsx src/index.ts &
+
+# Wait for API server to be ready (poll http://localhost:6001 up to 30s)
+
+# Start client dev server in background
+cd <client-dir> && npm run dev &
+```
+
+Wait for both servers to be ready:
+- API server at `http://localhost:6001`
+- Client dev server at `http://localhost:6373`
+
+---
+
+## Redeploy (for fix iterations)
+
+- If **server changed**: kill and restart the Express server
+  ```bash
+  npx kill-port 6001 2>/dev/null || true
+  cd <server-dir> && npx tsx src/index.ts &
+  ```
+- If **models/schema changed**: no migration step — Mongoose applies the new schema
+  on connect (existing documents are not rewritten). Just restart the Express server.
+- If **client changed**: Vite HMR handles it automatically (or restart dev server if needed)
+
+---
+
+## App Identity
+
+- HTML `<title>` MUST be **"MongoDB Chat"** (not a generic "Chat App")
+- The app MUST show **"MongoDB Chat"** as the visible header/title in the UI
+
+---
+
+## Port Configuration
+
+| Service | Port | Notes |
+|---------|------|-------|
+| MongoDB (Docker) | 6437 | Database |
+| Express API server | 6001 | REST + Socket.io |
+| Vite dev server | **6373** | React client — do not use 6173 or 6273 |
diff --git a/tools/llm-sequential-upgrade/backends/postgres.md b/tools/llm-sequential-upgrade/backends/postgres.md
index f65246cc0d2..64b8cb161dd 100644
--- a/tools/llm-sequential-upgrade/backends/postgres.md
+++ b/tools/llm-sequential-upgrade/backends/postgres.md
@@ -1,8 +1,6 @@
 # Backend: PostgreSQL
 
-Instructions for generating, building, and deploying the **PostgreSQL** backend.
-
-**Do NOT read SpacetimeDB SDK rule files.** This backend uses standard Node.js/TypeScript patterns.
+Standard Node.js/TypeScript backend — you only need this file from `backends/`.
 
 ---
 
@@ -186,7 +184,7 @@ Skip — PostgreSQL has no binding generation. The client calls REST/Socket.io A
   }
   ```
 
-- `client/vite.config.ts` — port **6273** (NOT 6173 — that's SpacetimeDB), proxy `/api` and `/socket.io` to `http://localhost:6001`
+- `client/vite.config.ts` — port **6273** (do not use 6173 or 6373 — they may be in use), proxy `/api` and `/socket.io` to `http://localhost:6001`
   ```typescript
   import { defineConfig } from 'vite';
   import react from '@vitejs/plugin-react';
@@ -273,29 +271,10 @@ Wait for both servers to be ready:
 
 ---
 
-## Key Differences from SpacetimeDB
-
-For context on what makes this backend different (this helps the benchmark comparison):
-
-| Aspect | SpacetimeDB | PostgreSQL |
-|--------|-------------|------------|
-| Real-time | Built-in subscriptions | Socket.io (manual) |
-| API layer | Reducers (auto-exposed) | Express routes (manual) |
-| Schema | `table()` + `reducer()` | Drizzle `pgTable()` |
-| Bindings | Auto-generated types | Manual type definitions |
-| Deployment | `spacetime publish` | Start Express server |
-| State sync | Automatic client cache | Manual fetch + Socket.io |
-| Online presence | Via lifecycle hooks | Manual Socket.io tracking |
-| Typing indicators | Reducer + subscription | Socket.io events |
-| Infra dependencies | SpacetimeDB only | PostgreSQL + Express + Socket.io + CORS |
-
----
-
 ## App Identity
 
-- HTML `<title>` MUST be **"PostgreSQL Chat"** (not "Chat App", not "SpacetimeDB Chat")
+- HTML `<title>` MUST be **"PostgreSQL Chat"** (not a generic "Chat App")
 - The app MUST show **"PostgreSQL Chat"** as the visible header/title in the UI
-- This distinguishes it from the SpacetimeDB version during testing
 
 ---
 
@@ -305,10 +284,4 @@ For context on what makes this backend different (this helps the benchmark compa
 |---------|------|-------|
 | PostgreSQL (Docker) | 6432 | Database |
 | Express API server | 6001 | REST + Socket.io |
-| Vite dev server | **6273** | React client — NOT 6173 (that's SpacetimeDB) |
-
----
-
-## Reference Files
-
-The language and feature prompt files are provided as absolute paths in the launch prompt. No additional reference files are needed — this backend uses standard Node.js/TypeScript patterns.
+| Vite dev server | **6273** | React client — do not use 6173 or 6373 |
diff --git a/tools/llm-sequential-upgrade/backends/spacetime-sdk-rules.md b/tools/llm-sequential-upgrade/backends/spacetime-sdk-rules.md
deleted file mode 100644
index 337af9269a4..00000000000
--- a/tools/llm-sequential-upgrade/backends/spacetime-sdk-rules.md
+++ /dev/null
@@ -1,258 +0,0 @@
-# SpacetimeDB TypeScript SDK Reference
-
-## Imports
-
-```typescript
-import { schema, table, t } from 'spacetimedb/server';
-import { SenderError } from 'spacetimedb/server';
-import { ScheduleAt } from 'spacetimedb';        // for scheduled tables only
-```
-
-## Tables
-
-`table(OPTIONS, COLUMNS)` — two arguments. The `name` field MUST be snake_case:
-
-```typescript
-const entity = table(
-  { name: 'entity', public: true },
-  {
-    identity: t.identity().primaryKey(),
-    name: t.string(),
-    active: t.bool(),
-  }
-);
-```
-
-Options: `name` (snake_case, required), `public: true`, `event: true`, `scheduled: (): any => reducerRef`, `indexes: [...]`
-
-`ctx.db` accessors use the JS variable name (camelCase), not the SQL name.
-
-## Column Types
-
-| Builder | JS type | Notes |
-|---------|---------|-------|
-| `t.u64()` | bigint | Use `0n` literals |
-| `t.i64()` | bigint | Use `0n` literals |
-| `t.u32()` / `t.i32()` | number | |
-| `t.f64()` / `t.f32()` | number | |
-| `t.bool()` | boolean | |
-| `t.string()` | string | |
-| `t.identity()` | Identity | |
-| `t.timestamp()` | Timestamp | |
-| `t.scheduleAt()` | ScheduleAt | |
-
-Modifiers: `.primaryKey()`, `.autoInc()`, `.unique()`, `.index('btree')`
-
-Optional columns: `nickname: t.option(t.string())`
-
-## Indexes
-
-Prefer inline `.index('btree')` for single-column. Use named indexes only for multi-column:
-
-```typescript
-// Inline (preferred):
-authorId: t.u64().index('btree'),
-// Access: ctx.db.post.authorId.filter(authorId);
-
-// Multi-column (named):
-indexes: [{ accessor: 'by_cat_sev', algorithm: 'btree', columns: ['category', 'severity'] }]
-```
-
-## Schema Export
-
-```typescript
-const spacetimedb = schema({ entity, record });  // ONE object, not spread args
-export default spacetimedb;
-```
-
-## Reducers
-
-Export name becomes the reducer name:
-
-```typescript
-export const createEntity = spacetimedb.reducer(
-  { name: t.string(), age: t.i32() },
-  (ctx, { name, age }) => {
-    ctx.db.entity.insert({ identity: ctx.sender, name, age, active: true });
-  }
-);
-
-// No arguments — just the callback:
-export const doReset = spacetimedb.reducer((ctx) => { ... });
-```
-
-## DB Operations
-
-```typescript
-ctx.db.entity.insert({ id: 0n, name: 'Sample' });          // Insert (0n for autoInc)
-ctx.db.entity.id.find(entityId);                           // Find by PK → row | null
-ctx.db.entity.identity.find(ctx.sender);                   // Find by unique column
-[...ctx.db.item.authorId.filter(authorId)];                // Filter → spread to Array
-[...ctx.db.entity.iter()];                                 // All rows → Array
-ctx.db.entity.id.update({ ...existing, name: newName });   // Update (spread + override)
-ctx.db.entity.id.delete(entityId);                         // Delete by PK
-```
-
-Note: `iter()` and `filter()` return iterators. Spread to Array for `.sort()`, `.filter()`, `.map()`.
-
-## Lifecycle Hooks
-
-MUST be `export const` — bare calls are silently ignored:
-
-```typescript
-export const init = spacetimedb.init((ctx) => { ... });
-export const onConnect = spacetimedb.clientConnected((ctx) => { ... });
-export const onDisconnect = spacetimedb.clientDisconnected((ctx) => { ... });
-```
-
-## Authentication & Timestamps
-
-```typescript
-// Auth: ctx.sender is the caller's Identity
-if (!row.owner.equals(ctx.sender)) throw new SenderError('unauthorized');
-
-// Server timestamps
-ctx.db.item.insert({ id: 0n, createdAt: ctx.timestamp });
-
-// Client: Timestamp → Date
-new Date(Number(row.createdAt.microsSinceUnixEpoch / 1000n));
-```
-
-## Scheduled Tables
-
-```typescript
-const tickTimer = table({
-  name: 'tick_timer',
-  scheduled: (): any => tick,   // (): any => breaks circular dep
-}, {
-  scheduledId: t.u64().primaryKey().autoInc(),
-  scheduledAt: t.scheduleAt(),
-});
-
-export const tick = spacetimedb.reducer(
-  { timer: tickTimer.rowType },
-  (ctx, { timer }) => { /* timer row auto-deleted after this runs */ }
-);
-
-// One-time: ScheduleAt.time(ctx.timestamp.microsSinceUnixEpoch + delayMicros)
-// Repeating: ScheduleAt.interval(60_000_000n)
-```
-
-## React Client
-
-### main.tsx — SpacetimeDBProvider is required
-
-```typescript
-import React, { useMemo } from 'react';
-import ReactDOM from 'react-dom/client';
-import { SpacetimeDBProvider } from 'spacetimedb/react';
-import { DbConnection } from './module_bindings';
-import { MODULE_NAME, SPACETIMEDB_URI } from './config';
-import App from './App';
-
-function Root() {
-  const connectionBuilder = useMemo(() =>
-    DbConnection.builder()
-      .withUri(SPACETIMEDB_URI)
-      .withDatabaseName(MODULE_NAME)
-      .withToken(localStorage.getItem('auth_token') || undefined),
-    []
-  );
-  return (
-    <SpacetimeDBProvider connectionBuilder={connectionBuilder}>
-      <App />
-    </SpacetimeDBProvider>
-  );
-}
-
-ReactDOM.createRoot(document.getElementById('root')!).render(<Root />);
-```
-
-### App.tsx patterns
-
-```typescript
-import { useTable, useSpacetimeDB } from 'spacetimedb/react';
-import { DbConnection, tables } from './module_bindings';
-
-function App() {
-  const { isActive, identity: myIdentity, token, getConnection } = useSpacetimeDB();
-  const conn = getConnection() as DbConnection | null;
-
-  // Save auth token
-  useEffect(() => { if (token) localStorage.setItem('auth_token', token); }, [token]);
-
-  // Subscribe when connected
-  useEffect(() => {
-    if (!conn || !isActive) return;
-    conn.subscriptionBuilder()
-      .onApplied(() => setSubscribed(true))
-      .subscribe(['SELECT * FROM entity', 'SELECT * FROM record']);
-  }, [conn, isActive]);
-
-  // Reactive data
-  const [entities] = useTable(tables.entity);
-  const [records] = useTable(tables.record);
-
-  // Call reducers with object syntax
-  conn?.reducers.addRecord({ data });
-
-  // Compare identities
-  const isMe = row.owner.toHexString() === myIdentity?.toHexString();
-}
-```
-
-## Complete Example
-
-```typescript
-// schema.ts
-import { schema, table, t } from 'spacetimedb/server';
-
-const entity = table({ name: 'entity', public: true }, {
-  identity: t.identity().primaryKey(),
-  name: t.string(),
-  active: t.bool(),
-});
-
-const record = table({ name: 'record', public: true }, {
-  id: t.u64().primaryKey().autoInc(),
-  owner: t.identity(),
-  value: t.u32(),
-  createdAt: t.timestamp(),
-});
-
-const spacetimedb = schema({ entity, record });
-export default spacetimedb;
-```
-
-```typescript
-// index.ts
-import spacetimedb from './schema';
-import { t, SenderError } from 'spacetimedb/server';
-export { default } from './schema';
-
-export const onConnect = spacetimedb.clientConnected((ctx) => {
-  const existing = ctx.db.entity.identity.find(ctx.sender);
-  if (existing) ctx.db.entity.identity.update({ ...existing, active: true });
-});
-
-export const onDisconnect = spacetimedb.clientDisconnected((ctx) => {
-  const existing = ctx.db.entity.identity.find(ctx.sender);
-  if (existing) ctx.db.entity.identity.update({ ...existing, active: false });
-});
-
-export const createEntity = spacetimedb.reducer(
-  { name: t.string() },
-  (ctx, { name }) => {
-    if (ctx.db.entity.identity.find(ctx.sender)) throw new SenderError('already exists');
-    ctx.db.entity.insert({ identity: ctx.sender, name, active: true });
-  }
-);
-
-export const addRecord = spacetimedb.reducer(
-  { value: t.u32() },
-  (ctx, { value }) => {
-    if (!ctx.db.entity.identity.find(ctx.sender)) throw new SenderError('not found');
-    ctx.db.record.insert({ id: 0n, owner: ctx.sender, value, createdAt: ctx.timestamp });
-  }
-);
-```
diff --git a/tools/llm-sequential-upgrade/backends/spacetime-templates.md b/tools/llm-sequential-upgrade/backends/spacetime-templates.md
index 0847c58f21a..c0cc4a7f9fb 100644
--- a/tools/llm-sequential-upgrade/backends/spacetime-templates.md
+++ b/tools/llm-sequential-upgrade/backends/spacetime-templates.md
@@ -10,6 +10,9 @@
   "version": "1.0.0",
   "dependencies": {
     "spacetimedb": "^2.0.0"
+  },
+  "devDependencies": {
+    "typescript": "^5.7.2"
   }
 }
 ```
@@ -20,7 +23,7 @@
   "compilerOptions": {
     "target": "ES2020",
     "module": "ESNext",
-    "moduleResolution": "node",
+    "moduleResolution": "bundler",
     "strict": true,
     "esModuleInterop": true,
     "skipLibCheck": true,
@@ -50,8 +53,7 @@ Why this structure? Avoids circular dependency issues between tables and reducer
   "version": "1.0.0",
   "type": "module",
   "scripts": {
-    "kill-port": "npx kill-port 6173 2>nul || true",
-    "dev": "npm run kill-port && vite",
+    "dev": "vite",
     "build": "tsc && vite build",
     "preview": "vite preview"
   },
@@ -99,8 +101,8 @@ export default defineConfig({
     "noEmit": true,
     "jsx": "react-jsx",
     "strict": true,
-    "noUnusedLocals": true,
-    "noUnusedParameters": true,
+    "noUnusedLocals": false,
+    "noUnusedParameters": false,
     "noFallthroughCasesInSwitch": true
   },
   "include": ["src"]
diff --git a/tools/llm-sequential-upgrade/backends/spacetime.md b/tools/llm-sequential-upgrade/backends/spacetime.md
index 891206b8090..bc322267701 100644
--- a/tools/llm-sequential-upgrade/backends/spacetime.md
+++ b/tools/llm-sequential-upgrade/backends/spacetime.md
@@ -1,6 +1,6 @@
 # Backend: SpacetimeDB
 
-Instructions for generating, building, and deploying the **SpacetimeDB** backend.
+Server module + React client built on the SpacetimeDB TypeScript SDK.
 
 ---
 
@@ -114,17 +114,21 @@ Wait for the dev server to be ready (poll `http://localhost:6173` up to 30 secon
 
 ## App Identity
 
-- HTML `<title>` MUST be **"SpacetimeDB Chat"** (not "Chat App" or anything generic)
+- HTML `<title>` MUST be **"SpacetimeDB Chat"** (not a generic "Chat App")
 - The app MUST show **"SpacetimeDB Chat"** as the visible header/title in the UI
-- This distinguishes it from the PostgreSQL version during testing
 
 ---
 
-## Redeploy (for fix iterations)
+## Redeploy (fix iterations & upgrades)
 
 - If **backend changed**: re-publish module, regenerate bindings if schema changed
   ```bash
   spacetime publish chat-app-<timestamp> --module-path <backend-dir>
   spacetime generate --lang typescript --out-dir <client>/src/module_bindings --module-path <backend-dir>
   ```
+  **If the schema changed (common on upgrades)**, a plain publish stops because a migration is needed
+  and prompts `[y/N]`, which hangs in a headless run. Skip the plain publish and wipe-and-publish non-interactively:
+  ```bash
+  echo y | spacetime publish chat-app-<timestamp> --module-path <backend-dir> --delete-data
+  ```
 - If **client changed**: Vite HMR handles it automatically (or restart dev server if needed)
diff --git a/tools/llm-sequential-upgrade/benchmark.sh b/tools/llm-sequential-upgrade/benchmark.sh
index 9ba4f0f4b89..068f05de316 100644
--- a/tools/llm-sequential-upgrade/benchmark.sh
+++ b/tools/llm-sequential-upgrade/benchmark.sh
@@ -21,7 +21,6 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 NUM_RUNS=3
 VARIANT="sequential-upgrade"
 RULES="guided"
-TEST_MODE=""
 LEVEL=""
 BACKENDS=("spacetime" "postgres")
 
@@ -30,18 +29,12 @@ while [[ $# -gt 0 ]]; do
     --runs) NUM_RUNS="$2"; shift 2 ;;
     --variant) VARIANT="$2"; shift 2 ;;
     --rules) RULES="$2"; shift 2 ;;
-    --test) TEST_MODE="$2"; shift 2 ;;
     --level) LEVEL="$2"; shift 2 ;;
     --backend) BACKENDS=("$2"); shift 2 ;;
     *) echo "Unknown option: $1"; exit 1 ;;
   esac
 done
 
-TEST_FLAG=""
-if [[ -n "$TEST_MODE" ]]; then
-  TEST_FLAG="--test $TEST_MODE"
-fi
-
 # ─── Compute total parallel instances ────────────────────────────────────────
 
 NUM_BACKENDS=${#BACKENDS[@]}
@@ -135,7 +128,6 @@ for run_num in $(seq 1 "$NUM_RUNS"); do
         --variant "$VARIANT" \
         --level "${LEVEL:-7}" \
         --rules "$RULES" \
-        $TEST_FLAG \
         --run-index "$RUN_INDEX"
       update_status "$RUN_INDEX" "$backend" "completed" "exit=$?"
     ) > "$LOG_FILE" 2>&1 &
diff --git a/tools/llm-sequential-upgrade/docker-compose.otel.yaml b/tools/llm-sequential-upgrade/docker-compose.otel.yaml
index c5b529925bc..58ba34a5b5d 100644
--- a/tools/llm-sequential-upgrade/docker-compose.otel.yaml
+++ b/tools/llm-sequential-upgrade/docker-compose.otel.yaml
@@ -28,5 +28,21 @@ services:
       timeout: 5s
       retries: 5
 
+  # Standard MERN-stack MongoDB: single node, manual Socket.io for real-time
+  # (deliberately NOT a replica set / change streams — keeps the comparison
+  # symmetric with the Postgres backend).
+  mongodb:
+    image: mongo:7
+    ports:
+      - "6437:27017"
+    volumes:
+      - llm-sequential-upgrade-mongodata:/data/db
+    healthcheck:
+      test: ["CMD", "mongosh", "--quiet", "--eval", "db.runCommand({ping:1})"]
+      interval: 5s
+      timeout: 5s
+      retries: 5
+
 volumes:
   llm-sequential-upgrade-pgdata:
+  llm-sequential-upgrade-mongodata:
diff --git a/tools/llm-sequential-upgrade/generate-report.mjs b/tools/llm-sequential-upgrade/generate-report.mjs
index f0e78f56819..7a7e4c0517d 100644
--- a/tools/llm-sequential-upgrade/generate-report.mjs
+++ b/tools/llm-sequential-upgrade/generate-report.mjs
@@ -22,20 +22,24 @@ if (!runBaseDir) {
   process.exit(1);
 }
 
-// Find all cost-summary.json files
-const telemetryDir = path.join(runBaseDir, 'telemetry');
-if (!fs.existsSync(telemetryDir)) {
-  console.error(`Telemetry directory not found: ${telemetryDir}`);
+// Find all cost-summary.json files. Layout is per-backend:
+//   <runBaseDir>/<backend>/telemetry/<run-id>/cost-summary.json
+if (!fs.existsSync(runBaseDir)) {
+  console.error(`Run base directory not found: ${runBaseDir}`);
   process.exit(1);
 }
 
 const summaries = [];
-for (const entry of fs.readdirSync(telemetryDir)) {
-  const summaryPath = path.join(telemetryDir, entry, 'cost-summary.json');
-  if (fs.existsSync(summaryPath)) {
-    const data = JSON.parse(fs.readFileSync(summaryPath, 'utf-8'));
-    data._dir = entry;
-    summaries.push(data);
+for (const backendEntry of fs.readdirSync(runBaseDir)) {
+  const telemetryDir = path.join(runBaseDir, backendEntry, 'telemetry');
+  if (!fs.existsSync(telemetryDir) || !fs.statSync(telemetryDir).isDirectory()) continue;
+  for (const entry of fs.readdirSync(telemetryDir)) {
+    const summaryPath = path.join(telemetryDir, entry, 'cost-summary.json');
+    if (fs.existsSync(summaryPath)) {
+      const data = JSON.parse(fs.readFileSync(summaryPath, 'utf-8'));
+      data._dir = entry;
+      summaries.push(data);
+    }
   }
 }
 
@@ -79,7 +83,7 @@ function calcTotals(runs) {
 
 // Read GRADING_RESULTS.md for feature scores
 function readGradingScores(backend) {
-  const resultsDir = path.join(runBaseDir, 'results', backend);
+  const resultsDir = path.join(runBaseDir, backend, 'results');
   if (!fs.existsSync(resultsDir)) return null;
 
   const appDirs = fs.readdirSync(resultsDir)
@@ -96,13 +100,20 @@ function readGradingScores(backend) {
 
   const content = fs.readFileSync(gradingPath, 'utf-8');
 
-  // Extract total score from "**TOTAL** | **N** | **M**"
-  const totalMatch = content.match(/\*\*TOTAL\*\*.*?\*\*(\d+)\*\*.*?\*\*(\d+)\*\*/);
-  if (totalMatch) {
-    return { max: parseInt(totalMatch[1]), score: parseInt(totalMatch[2]) };
+  // Primary (canonical GRADING_RESULTS.md): "| **TOTAL** | **X/Y** | |"
+  // — score / max combined in one cell.
+  const slashMatch = content.match(/\*\*TOTAL\*\*.*?\*\*(\d+)\s*\/\s*(\d+)\*\*/);
+  if (slashMatch) {
+    return { score: parseInt(slashMatch[1]), max: parseInt(slashMatch[2]) };
+  }
+
+  // Legacy: "**TOTAL** | **MAX** | **SCORE**" — two separate numeric cells.
+  const twoCellMatch = content.match(/\*\*TOTAL\*\*.*?\*\*(\d+)\*\*.*?\*\*(\d+)\*\*/);
+  if (twoCellMatch) {
+    return { max: parseInt(twoCellMatch[1]), score: parseInt(twoCellMatch[2]) };
   }
 
-  // Fallback: look for "Total Feature Score" in metrics
+  // Fallback: prose "Total Feature Score: X / Y".
   const scoreMatch = content.match(/Total Feature Score.*?(\d+)\s*\/\s*(\d+)/);
   if (scoreMatch) {
     return { score: parseInt(scoreMatch[1]), max: parseInt(scoreMatch[2]) };
@@ -113,7 +124,7 @@ function readGradingScores(backend) {
 
 // Count lines of code in app dir
 function countLoc(backend) {
-  const resultsDir = path.join(runBaseDir, 'results', backend);
+  const resultsDir = path.join(runBaseDir, backend, 'results');
   if (!fs.existsSync(resultsDir)) return null;
 
   const appDirs = fs.readdirSync(resultsDir)
@@ -149,10 +160,10 @@ function countLoc(backend) {
     backendLoc = countLines(stdbBackend);
   }
 
-  // PostgreSQL backend
-  const pgServer = path.join(appDir, 'server');
-  if (fs.existsSync(pgServer)) {
-    backendLoc = countLines(pgServer);
+  // Express backend (postgres + mongodb both use a server/ dir)
+  const expressServer = path.join(appDir, 'server');
+  if (fs.existsSync(expressServer)) {
+    backendLoc = countLines(expressServer);
   }
 
   // Frontend
diff --git a/tools/llm-sequential-upgrade/grade-agents.sh b/tools/llm-sequential-upgrade/grade-agents.sh
deleted file mode 100644
index d693a0e7165..00000000000
--- a/tools/llm-sequential-upgrade/grade-agents.sh
+++ /dev/null
@@ -1,160 +0,0 @@
-#!/bin/bash
-# Sequential Upgrade — Playwright Agents Grading
-#
-# Uses Playwright's AI-powered agents to grade a deployed app.
-# The Generator agent discovers UI elements from the live DOM,
-# writes tests with validated selectors, and runs them.
-# The Healer agent auto-fixes failing selectors.
-#
-# Usage:
-#   ./grade-agents.sh <app-dir>
-#
-# Prerequisites:
-#   cd test-plans/playwright && npm install && npx playwright install chromium
-#   npx playwright init-agents --loop=claude
-
-set -euo pipefail
-
-APP_DIR="${1:?Usage: ./grade-agents.sh <app-dir>}"
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-PLAYWRIGHT_DIR="$SCRIPT_DIR/test-plans/playwright"
-
-if [[ ! -d "$APP_DIR" ]]; then
-  echo "ERROR: App directory not found: $APP_DIR"
-  exit 1
-fi
-
-# Check Playwright agents are initialized
-if [[ ! -f "$PLAYWRIGHT_DIR/.claude/agents/playwright-test-generator.md" ]]; then
-  echo "ERROR: Playwright agents not initialized."
-  echo "Run: cd test-plans/playwright && npx playwright init-agents --loop=claude"
-  exit 1
-fi
-
-# Auto-detect backend
-if [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
-  GRADE_BACKEND="spacetime"
-  DEFAULT_PORT=5173
-elif [[ -d "$APP_DIR/server" ]]; then
-  GRADE_BACKEND="postgres"
-  DEFAULT_PORT=5174
-else
-  GRADE_BACKEND="unknown"
-  DEFAULT_PORT=5173
-fi
-
-# Try to read port from metadata
-VITE_PORT="$DEFAULT_PORT"
-RUN_BASE="$(cd "$APP_DIR/../../.." 2>/dev/null && pwd)"
-if [[ -d "$RUN_BASE/telemetry" ]]; then
-  LATEST_META=$(find "$RUN_BASE/telemetry" -name "metadata.json" -path "*$GRADE_BACKEND*" -exec ls -t {} + 2>/dev/null | head -1)
-  if [[ -n "$LATEST_META" ]]; then
-    META_PORT=$(node -e "const m=JSON.parse(require('fs').readFileSync(process.argv[1],'utf-8')); process.stdout.write(String(m.vitePort||''))" -- "$(cygpath -w "$LATEST_META" 2>/dev/null || echo "$LATEST_META")" 2>/dev/null)
-    if [[ -n "$META_PORT" ]]; then
-      VITE_PORT="$META_PORT"
-    fi
-  fi
-fi
-
-APP_URL="http://localhost:$VITE_PORT"
-
-echo "=== Sequential Upgrade: Playwright Agents Grade ==="
-echo "  App dir:  $APP_DIR"
-echo "  Backend:  $GRADE_BACKEND (port $VITE_PORT)"
-echo "  URL:      $APP_URL"
-echo ""
-
-# Reset backend state for a clean test
-echo "Resetting backend state..."
-"$SCRIPT_DIR/reset-app.sh" "$APP_DIR" || echo "WARNING: Backend reset failed"
-sleep 3
-
-# Update seed test to point at the correct URL
-cat > "$PLAYWRIGHT_DIR/specs/seed.spec.ts" <<EOF
-import { test, expect } from '@playwright/test';
-
-test.describe('Seed', () => {
-  test('seed', async ({ page }) => {
-    await page.goto('$APP_URL');
-    await page.waitForSelector('input, button', { timeout: 30_000 });
-  });
-});
-EOF
-
-# Add Claude Code desktop install to PATH
-_APPDATA_UNIX="${APPDATA:-$HOME/AppData/Roaming}"
-if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
-  _APPDATA_UNIX=$(cygpath "$_APPDATA_UNIX" 2>/dev/null || echo "$_APPDATA_UNIX")
-fi
-CLAUDE_DESKTOP_DIR="$_APPDATA_UNIX/Claude/claude-code"
-if [[ -d "$CLAUDE_DESKTOP_DIR" ]]; then
-  CLAUDE_LATEST=$(ls -d "$CLAUDE_DESKTOP_DIR"/*/ 2>/dev/null | sort -V | tail -1)
-  if [[ -n "$CLAUDE_LATEST" ]]; then
-    export PATH="$PATH:$CLAUDE_LATEST"
-  fi
-fi
-
-CLAUDE_CMD=""
-if command -v claude &>/dev/null; then
-  CLAUDE_CMD="claude"
-elif command -v claude.exe &>/dev/null; then
-  CLAUDE_CMD="claude.exe"
-else
-  echo "ERROR: Claude Code CLI not found."
-  exit 1
-fi
-
-echo ""
-echo "=== Phase 1: Generate Tests ==="
-echo "Running Playwright Test Generator agent..."
-echo ""
-
-cd "$PLAYWRIGHT_DIR"
-
-# Invoke the Generator agent via Claude Code to create tests from the plan
-$CLAUDE_CMD --print --dangerously-skip-permissions -p "
-You are running the Playwright Test Generator agent.
-
-Read the test plan at specs/plans/chat-app-features.md.
-For each test scenario in the plan:
-1. Use generator_setup_page to open the app
-2. Execute each step using the Playwright MCP tools (browser_click, browser_type, browser_snapshot, etc.)
-3. Read the generator log with generator_read_log
-4. Write the test with generator_write_test
-
-The app is running at $APP_URL. Generate tests for all scenarios in the plan.
-Important: Use browser_snapshot to inspect the DOM before interacting — do NOT guess selectors.
-" 2>&1 | tee "$APP_DIR/agent-generator-output.log"
-
-echo ""
-echo "=== Phase 2: Run Generated Tests ==="
-
-# Run whatever tests were generated
-APP_URL="$APP_URL" npx playwright test --reporter=json \
-  1>/tmp/pw-agent-results.json 2>/dev/null || true
-
-RESULTS_SIZE=$(wc -c < /tmp/pw-agent-results.json 2>/dev/null || echo "0")
-
-if [[ "$RESULTS_SIZE" -gt 100 ]]; then
-  echo ""
-  echo "=== Phase 3: Parse Results ==="
-
-  PW_RESULTS="/tmp/pw-agent-results.json"
-  APP_DIR_NATIVE="$APP_DIR"
-  if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
-    PW_RESULTS=$(cygpath -w "$PW_RESULTS")
-    APP_DIR_NATIVE=$(cygpath -w "$APP_DIR")
-  fi
-
-  node "$SCRIPT_DIR/parse-playwright-results.mjs" "$PW_RESULTS" "$APP_DIR_NATIVE" "$GRADE_BACKEND"
-
-  echo ""
-  echo "=== Results ==="
-  echo "  GRADING_RESULTS.md: $APP_DIR"
-  echo "  Generator log: $APP_DIR/agent-generator-output.log"
-else
-  echo "WARNING: No test results produced."
-  echo "Check the generator output: $APP_DIR/agent-generator-output.log"
-fi
-
-cd "$SCRIPT_DIR"
diff --git a/tools/llm-sequential-upgrade/grade-playwright.sh b/tools/llm-sequential-upgrade/grade-playwright.sh
deleted file mode 100644
index 73e5ed5d397..00000000000
--- a/tools/llm-sequential-upgrade/grade-playwright.sh
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/bin/bash
-# Sequential Upgrade — Playwright Grading
-#
-# Runs deterministic Playwright tests against a deployed app and generates
-# GRADING_RESULTS.md. This is an alternative to the Chrome MCP grading agent.
-#
-# Usage:
-#   ./grade-playwright.sh <app-dir>
-#   ./grade-playwright.sh sequential-upgrade/sequential-upgrade-20260401/results/spacetime/chat-app-20260401-123403
-#
-# Prerequisites:
-#   cd test-plans/playwright && npm install && npx playwright install chromium
-
-set -euo pipefail
-
-APP_DIR="${1:?Usage: ./grade-playwright.sh <app-dir>}"
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-PLAYWRIGHT_DIR="$SCRIPT_DIR/test-plans/playwright"
-
-if [[ ! -d "$APP_DIR" ]]; then
-  echo "ERROR: App directory not found: $APP_DIR"
-  exit 1
-fi
-
-# Check Playwright is installed
-if [[ ! -f "$PLAYWRIGHT_DIR/node_modules/.bin/playwright" ]]; then
-  echo "ERROR: Playwright not installed."
-  echo "Run: cd test-plans/playwright && npm install && npx playwright install chromium"
-  exit 1
-fi
-
-# Auto-detect backend from app directory structure
-if [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
-  GRADE_BACKEND="spacetime"
-  DEFAULT_PORT=5173
-elif [[ -d "$APP_DIR/server" ]]; then
-  GRADE_BACKEND="postgres"
-  DEFAULT_PORT=5174
-else
-  GRADE_BACKEND="unknown"
-  DEFAULT_PORT=5173
-fi
-
-# Try to read the port from telemetry metadata (set by --run-index)
-VITE_PORT="$DEFAULT_PORT"
-# Walk up from app dir to find telemetry metadata
-RUN_BASE="$(cd "$APP_DIR/../../.." 2>/dev/null && pwd)"
-if [[ -d "$RUN_BASE/telemetry" ]]; then
-  # Find the most recent metadata.json for this backend
-  LATEST_META=$(find "$RUN_BASE/telemetry" -name "metadata.json" -path "*$GRADE_BACKEND*" -exec ls -t {} + 2>/dev/null | head -1)
-  if [[ -n "$LATEST_META" ]]; then
-    META_PORT=$(node -e "const m=JSON.parse(require('fs').readFileSync(process.argv[1],'utf-8')); process.stdout.write(String(m.vitePort||''))" -- "$(cygpath -w "$LATEST_META" 2>/dev/null || echo "$LATEST_META")" 2>/dev/null)
-    if [[ -n "$META_PORT" ]]; then
-      VITE_PORT="$META_PORT"
-    fi
-  fi
-fi
-
-APP_URL="http://localhost:$VITE_PORT"
-
-echo "=== Sequential Upgrade: Playwright Grade ==="
-echo "  App dir:  $APP_DIR"
-echo "  Backend:  $GRADE_BACKEND (port $VITE_PORT)"
-echo "  URL:      $APP_URL"
-echo ""
-
-# Reset backend state for a clean test
-echo "Resetting backend state..."
-"$SCRIPT_DIR/reset-app.sh" "$APP_DIR" || echo "WARNING: Backend reset failed"
-sleep 3
-
-# Run Playwright tests (BrowserContext isolation handles multi-user — no second server needed)
-cd "$PLAYWRIGHT_DIR"
-APP_URL="$APP_URL" npx playwright test --reporter=json 2>&1 | tee test-results/raw-output.json || true
-
-# Parse results into GRADING_RESULTS.md
-if [[ -f "test-results/results.json" ]]; then
-  echo ""
-  echo "Parsing Playwright results..."
-
-  # On Windows, convert paths for Node.js
-  APP_DIR_NATIVE="$APP_DIR"
-  RESULTS_FILE="$PLAYWRIGHT_DIR/test-results/results.json"
-  if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
-    APP_DIR_NATIVE=$(cygpath -w "$APP_DIR")
-    RESULTS_FILE=$(cygpath -w "$RESULTS_FILE")
-  fi
-
-  node "$SCRIPT_DIR/parse-playwright-results.mjs" "$RESULTS_FILE" "$APP_DIR_NATIVE" "$GRADE_BACKEND"
-
-  echo ""
-  echo "=== Results ==="
-  echo "  GRADING_RESULTS.md written to: $APP_DIR"
-else
-  echo "ERROR: No Playwright results found at test-results/results.json"
-  exit 1
-fi
diff --git a/tools/llm-sequential-upgrade/grade.sh b/tools/llm-sequential-upgrade/grade.sh
index 6f6bd7ff922..3b8e6b129f1 100644
--- a/tools/llm-sequential-upgrade/grade.sh
+++ b/tools/llm-sequential-upgrade/grade.sh
@@ -52,16 +52,34 @@ echo "This launches an INTERACTIVE Claude Code session with Chrome MCP."
 echo "It will test the deployed app, write bug reports, and grade features."
 echo ""
 
-# Auto-detect backend from app directory structure
-if [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
+# Auto-detect backend. Prefer the marker run.sh writes at generate time; fall
+# back to directory shape. The marker is the only reliable way to tell postgres
+# and mongodb apart (both use a server/ dir).
+if [[ -f "$APP_DIR/.benchmark-backend" ]]; then
+  GRADE_BACKEND="$(tr -d '[:space:]' < "$APP_DIR/.benchmark-backend")"
+elif [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
   GRADE_BACKEND="spacetime"
-  VITE_PORT=5173
 elif [[ -d "$APP_DIR/server" ]]; then
   GRADE_BACKEND="postgres"
-  VITE_PORT=5174
 else
   GRADE_BACKEND="unknown"
-  VITE_PORT=5173
+fi
+
+# Resolve the Vite port the app was actually deployed on. Default to the
+# per-backend range used by run.sh (spacetime 6173 / postgres 6273 / mongodb 6373),
+# then override with the recorded vitePort from the run's metadata.json if present
+# (handles parallel runs with run-index port offsets).
+case "$GRADE_BACKEND" in
+  spacetime) VITE_PORT=6173 ;;
+  postgres)  VITE_PORT=6273 ;;
+  mongodb)   VITE_PORT=6373 ;;
+  *)         VITE_PORT=6173 ;;
+esac
+_META=$(ls -t "$APP_DIR"/../../telemetry/*/metadata.json 2>/dev/null | head -1)
+if [[ -n "$_META" ]]; then
+  _META_ARG=$(cygpath -w "$_META" 2>/dev/null || echo "$_META")
+  _VP=$(node -e "try{const j=JSON.parse(require('fs').readFileSync(process.argv[1],'utf8'));if(j.vitePort)process.stdout.write(String(j.vitePort));}catch(e){}" -- "$_META_ARG" 2>/dev/null || echo "")
+  [[ -n "$_VP" ]] && VITE_PORT="$_VP"
 fi
 echo "  Backend:  $GRADE_BACKEND (port $VITE_PORT)"
 
diff --git a/tools/llm-sequential-upgrade/parse-playwright-results.mjs b/tools/llm-sequential-upgrade/parse-playwright-results.mjs
deleted file mode 100644
index 53fad567f00..00000000000
--- a/tools/llm-sequential-upgrade/parse-playwright-results.mjs
+++ /dev/null
@@ -1,169 +0,0 @@
-#!/usr/bin/env node
-
-/**
- * Converts Playwright JSON reporter output into GRADING_RESULTS.md
- * matching the format used by the Chrome MCP grading agent.
- *
- * Usage:
- *   node parse-playwright-results.mjs <results.json> <app-dir> <backend>
- */
-
-import fs from 'fs';
-import path from 'path';
-
-const resultsFile = process.argv[2];
-const appDir = process.argv[3];
-const backend = process.argv[4] || 'unknown';
-
-if (!resultsFile || !appDir) {
-  console.error('Usage: node parse-playwright-results.mjs <results.json> <app-dir> <backend>');
-  process.exit(1);
-}
-
-const results = JSON.parse(fs.readFileSync(resultsFile, 'utf-8'));
-
-// Feature name mapping: spec file name → feature number and name
-const FEATURES = {
-  'feature-01-basic-chat': { num: 1, name: 'Basic Chat' },
-  'feature-02-typing-indicators': { num: 2, name: 'Typing Indicators' },
-  'feature-03-read-receipts': { num: 3, name: 'Read Receipts' },
-  'feature-04-unread-counts': { num: 4, name: 'Unread Counts' },
-  'feature-05-scheduled-messages': { num: 5, name: 'Scheduled Messages' },
-  'feature-06-ephemeral-messages': { num: 6, name: 'Ephemeral Messages' },
-  'feature-07-reactions': { num: 7, name: 'Message Reactions' },
-  'feature-08-edit-history': { num: 8, name: 'Message Editing with History' },
-  'feature-09-permissions': { num: 9, name: 'Real-Time Permissions' },
-  'feature-10-presence': { num: 10, name: 'Rich User Presence' },
-  'feature-11-threading': { num: 11, name: 'Message Threading' },
-  'feature-12-private-rooms': { num: 12, name: 'Private Rooms & DMs' },
-  'feature-13-activity-indicators': { num: 13, name: 'Room Activity Indicators' },
-  'feature-14-draft-sync': { num: 14, name: 'Draft Sync' },
-  'feature-15-anonymous-migration': { num: 15, name: 'Anonymous to Registered Migration' },
-  'feature-16-pinned-messages': { num: 16, name: 'Pinned Messages' },
-  'feature-17-user-profiles': { num: 17, name: 'User Profiles' },
-  'feature-18-mentions-notifications': { num: 18, name: '@Mentions and Notifications' },
-  'feature-19-bookmarked-messages': { num: 19, name: 'Bookmarked/Saved Messages' },
-  'feature-20-message-forwarding': { num: 20, name: 'Message Forwarding' },
-  'feature-21-slow-mode': { num: 21, name: 'Slow Mode' },
-  'feature-22-polls': { num: 22, name: 'Polls' },
-};
-
-// Parse suites → extract test results per feature
-const featureResults = {};
-
-function walkSuites(suites) {
-  for (const suite of suites) {
-    // Match spec file name to feature
-    const specFile = suite.file || '';
-    const featureKey = Object.keys(FEATURES).find((k) => specFile.includes(k));
-
-    if (featureKey && suite.specs) {
-      if (!featureResults[featureKey]) {
-        featureResults[featureKey] = { passed: 0, failed: 0, skipped: 0, tests: [] };
-      }
-      for (const spec of suite.specs) {
-        for (const test of spec.tests || []) {
-          const status = test.status || test.results?.[0]?.status || 'unknown';
-          const testInfo = {
-            title: spec.title,
-            status,
-            duration: test.results?.[0]?.duration || 0,
-          };
-          featureResults[featureKey].tests.push(testInfo);
-          if (status === 'expected' || status === 'passed') {
-            featureResults[featureKey].passed++;
-          } else if (status === 'skipped') {
-            featureResults[featureKey].skipped++;
-          } else {
-            featureResults[featureKey].failed++;
-          }
-        }
-      }
-    }
-
-    if (suite.suites) {
-      walkSuites(suite.suites);
-    }
-  }
-}
-
-walkSuites(results.suites || []);
-
-// Calculate scores: 3 points per feature, proportional to pass rate
-// Skipped tests don't count toward total (they're unimplemented)
-function calcScore(fr) {
-  const total = fr.passed + fr.failed;
-  if (total === 0) return 0; // all skipped = 0
-  const ratio = fr.passed / total;
-  if (ratio >= 1.0) return 3;
-  if (ratio >= 0.66) return 2;
-  if (ratio >= 0.33) return 1;
-  return 0;
-}
-
-// Generate report
-const date = new Date().toISOString().slice(0, 10);
-let totalScore = 0;
-let totalMax = 0;
-const featureLines = [];
-const summaryRows = [];
-
-for (const [key, feat] of Object.entries(FEATURES)) {
-  const fr = featureResults[key];
-  if (!fr) continue; // skip features that weren't tested (not in the spec files run)
-  const score = calcScore(fr);
-  totalScore += score;
-  totalMax += 3;
-
-  const testDetails = fr
-    ? fr.tests
-        .map((t) => {
-          const icon = t.status === 'expected' || t.status === 'passed' ? 'x' : ' ';
-          return `- [${icon}] ${t.title} (${t.status}, ${t.duration}ms)`;
-        })
-        .join('\n')
-    : '- [ ] No tests ran';
-
-  featureLines.push(`## Feature ${feat.num}: ${feat.name} (Score: ${score} / 3)\n\n${testDetails}\n`);
-  const notes = fr
-    ? `${fr.passed}/${fr.passed + fr.failed} passed, ${fr.skipped} skipped`
-    : 'No tests';
-  summaryRows.push(
-    `| ${feat.num}. ${feat.name} | 3 | ${score} | ${notes} |`
-  );
-}
-
-const report = `# Chat App Grading Results
-
-**Model:** Playwright (automated)
-**Date:** ${date}
-**Backend:** ${backend}
-**Grading Method:** Playwright automated tests
-
----
-
-## Overall Metrics
-
-| Metric                  | Value                          |
-| ----------------------- | ------------------------------ |
-| **Features Evaluated**  | 1-15                           |
-| **Total Feature Score** | ${totalScore} / ${totalMax}    |
-
----
-
-${featureLines.join('\n---\n\n')}
-
----
-
-## Summary Score Sheet
-
-| Feature | Max | Score | Notes |
-|---------|-----|-------|-------|
-${summaryRows.join('\n')}
-| **TOTAL** | **${totalMax}** | **${totalScore}** | |
-`;
-
-const outputPath = path.join(appDir, 'GRADING_RESULTS.md');
-fs.writeFileSync(outputPath, report);
-console.log(`GRADING_RESULTS.md written to: ${outputPath}`);
-console.log(`Total score: ${totalScore}/${totalMax}`);
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/METHODOLOGY.md b/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/METHODOLOGY.md
index 67813bb4120..8ef64d16fb7 100644
--- a/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/METHODOLOGY.md
+++ b/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/METHODOLOGY.md
@@ -38,9 +38,47 @@ Same features as AI-generated. Implementation changes only:
 - Room activity broadcast: kept
 - Response sent immediately after insert instead of after user lookup
 
+## What changed (MongoDB 20260616)
+
+Same features as AI-generated. Implementation changes only (produced by a clean
+`claude-sonnet-4-6` first-principles pass — goal-only prompt, no access to the PG/STDB
+optimized references; it independently chose a different optimization set):
+- Mongo connection pool: `maxPoolSize` 5 → 20 (default pool was the bottleneck under burst load)
+- `POST /messages`: send the HTTP response right after the DB insert; defer the socket fan-out
+- `trackMessageActivity`: deferred global emit via `setImmediate`; `Date[]` → `number[]`; amortised trim instead of per-message `filter()`
+- `getActivityLevel`: single counting loop instead of two `filter()` allocations
+- Read-only list endpoints (`GET /messages`, ephemeral cleanup): added `.lean()` (skip Mongoose hydration)
+- Socket.io `perMessageDeflate: false` (compression overhead > savings for small chat payloads)
+- Added compound index `{ roomId: 1, parentId: 1, createdAt: 1 }` to satisfy the room-message query+sort in one B-tree scan
+- All features, validation, API/Socket.io contract, and data model: kept
+
 ## Benchmark results (averaged across 2 runs)
 
 | Version | STDB avg | PG avg | Ratio |
 |------|----------|--------|-------|
 | Raw | 5,267 msgs/sec | 694 msgs/sec | 7.6x |
 | Optimized (this dir) | 25,278 msgs/sec | 1,139 msgs/sec | 22x |
+
+### MongoDB (added 20260616 — separate sitting, read caveats)
+
+Stress throughput (writer-count sweep; peak shown), measured in the `20260616` run (a different machine from the `20260406` STDB/PG run):
+
+| Version | Mongo peak | vs optimized STDB |
+|------|------------|-------------------|
+| Raw | ~800 msgs/sec (peak 796 @ 200 writers) | — |
+| Optimized | ~1,400 msgs/sec (peak 1,394 @ 100 writers) | ~18x slower |
+
+Optimization gain ~1.7x — in line with PG's 1.6x (both hand-built stacks gain modestly;
+STDB's 4.8x reflects more architectural headroom). Optimized Mongo (~1,400) ≈ optimized
+PG (1,139); both ~18–22x under optimized STDB (25,278).
+
+**Caveats (do not drop these when citing the Mongo numbers):**
+1. **Cross-machine / cross-run.** STDB & PG figures are from the original `20260406` run;
+   the Mongo figures are from `20260616` on a different machine. The *within-Mongo* ratio
+   (raw→optimized, ~1.7x) is clean; absolute cross-backend numbers are not strictly
+   same-sitting. A same-machine re-run of all three would remove this.
+2. **PG is throttle-bound.** The PG app keeps a 500ms/user send rate limit (even when
+   "optimized"); the Mongo app has none. Mongo edging PG on throughput is partly that.
+3. **Optimization-prompt parity.** The original PG/STDB optimization prompt was not saved.
+   The Mongo pass used a reconstructed *goal-only* prompt (state the objective, let the
+   model find the wins) — same spirit, not the same wording.
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/mongo-index-optimized.ts b/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/mongo-index-optimized.ts
new file mode 100644
index 00000000000..429114d8aaa
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/mongo-index-optimized.ts
@@ -0,0 +1,887 @@
+import 'dotenv/config';
+import express, { Request, Response } from 'express';
+import { createServer } from 'http';
+import { Server } from 'socket.io';
+import cors from 'cors';
+import mongoose from 'mongoose';
+import { User, Room, Message, ScheduledMessage, Invitation, Draft } from './models.js';
+
+const app = express();
+const httpServer = createServer(app);
+
+const io = new Server(httpServer, {
+  cors: {
+    origin: 'http://localhost:6373',
+    methods: ['GET', 'POST'],
+  },
+  // Disable WebSocket per-message compression: chat payloads are small, so deflate overhead outweighs the savings
+  perMessageDeflate: false,
+});
+
+app.use(cors({ origin: 'http://localhost:6373' }));
+// Use a larger JSON body limit only if needed; default 100kb is fine for chat
+app.use(express.json());
+
+const DB_URL = process.env.DATABASE_URL ?? 'mongodb://localhost:6437/chat-app';
+await mongoose.connect(DB_URL, {
+  // Allow more concurrent operations under heavy load
+  maxPoolSize: 20,
+  minPoolSize: 5,
+  // Give up after 5s if no MongoDB server can be selected (bounds server selection, not the wait for a pooled connection)
+  serverSelectionTimeoutMS: 5000,
+  socketTimeoutMS: 45000,
+});
+console.log('Connected to MongoDB');
+
+// roomId -> { userName -> timeout }
+const typingTimers = new Map<string, Map<string, ReturnType<typeof setTimeout>>>();
+
+// userName -> Set of active socketIds (tracks multi-tab presence)
+const userSockets = new Map<string, Set<string>>();
+
+// roomId -> array of recent message timestamps (for activity tracking)
+// We keep this as a plain number[] (ms since epoch) instead of Date[] to avoid
+// object allocation on every push and comparison.
+const roomActivityTimestamps = new Map<string, number[]>();
+// roomId -> last emitted activity level (to detect changes during decay)
+const lastEmittedActivityLevel = new Map<string, 'hot' | 'active' | ''>();
+
+function getActivityLevel(roomId: string): 'hot' | 'active' | '' { // '' means no recent activity for roomId
+  const timestamps = roomActivityTimestamps.get(roomId);
+  if (!timestamps || timestamps.length === 0) return '';
+  const now = Date.now();
+  const cutoff5 = now - 5 * 60 * 1000; // start of the 5-minute window
+  const cutoff2 = now - 2 * 60 * 1000; // start of the 2-minute window
+  let count5 = 0;
+  let count2 = 0;
+  for (let i = 0; i < timestamps.length; i++) { // one pass fills both window counters
+    const t = timestamps[i];
+    if (t >= cutoff5) {
+      count5++;
+      if (t >= cutoff2) count2++;
+    }
+  }
+  if (count5 >= 5) return 'hot'; // 5 or more messages in the last 5 minutes
+  if (count2 >= 1) return 'active'; // at least one message in the last 2 minutes
+  return '';
+}
+
+function trackMessageActivity(roomId: string): void { // records one message for roomId, then broadcasts the room's level
+  let timestamps = roomActivityTimestamps.get(roomId);
+  if (!timestamps) {
+    timestamps = [];
+    roomActivityTimestamps.set(roomId, timestamps);
+  }
+  const now = Date.now();
+  timestamps.push(now);
+  // Lazy trim: prune only once the array exceeds 50 entries, so the splice cost is amortized over many pushes
+  if (timestamps.length > 50) {
+    const cutoff = now - 10 * 60 * 1000; // entries older than 10 minutes are dropped
+    let start = 0;
+    while (start < timestamps.length && timestamps[start] < cutoff) start++; // stops at the first entry inside the window
+    if (start > 0) timestamps.splice(0, start);
+  }
+  const level = getActivityLevel(roomId);
+  lastEmittedActivityLevel.set(roomId, level);
+  // Emit via setImmediate so the broadcast to all connected clients runs off the caller's synchronous path
+  setImmediate(() => {
+    io.emit('room-activity', { roomId, level });
+  });
+}
+
+function clearTyping(roomId: string, userName: string): void {
+  const roomMap = typingTimers.get(roomId);
+  if (!roomMap) return;
+  const timer = roomMap.get(userName);
+  if (timer !== undefined) {
+    clearTimeout(timer);
+    roomMap.delete(userName);
+  }
+}
+
+function broadcastTyping(roomId: string): void {
+  const roomMap = typingTimers.get(roomId);
+  const users = roomMap ? [...roomMap.keys()] : [];
+  io.to(roomId).emit('typing-update', { roomId, typingUsers: users });
+}
+
+function emitToUsers(users: string[], event: string, data: unknown): void {
+  for (const user of users) {
+    const sockets = userSockets.get(user);
+    if (sockets) {
+      for (const socketId of sockets) {
+        io.to(socketId).emit(event, data);
+      }
+    }
+  }
+}
+
+function emitRoomUpdated(room: { _id: mongoose.Types.ObjectId | string; members: string[]; isPrivate?: boolean; isDM?: boolean }, data: unknown): void {
+  if (room.isPrivate || room.isDM) {
+    emitToUsers(room.members, 'room-updated', data);
+  } else {
+    io.emit('room-updated', data);
+  }
+}
+
+app.get('/api/health', (_req: Request, res: Response): void => {
+  res.json({ ok: true });
+});
+
+app.get('/api/rooms/activity', (_req: Request, res: Response): void => {
+  const activity: Record<string, 'hot' | 'active' | ''> = {};
+  for (const [roomId] of roomActivityTimestamps.entries()) {
+    const level = getActivityLevel(roomId);
+    if (level) activity[roomId] = level;
+  }
+  res.json({ activity });
+});
+
+app.post('/api/users', async (req: Request, res: Response): Promise<void> => {
+  const raw = req.body?.name;
+  const name = typeof raw === 'string' ? raw.trim().slice(0, 32) : '';
+  if (!name) {
+    res.status(400).json({ error: 'Name is required (max 32 chars)' });
+    return;
+  }
+  try {
+    let user = await User.findOne({ name });
+    if (!user) {
+      user = await User.create({ name });
+    }
+    res.json({ user: { id: user._id, name: user.name } });
+  } catch (err: unknown) {
+    const mongoErr = err as { code?: number };
+    if (mongoErr.code === 11000) {
+      const user = await User.findOne({ name });
+      res.json({ user: { id: user!._id, name: user!.name } });
+    } else {
+      res.status(500).json({ error: 'Server error' });
+    }
+  }
+});
+
+app.get('/api/users', async (_req: Request, res: Response): Promise<void> => {
+  const users = await User.find({}).select('name status lastSeen online');
+  res.json({ users });
+});
+
+app.patch('/api/users/:userName/status', async (req: Request, res: Response): Promise<void> => {
+  const { status } = req.body;
+  const validStatuses = ['online', 'away', 'dnd', 'invisible'];
+  if (!validStatuses.includes(status)) {
+    res.status(400).json({ error: 'Invalid status' });
+    return;
+  }
+  const updateFields: { status: string; lastSeen?: Date } = { status };
+  if (status === 'away' || status === 'invisible') updateFields.lastSeen = new Date();
+  const user = await User.findOneAndUpdate(
+    { name: req.params.userName },
+    updateFields,
+    { new: true }
+  );
+  if (!user) { res.status(404).json({ error: 'User not found' }); return; }
+  const allUsers = await User.find({}).select('name status lastSeen online');
+  io.emit('online-users', { users: allUsers });
+  res.json({ user });
+});
+
+app.get('/api/rooms', async (req: Request, res: Response): Promise<void> => {
+  const userName = typeof req.query.userName === 'string' ? req.query.userName.trim() : '';
+  let rooms;
+  if (userName) {
+    rooms = await Room.find({
+      $or: [
+        { isPrivate: false, isDM: { $ne: true } },
+        { members: userName },
+      ],
+    }).sort({ createdAt: 1 });
+  } else {
+    rooms = await Room.find({ isPrivate: { $ne: true }, isDM: { $ne: true } }).sort({ createdAt: 1 });
+  }
+  res.json({ rooms });
+});
+
+app.post('/api/rooms', async (req: Request, res: Response): Promise<void> => {
+  const name = typeof req.body?.name === 'string' ? req.body.name.trim().slice(0, 64) : '';
+  const createdBy = typeof req.body?.createdBy === 'string' ? req.body.createdBy.trim() : '';
+  const isPrivate = req.body?.isPrivate === true;
+  if (!name || !createdBy) {
+    res.status(400).json({ error: 'name and createdBy are required' });
+    return;
+  }
+  try {
+    const room = await Room.create({ name, createdBy, members: [createdBy], admins: [createdBy], isPrivate });
+    if (isPrivate) {
+      emitToUsers([createdBy], 'room-created', { room });
+    } else {
+      io.emit('room-created', { room });
+    }
+    res.json({ room });
+  } catch (err: unknown) {
+    const mongoErr = err as { code?: number };
+    if (mongoErr.code === 11000) {
+      res.status(409).json({ error: 'Room name already taken' });
+    } else {
+      res.status(500).json({ error: 'Server error' });
+    }
+  }
+});
+
+app.post('/api/rooms/:roomId/join', async (req: Request, res: Response): Promise<void> => {
+  const userName = typeof req.body?.userName === 'string' ? req.body.userName.trim() : '';
+  if (!userName) { res.status(400).json({ error: 'userName required' }); return; }
+  const existing = await Room.findById(req.params.roomId);
+  if (!existing) { res.status(404).json({ error: 'Room not found' }); return; }
+  if ((existing.banned ?? []).includes(userName)) {
+    res.status(403).json({ error: 'You have been banned from this room' });
+    return;
+  }
+  if (existing.isPrivate || existing.isDM) {
+    res.status(403).json({ error: 'This is a private room. Request an invitation.' });
+    return;
+  }
+  const room = await Room.findByIdAndUpdate(
+    req.params.roomId,
+    { $addToSet: { members: userName } },
+    { new: true }
+  );
+  io.emit('room-updated', { room });
+  res.json({ room });
+});
+
+app.post('/api/rooms/:roomId/leave', async (req: Request, res: Response): Promise<void> => {
+  const userName = typeof req.body?.userName === 'string' ? req.body.userName.trim() : '';
+  if (!userName) { res.status(400).json({ error: 'userName required' }); return; }
+  const room = await Room.findByIdAndUpdate(
+    req.params.roomId,
+    { $pull: { members: userName } },
+    { new: true }
+  );
+  if (!room) { res.status(404).json({ error: 'Room not found' }); return; }
+  emitRoomUpdated(room, { room });
+  res.json({ room });
+});
+
+app.get('/api/rooms/:roomId/messages', async (req: Request, res: Response): Promise<void> => {
+  // Use lean() to skip Mongoose hydration for a read-only endpoint — returns plain JS objects,
+  // which is significantly faster under load than full Document instances.
+  const messages = await Message.find({ roomId: req.params.roomId, parentId: null })
+    .sort({ createdAt: 1 })
+    .limit(100)
+    .lean();
+  res.json({ messages });
+});
+
+app.post('/api/rooms/:roomId/messages', async (req: Request, res: Response): Promise<void> => {
+  const sender = typeof req.body?.sender === 'string' ? req.body.sender.trim() : '';
+  const text = typeof req.body?.text === 'string' ? req.body.text.trim().slice(0, 2000) : '';
+  if (!sender || !text) {
+    res.status(400).json({ error: 'sender and text are required' });
+    return;
+  }
+  const ttlSecondsRaw = req.body?.ttlSeconds;
+  const ttlSeconds = typeof ttlSecondsRaw === 'number' && ttlSecondsRaw > 0 ? Math.min(ttlSecondsRaw, 86400) : null; // positive numbers only, capped at 86400s (24h)
+  const expiresAt = ttlSeconds ? new Date(Date.now() + ttlSeconds * 1000) : undefined;
+  const msg = await Message.create({
+    roomId: req.params.roomId,
+    sender,
+    text,
+    readBy: [sender],
+    ...(expiresAt ? { expiresAt } : {}),
+  });
+
+  // Respond to the HTTP client immediately before broadcasting — this minimises
+  // measured POST latency since the client only needs the created message object.
+  res.json({ message: msg });
+
+  // Broadcast and activity tracking are deferred until after res.json() has been called.
+  // setImmediate runs the callback once the current I/O callbacks complete, so the
+  // response write is already queued to the socket (not necessarily fully flushed) first.
+  setImmediate(() => {
+    io.to(req.params.roomId).emit('message', { message: msg });
+    trackMessageActivity(req.params.roomId);
+  });
+});
+
+app.post('/api/rooms/:roomId/read', async (req: Request, res: Response): Promise<void> => {
+  const userName = typeof req.body?.userName === 'string' ? req.body.userName.trim() : '';
+  if (!userName) { res.status(400).json({ error: 'userName required' }); return; }
+  const roomId = req.params.roomId;
+  await Message.updateMany(
+    { roomId, readBy: { $ne: userName } },
+    { $addToSet: { readBy: userName } }
+  );
+  const messages = await Message.find({ roomId, parentId: null }).sort({ createdAt: 1 }).limit(100);
+  io.to(roomId).emit('read-receipts-updated', { roomId, messages });
+  res.json({ ok: true });
+});
+
+app.get('/api/rooms/:roomId/unread', async (req: Request, res: Response): Promise<void> => {
+  const userName = req.query.userName;
+  if (typeof userName !== 'string' || !userName) {
+    res.status(400).json({ error: 'userName query param required' });
+    return;
+  }
+  const count = await Message.countDocuments({
+    roomId: req.params.roomId,
+    sender: { $ne: userName },
+    readBy: { $ne: userName },
+  });
+  res.json({ count });
+});
+
+app.post('/api/rooms/:roomId/scheduled', async (req: Request, res: Response): Promise<void> => {
+  const sender = typeof req.body?.sender === 'string' ? req.body.sender.trim() : '';
+  const text = typeof req.body?.text === 'string' ? req.body.text.trim().slice(0, 2000) : '';
+  const scheduledAtRaw = req.body?.scheduledAt;
+  if (!sender || !text || !scheduledAtRaw) {
+    res.status(400).json({ error: 'sender, text, and scheduledAt are required' });
+    return;
+  }
+  const scheduledAt = new Date(scheduledAtRaw as string);
+  if (isNaN(scheduledAt.getTime()) || scheduledAt <= new Date()) {
+    res.status(400).json({ error: 'scheduledAt must be a future date' });
+    return;
+  }
+  const scheduled = await ScheduledMessage.create({ roomId: req.params.roomId, sender, text, scheduledAt });
+  res.json({ scheduled });
+});
+
+app.get('/api/rooms/:roomId/scheduled', async (req: Request, res: Response): Promise<void> => {
+  const userName = req.query.userName;
+  if (typeof userName !== 'string' || !userName) {
+    res.status(400).json({ error: 'userName query param required' });
+    return;
+  }
+  const scheduled = await ScheduledMessage.find({
+    roomId: req.params.roomId,
+    sender: userName,
+    sent: false,
+  }).sort({ scheduledAt: 1 });
+  res.json({ scheduled });
+});
+
+app.delete('/api/scheduled/:id', async (req: Request, res: Response): Promise<void> => {
+  await ScheduledMessage.findByIdAndDelete(req.params.id);
+  res.json({ ok: true });
+});
+
+app.patch('/api/messages/:messageId', async (req: Request, res: Response): Promise<void> => {
+  const userName = typeof req.body?.userName === 'string' ? req.body.userName.trim() : '';
+  const newText = typeof req.body?.text === 'string' ? req.body.text.trim().slice(0, 2000) : '';
+  if (!userName || !newText) {
+    res.status(400).json({ error: 'userName and text are required' });
+    return;
+  }
+  const msg = await Message.findById(req.params.messageId);
+  if (!msg) { res.status(404).json({ error: 'Message not found' }); return; }
+  if (msg.sender !== userName) { res.status(403).json({ error: 'Cannot edit another user\'s message' }); return; }
+  msg.editHistory.push({ text: msg.text, editedAt: new Date() });
+  msg.text = newText;
+  msg.isEdited = true;
+  await msg.save();
+  io.to(msg.roomId.toString()).emit('message-updated', { message: msg });
+  res.json({ message: msg });
+});
+
+app.post('/api/rooms/:roomId/kick', async (req: Request, res: Response): Promise<void> => {
+  const adminUser = typeof req.body?.adminUser === 'string' ? req.body.adminUser.trim() : '';
+  const targetUser = typeof req.body?.targetUser === 'string' ? req.body.targetUser.trim() : '';
+  if (!adminUser || !targetUser) {
+    res.status(400).json({ error: 'adminUser and targetUser are required' });
+    return;
+  }
+  const room = await Room.findById(req.params.roomId);
+  if (!room) { res.status(404).json({ error: 'Room not found' }); return; }
+  if (!(room.admins ?? []).includes(adminUser)) { res.status(403).json({ error: 'Not an admin' }); return; }
+  if ((room.admins ?? []).includes(targetUser)) { res.status(400).json({ error: 'Cannot kick an admin' }); return; }
+
+  room.members = room.members.filter((m) => m !== targetUser);
+  if (!(room.banned ?? []).includes(targetUser)) room.banned.push(targetUser);
+  await room.save();
+
+  const kickedSockets = userSockets.get(targetUser);
+  if (kickedSockets) {
+    for (const socketId of kickedSockets) {
+      const kickedSocket = io.sockets.sockets.get(socketId);
+      if (kickedSocket) {
+        kickedSocket.leave(req.params.roomId);
+        kickedSocket.emit('kicked-from-room', { roomId: req.params.roomId, roomName: room.name });
+      }
+    }
+  }
+
+  emitRoomUpdated(room, { room });
+  res.json({ room });
+});
+
+app.post('/api/rooms/:roomId/promote', async (req: Request, res: Response): Promise<void> => {
+  const adminUser = typeof req.body?.adminUser === 'string' ? req.body.adminUser.trim() : '';
+  const targetUser = typeof req.body?.targetUser === 'string' ? req.body.targetUser.trim() : '';
+  if (!adminUser || !targetUser) {
+    res.status(400).json({ error: 'adminUser and targetUser are required' });
+    return;
+  }
+  const room = await Room.findById(req.params.roomId);
+  if (!room) { res.status(404).json({ error: 'Room not found' }); return; }
+  if (!(room.admins ?? []).includes(adminUser)) { res.status(403).json({ error: 'Not an admin' }); return; }
+  if (!(room.admins ?? []).includes(targetUser)) room.admins.push(targetUser);
+  await room.save();
+
+  emitRoomUpdated(room, { room });
+  res.json({ room });
+});
+
+app.get('/api/messages/:messageId/thread', async (req: Request, res: Response): Promise<void> => {
+  const replies = await Message.find({ parentId: req.params.messageId }).sort({ createdAt: 1 });
+  res.json({ replies });
+});
+
+app.post('/api/messages/:messageId/reply', async (req: Request, res: Response): Promise<void> => {
+  const sender = typeof req.body?.sender === 'string' ? req.body.sender.trim() : '';
+  const text = typeof req.body?.text === 'string' ? req.body.text.trim().slice(0, 2000) : '';
+  if (!sender || !text) { res.status(400).json({ error: 'sender and text are required' }); return; }
+  const parent = await Message.findById(req.params.messageId);
+  if (!parent) { res.status(404).json({ error: 'Message not found' }); return; }
+  const reply = await Message.create({
+    roomId: parent.roomId,
+    sender,
+    text,
+    readBy: [sender],
+    parentId: parent._id,
+  });
+  parent.replyCount = (parent.replyCount ?? 0) + 1;
+  parent.lastReplyPreview = text.slice(0, 100);
+  parent.lastReplySender = sender;
+  await parent.save();
+  const roomId = parent.roomId.toString();
+  io.to(roomId).emit('thread-updated', {
+    parentId: parent._id.toString(),
+    replyCount: parent.replyCount,
+    lastReplyPreview: parent.lastReplyPreview,
+    lastReplySender: parent.lastReplySender,
+  });
+  io.to(`thread-${parent._id.toString()}`).emit('thread-reply', { reply });
+  res.json({ reply });
+});
+
+app.post('/api/messages/:messageId/react', async (req: Request, res: Response): Promise<void> => {
+  const userName = typeof req.body?.userName === 'string' ? req.body.userName.trim() : '';
+  const emoji = typeof req.body?.emoji === 'string' ? req.body.emoji.trim() : '';
+  if (!userName || !emoji) {
+    res.status(400).json({ error: 'userName and emoji are required' });
+    return;
+  }
+  const msg = await Message.findById(req.params.messageId);
+  if (!msg) { res.status(404).json({ error: 'Message not found' }); return; }
+
+  const entry = msg.reactions.find((r) => r.emoji === emoji);
+  if (entry) {
+    const idx = entry.users.indexOf(userName);
+    if (idx >= 0) entry.users.splice(idx, 1);
+    else entry.users.push(userName);
+  } else {
+    msg.reactions.push({ emoji, users: [userName] });
+  }
+  msg.reactions = msg.reactions.filter((r) => r.users.length > 0);
+  await msg.save();
+  io.to(msg.roomId.toString()).emit('reaction-updated', { message: msg });
+  res.json({ message: msg });
+});
+
+// Private room invitation endpoints
+app.post('/api/rooms/:roomId/invite', async (req: Request, res: Response): Promise<void> => {
+  const invitedBy = typeof req.body?.invitedBy === 'string' ? req.body.invitedBy.trim() : '';
+  const invitedUser = typeof req.body?.invitedUser === 'string' ? req.body.invitedUser.trim() : '';
+  if (!invitedBy || !invitedUser) {
+    res.status(400).json({ error: 'invitedBy and invitedUser are required' });
+    return;
+  }
+  if (invitedBy === invitedUser) {
+    res.status(400).json({ error: 'Cannot invite yourself' });
+    return;
+  }
+  const room = await Room.findById(req.params.roomId);
+  if (!room) { res.status(404).json({ error: 'Room not found' }); return; }
+  if (!room.members.includes(invitedBy)) { res.status(403).json({ error: 'Not a member of this room' }); return; }
+  if (room.members.includes(invitedUser)) { res.status(400).json({ error: 'User is already a member' }); return; }
+
+  const target = await User.findOne({ name: invitedUser });
+  if (!target) { res.status(404).json({ error: 'User not found' }); return; }
+
+  const existing = await Invitation.findOne({ roomId: room._id, invitedUser, status: 'pending' });
+  if (existing) { res.status(400).json({ error: 'User already has a pending invitation' }); return; }
+
+  const invitation = await Invitation.create({
+    roomId: room._id,
+    roomName: room.isDM ? invitedBy : room.name,
+    invitedUser,
+    invitedBy,
+  });
+
+  emitToUsers([invitedUser], 'invitation-received', { invitation });
+  res.json({ invitation });
+});
+
+app.get('/api/invitations', async (req: Request, res: Response): Promise<void> => {
+  const userName = typeof req.query.userName === 'string' ? req.query.userName.trim() : '';
+  if (!userName) { res.status(400).json({ error: 'userName required' }); return; }
+  const invitations = await Invitation.find({ invitedUser: userName, status: 'pending' }).sort({ createdAt: -1 });
+  res.json({ invitations });
+});
+
+// POST /api/invitations/:id/accept
+// Accepts a pending invitation: adds the invited user to the room's member
+// list, marks the invitation accepted, joins the user's live sockets to the
+// room channel and broadcasts the updated room.
+//   400 - malformed id, or the invitation was already accepted/declined
+//   404 - invitation or room does not exist
+//   500 - unexpected failure (logged)
+// The whole body is wrapped in try/catch because a rejected promise inside an
+// async route handler would otherwise leave the request unanswered.
+app.post('/api/invitations/:id/accept', async (req: Request, res: Response): Promise<void> => {
+  try {
+    const invitation = await Invitation.findById(req.params.id);
+    if (!invitation) { res.status(404).json({ error: 'Invitation not found' }); return; }
+    if (invitation.status !== 'pending') { res.status(400).json({ error: 'Invitation already processed' }); return; }
+
+    // $addToSet keeps the member list free of duplicates if this runs twice.
+    const room = await Room.findByIdAndUpdate(
+      invitation.roomId,
+      { $addToSet: { members: invitation.invitedUser } },
+      { new: true }
+    );
+    if (!room) { res.status(404).json({ error: 'Room not found' }); return; }
+
+    invitation.status = 'accepted';
+    await invitation.save();
+
+    // Auto-join the accepted user's sockets to the room and notify them
+    const roomChannel = room._id.toString();
+    for (const sid of userSockets.get(invitation.invitedUser) ?? []) {
+      const sock = io.sockets.sockets.get(sid);
+      if (!sock) continue;
+      sock.join(roomChannel);
+      sock.emit('room-accessible', { room });
+    }
+
+    // Notify all members (who are in the room socket channel) of the updated member list
+    io.to(roomChannel).emit('room-updated', { room });
+
+    res.json({ room });
+  } catch (err) {
+    // Mongoose raises a CastError when `:id` is not a valid ObjectId.
+    if (err instanceof Error && err.name === 'CastError') {
+      res.status(400).json({ error: 'Invalid invitation id' });
+      return;
+    }
+    console.error('Accept invitation error:', err);
+    if (!res.headersSent) res.status(500).json({ error: 'Failed to accept invitation' });
+  }
+});
+
+// POST /api/invitations/:id/decline
+// Marks an invitation as declined. Declining is idempotent (a second decline
+// still answers { ok: true }), but an invitation that was already ACCEPTED is
+// left untouched: the user is a room member by then, and flipping the record
+// to 'declined' would contradict that.
+//   400 - malformed id, or the invitation was already accepted
+//   404 - invitation does not exist
+//   500 - unexpected failure (logged)
+app.post('/api/invitations/:id/decline', async (req: Request, res: Response): Promise<void> => {
+  try {
+    // The status condition lives in the filter so check + update are one atomic operation.
+    const invitation = await Invitation.findOneAndUpdate(
+      { _id: req.params.id, status: { $ne: 'accepted' } },
+      { status: 'declined' },
+      { new: true }
+    );
+    if (invitation) { res.json({ ok: true }); return; }
+
+    // Nothing matched: tell "no such invitation" apart from "already accepted".
+    const alreadyAccepted = await Invitation.exists({ _id: req.params.id });
+    if (alreadyAccepted) { res.status(400).json({ error: 'Invitation already processed' }); return; }
+    res.status(404).json({ error: 'Invitation not found' });
+  } catch (err) {
+    // Mongoose raises a CastError when `:id` is not a valid ObjectId.
+    if (err instanceof Error && err.name === 'CastError') {
+      res.status(400).json({ error: 'Invalid invitation id' });
+      return;
+    }
+    console.error('Decline invitation error:', err);
+    if (!res.headersSent) res.status(500).json({ error: 'Failed to decline invitation' });
+  }
+});
+
+// Create or retrieve a DM room between two users
+// POST /api/dm { user1, user2 } -> { room }
+// The room name is derived from the two user names in sorted order, so the
+// same pair always maps to the same room regardless of who initiates.
+//   400 - a name is missing/blank, or both names are identical
+//   500 - unexpected failure (logged)
+app.post('/api/dm', async (req: Request, res: Response): Promise<void> => {
+  const user1 = typeof req.body?.user1 === 'string' ? req.body.user1.trim() : '';
+  const user2 = typeof req.body?.user2 === 'string' ? req.body.user2.trim() : '';
+  if (!user1 || !user2 || user1 === user2) {
+    res.status(400).json({ error: 'user1 and user2 are required and must be different' });
+    return;
+  }
+  const dmUsers = [user1, user2].sort();
+  const dmName = `__dm__${dmUsers[0]}__${dmUsers[1]}`;
+
+  try {
+    let room = await Room.findOne({ name: dmName });
+    if (!room) {
+      try {
+        room = await Room.create({
+          name: dmName,
+          createdBy: user1,
+          members: dmUsers,
+          admins: [],
+          isPrivate: true,
+          isDM: true,
+          dmUsers,
+        });
+        // Notify both users about the new DM room
+        emitToUsers(dmUsers, 'room-created', { room });
+      } catch (err) {
+        // Two requests for the same pair can both miss the findOne above. When
+        // the room name is uniquely indexed the loser gets a duplicate-key
+        // error (code 11000); fall back to the room the winner just created.
+        if ((err as { code?: number } | null)?.code !== 11000) throw err;
+        room = await Room.findOne({ name: dmName });
+      }
+    }
+    if (!room) { res.status(500).json({ error: 'Failed to open DM room' }); return; }
+
+    // Auto-join both users' sockets to the DM room socket channel
+    const roomChannel = room._id.toString();
+    for (const user of dmUsers) {
+      for (const sid of userSockets.get(user) ?? []) {
+        const sock = io.sockets.sockets.get(sid);
+        if (sock) sock.join(roomChannel);
+      }
+    }
+
+    res.json({ room });
+  } catch (err) {
+    console.error('DM room error:', err);
+    if (!res.headersSent) res.status(500).json({ error: 'Failed to open DM room' });
+  }
+});
+
+// Produces a guest name of the form `Anon_` + 6 random characters that no
+// existing User document currently holds. The alphabet omits the glyphs
+// I, O, i, l, o, 0 and 1. Candidates are drawn until the lookup finds no
+// user with that name.
+async function generateAnonName(): Promise<string> {
+  const alphabet = 'ABCDEFGHJKLMNPQRSTUVWXYZabcdefghjkmnpqrstuvwxyz23456789';
+  for (;;) {
+    const picked = Array.from({ length: 6 }, () => alphabet[Math.floor(Math.random() * alphabet.length)]);
+    const candidate = `Anon_${picked.join('')}`;
+    const holder = await User.findOne({ name: candidate });
+    if (!holder) return candidate;
+  }
+}
+
+// POST /api/anon-user -> { user: { id, name } }
+// Creates a guest account under a freshly generated `Anon_xxxxxx` name,
+// flagged `isAnonymous` so it can later be upgraded via the register route.
+//   500 - name generation or the insert failed (cause is logged server-side)
+app.post('/api/anon-user', async (_req: Request, res: Response): Promise<void> => {
+  try {
+    const name = await generateAnonName();
+    const user = await User.create({ name, isAnonymous: true });
+    res.json({ user: { id: user._id, name: user.name } });
+  } catch (err) {
+    // The client only gets a generic message; keep the real cause in the log.
+    console.error('Guest session creation error:', err);
+    res.status(500).json({ error: 'Failed to create guest session' });
+  }
+});
+
+// POST /api/users/:userName/register { newName } -> { user: { id, name } }
+// Upgrades an anonymous user to a registered one by renaming it. Because other
+// collections reference users by name string, every stored occurrence of the
+// old name is rewritten before the User document itself is renamed.
+//   400 - newName missing/blank, or the user is not anonymous
+//   404 - no user named :userName
+//   409 - newName is already in use
+// NOTE(review): the migration below is a sequence of independent updates; no
+// transaction is visible here, so a failure midway would leave a partial
+// rename — confirm whether that is acceptable.
+// NOTE(review): nothing in this handler touches the in-memory `userSockets`
+// map, which is keyed by user name — presumably the client re-authenticates
+// under the new name; verify.
+app.post('/api/users/:userName/register', async (req: Request, res: Response): Promise<void> => {
+  const anonName = req.params.userName;
+  // Trim, then cap at 32 characters.
+  const newName = typeof req.body?.newName === 'string' ? req.body.newName.trim().slice(0, 32) : '';
+  if (!newName) { res.status(400).json({ error: 'newName is required' }); return; }
+
+  const anonUser = await User.findOne({ name: anonName });
+  if (!anonUser) { res.status(404).json({ error: 'User not found' }); return; }
+  if (!anonUser.isAnonymous) { res.status(400).json({ error: 'User is already registered' }); return; }
+
+  const existing = await User.findOne({ name: newName });
+  if (existing) { res.status(409).json({ error: 'Username already taken' }); return; }
+
+  // Find DM rooms before updating so we can rename them
+  const dmRooms = await Room.find({ isDM: true, dmUsers: anonName });
+
+  // Migrate messages
+  await Message.updateMany({ sender: anonName }, { $set: { sender: newName } });
+  // `$[el]` + arrayFilters rewrites only the array entries equal to the old name.
+  await Message.updateMany(
+    { readBy: anonName },
+    { $set: { 'readBy.$[el]': newName } },
+    { arrayFilters: [{ el: anonName }] }
+  );
+  // `$[]` walks every reaction; `$[u]` rewrites the matching entries of its users list.
+  await Message.updateMany(
+    { 'reactions.users': anonName },
+    { $set: { 'reactions.$[].users.$[u]': newName } },
+    { arrayFilters: [{ u: anonName }] }
+  );
+
+  // Migrate rooms
+  await Room.updateMany({ members: anonName }, { $set: { 'members.$[el]': newName } }, { arrayFilters: [{ el: anonName }] });
+  await Room.updateMany({ admins: anonName }, { $set: { 'admins.$[el]': newName } }, { arrayFilters: [{ el: anonName }] });
+  await Room.updateMany({ banned: anonName }, { $set: { 'banned.$[el]': newName } }, { arrayFilters: [{ el: anonName }] });
+  await Room.updateMany({ createdBy: anonName }, { $set: { createdBy: newName } });
+  await Room.updateMany({ dmUsers: anonName }, { $set: { 'dmUsers.$[el]': newName } }, { arrayFilters: [{ el: anonName }] });
+
+  // Rename DM rooms (their name includes the usernames)
+  // `dmRooms` was loaded before the updates above, so its dmUsers still hold the old name.
+  for (const dmRoom of dmRooms) {
+    const updatedUsers = dmRoom.dmUsers.map((u) => (u === anonName ? newName : u));
+    const sorted = [...updatedUsers].sort();
+    await Room.updateOne({ _id: dmRoom._id }, { $set: { name: `__dm__${sorted[0]}__${sorted[1]}` } });
+  }
+
+  // Migrate scheduled messages, drafts, invitations
+  await ScheduledMessage.updateMany({ sender: anonName }, { $set: { sender: newName } });
+  await Draft.updateMany({ userName: anonName }, { $set: { userName: newName } });
+  await Invitation.updateMany({ invitedUser: anonName }, { $set: { invitedUser: newName } });
+  await Invitation.updateMany({ invitedBy: anonName }, { $set: { invitedBy: newName } });
+
+  // Rename the user document
+  await User.updateOne({ name: anonName }, { $set: { name: newName, isAnonymous: false } });
+
+  // Broadcast updates
+  // Private/DM rooms are announced to their members only; other rooms to every socket.
+  const updatedRooms = await Room.find({ $or: [{ members: newName }, { createdBy: newName }] });
+  for (const room of updatedRooms) {
+    if (room.isPrivate || room.isDM) {
+      emitToUsers(room.members, 'room-updated', { room });
+    } else {
+      io.emit('room-updated', { room });
+    }
+  }
+  // Push the refreshed user list to every connected socket.
+  const allUsers = await User.find({}).select('name status lastSeen online');
+  io.emit('online-users', { users: allUsers });
+
+  // `_id` is unchanged by the rename, so the document loaded earlier still supplies it.
+  res.json({ user: { id: anonUser._id, name: newName } });
+});
+
+// GET /api/drafts?userName=<name> -> { drafts: { [roomId]: text } }
+// Returns every saved draft of `userName`, keyed by room id.
+// Responds 400 when the query parameter is absent, not a string, or blank.
+app.get('/api/drafts', async (req: Request, res: Response): Promise<void> => {
+  const rawName = req.query.userName;
+  const userName = typeof rawName === 'string' ? rawName.trim() : '';
+  if (userName.length === 0) {
+    res.status(400).json({ error: 'userName required' });
+    return;
+  }
+
+  const drafts: Record<string, string> = {};
+  const stored = await Draft.find({ userName });
+  stored.forEach((draft) => {
+    drafts[draft.roomId] = draft.text;
+  });
+  res.json({ drafts });
+});
+
+// Socket.IO connection handler. Each socket keeps its own `currentUser` (set
+// by 'authenticate') and registers handlers for presence, room/thread channel
+// membership, typing indicators and draft sync.
+//
+// Event payloads come straight from the network, so handlers read them
+// defensively instead of destructuring in the parameter list (a missing
+// payload would throw), and async handlers catch database errors so a failure
+// does not escape as an unhandled rejection.
+io.on('connection', (socket) => {
+  let currentUser: string | null = null;
+
+  // authenticate { userName }: bind this socket to a user name, mark the user
+  // online, broadcast the user list and join the user's private/DM channels.
+  socket.on('authenticate', async (payload: { userName: string }) => {
+    const userName = typeof payload?.userName === 'string' ? payload.userName : '';
+    if (!userName) return;
+
+    // Re-authenticating under a different name: remove this socket from the
+    // old name's set, otherwise the stale id keeps that set non-empty forever.
+    if (currentUser && currentUser !== userName) {
+      const previous = userSockets.get(currentUser);
+      if (previous) {
+        previous.delete(socket.id);
+        if (previous.size === 0) userSockets.delete(currentUser);
+      }
+    }
+
+    currentUser = userName;
+    if (!userSockets.has(userName)) userSockets.set(userName, new Set());
+    userSockets.get(userName)!.add(socket.id);
+
+    try {
+      await User.findOneAndUpdate(
+        { name: userName },
+        { online: true, socketId: socket.id, lastSeen: new Date() },
+        { upsert: true, new: true }
+      );
+      const allUsers = await User.find({}).select('name status lastSeen online');
+      io.emit('online-users', { users: allUsers });
+
+      // Auto-join private/DM rooms the user is already a member of
+      const privateRooms = await Room.find({ members: userName, $or: [{ isPrivate: true }, { isDM: true }] }).select('_id');
+      for (const room of privateRooms) {
+        socket.join(room._id.toString());
+      }
+    } catch (err) {
+      console.error('Socket authenticate error:', err);
+    }
+  });
+
+  // NOTE(review): join-room performs no membership check here, so any socket
+  // can subscribe to any room id it knows — confirm whether private rooms
+  // need to be protected at this point.
+  socket.on('join-room', (roomId: string) => {
+    socket.join(roomId);
+  });
+
+  socket.on('join-thread', (messageId: string) => {
+    socket.join(`thread-${messageId}`);
+  });
+
+  socket.on('leave-thread', (messageId: string) => {
+    socket.leave(`thread-${messageId}`);
+  });
+
+  socket.on('leave-room', (roomId: string) => {
+    socket.leave(roomId);
+    if (currentUser) {
+      clearTyping(roomId, currentUser);
+      broadcastTyping(roomId);
+    }
+  });
+
+  // typing-start { roomId }: (re)arm a 3 s timer for this user in the room;
+  // when it fires without a refresh the typing state is cleared again.
+  socket.on('typing-start', (payload: { roomId: string }) => {
+    const roomId = payload?.roomId;
+    if (!currentUser || !roomId || typeof roomId !== 'string') return;
+    if (!typingTimers.has(roomId)) typingTimers.set(roomId, new Map());
+    clearTyping(roomId, currentUser);
+    // Capture the name: `currentUser` may change before the timer fires.
+    const user = currentUser;
+    const timer = setTimeout(() => {
+      clearTyping(roomId, user);
+      broadcastTyping(roomId);
+    }, 3000);
+    typingTimers.get(roomId)!.set(currentUser, timer);
+    broadcastTyping(roomId);
+  });
+
+  socket.on('typing-stop', (payload: { roomId: string }) => {
+    const roomId = payload?.roomId;
+    if (!currentUser || !roomId || typeof roomId !== 'string') return;
+    clearTyping(roomId, currentUser);
+    broadcastTyping(roomId);
+  });
+
+  // draft-update { roomId, text }: store (or delete, when empty) the user's
+  // draft for the room and mirror it to the user's other sockets.
+  socket.on('draft-update', async (payload: { roomId: string; text: string }) => {
+    const roomId = payload?.roomId;
+    const text = payload?.text;
+    // roomId goes into query filters below, so only a plain string is accepted.
+    if (!currentUser || !roomId || typeof roomId !== 'string') return;
+    const user = currentUser;
+    const trimmed = typeof text === 'string' ? text.slice(0, 2000) : '';
+    try {
+      if (trimmed) {
+        await Draft.findOneAndUpdate(
+          { userName: user, roomId },
+          { text: trimmed, updatedAt: new Date() },
+          { upsert: true, new: true }
+        );
+      } else {
+        await Draft.deleteOne({ userName: user, roomId });
+      }
+    } catch (err) {
+      // Not persisted: do not tell the other devices about it.
+      console.error('Draft update error:', err);
+      return;
+    }
+    // Broadcast to other sockets of the same user (multi-device sync)
+    const userSocketSet = userSockets.get(user);
+    if (userSocketSet) {
+      for (const sid of userSocketSet) {
+        if (sid !== socket.id) {
+          io.to(sid).emit('draft-updated', { roomId, text: trimmed });
+        }
+      }
+    }
+  });
+
+  // disconnect: forget this socket, mark the user offline once no socket is
+  // left, drop the user's typing timers and broadcast the new state.
+  socket.on('disconnect', async () => {
+    if (!currentUser) return;
+    const user = currentUser;
+    const sockets = userSockets.get(user);
+    if (sockets) {
+      sockets.delete(socket.id);
+      if (sockets.size === 0) userSockets.delete(user);
+    }
+    const stillOnline = (userSockets.get(user)?.size ?? 0) > 0;
+    if (!stillOnline) {
+      try {
+        await User.findOneAndUpdate({ name: user }, { online: false, lastSeen: new Date() });
+      } catch (err) {
+        // Keep going: the in-memory typing cleanup below must still run.
+        console.error('Socket disconnect presence update error:', err);
+      }
+    }
+    const roomsToUpdate: string[] = [];
+    for (const [roomId, roomMap] of typingTimers.entries()) {
+      if (roomMap.has(user)) {
+        clearTimeout(roomMap.get(user)!);
+        roomMap.delete(user);
+        roomsToUpdate.push(roomId);
+      }
+    }
+    for (const roomId of roomsToUpdate) broadcastTyping(roomId);
+    try {
+      const allUsers = await User.find({}).select('name status lastSeen online');
+      io.emit('online-users', { users: allUsers });
+    } catch (err) {
+      console.error('Socket disconnect user list error:', err);
+    }
+  });
+});
+
+// Scheduled-message dispatcher: every 10 s, turn each due ScheduledMessage
+// (sent = false, scheduledAt <= now) into a real Message, mark it sent and
+// broadcast it to the room channel.
+//
+// - `scheduledPollInFlight` skips a tick while the previous one is still
+//   running; two overlapping polls would both read the same unsent rows and
+//   deliver them twice.
+// - Each item has its own try/catch so one failing row cannot block every
+//   row behind it on every tick.
+let scheduledPollInFlight = false;
+setInterval(async () => {
+  if (scheduledPollInFlight) return;
+  scheduledPollInFlight = true;
+  try {
+    const due = await ScheduledMessage.find({ sent: false, scheduledAt: { $lte: new Date() } });
+    for (const scheduled of due) {
+      try {
+        const msg = await Message.create({
+          roomId: scheduled.roomId,
+          sender: scheduled.sender,
+          text: scheduled.text,
+          readBy: [scheduled.sender],
+        });
+        scheduled.sent = true;
+        await scheduled.save();
+        const roomId = scheduled.roomId.toString();
+        io.to(roomId).emit('message', { message: msg });
+        io.to(roomId).emit('scheduled-message-sent', { scheduledId: scheduled._id.toString() });
+        trackMessageActivity(roomId);
+      } catch (err) {
+        console.error('Scheduled message poll error:', err);
+      }
+    }
+  } catch (err) {
+    console.error('Scheduled message poll error:', err);
+  } finally {
+    scheduledPollInFlight = false;
+  }
+}, 10000);
+
+// Ephemeral-message sweeper: every 5 s, delete each message whose `expiresAt`
+// has passed and tell the room channel which message disappeared. Failures
+// are logged and the sweep simply runs again on the next tick.
+const purgeExpiredMessages = async (): Promise<void> => {
+  try {
+    // lean(): only _id and roomId are needed, so skip building full documents
+    const expiredDocs = await Message.find({ expiresAt: { $lte: new Date() } }).select('_id roomId').lean();
+    for (const { _id, roomId: rawRoomId } of expiredDocs) {
+      const roomId = (rawRoomId as mongoose.Types.ObjectId).toString();
+      await Message.findByIdAndDelete(_id);
+      const messageId = (_id as mongoose.Types.ObjectId).toString();
+      io.to(roomId).emit('message-deleted', { messageId, roomId });
+    }
+  } catch (cleanupError) {
+    console.error('Ephemeral message cleanup error:', cleanupError);
+  }
+};
+setInterval(purgeExpiredMessages, 5000);
+
+// Every 15 s: drop activity timestamps older than ten minutes, recompute each
+// room's activity level and broadcast it when it differs from the last level
+// emitted, so badges fade while a room is quiet. Rooms with no timestamps
+// left are removed from both tracking maps.
+setInterval(() => {
+  const cutoff = Date.now() - 10 * 60 * 1000;
+  roomActivityTimestamps.forEach((timestamps, roomId) => {
+    // Stale entries form a prefix of the list; count it and cut it off in place.
+    const firstFresh = timestamps.findIndex((stamp) => !(stamp < cutoff));
+    const staleCount = firstFresh === -1 ? timestamps.length : firstFresh;
+    if (staleCount > 0) timestamps.splice(0, staleCount);
+
+    const level = getActivityLevel(roomId);
+    const previous = lastEmittedActivityLevel.get(roomId) ?? '';
+    if (previous !== level) {
+      lastEmittedActivityLevel.set(roomId, level);
+      io.emit('room-activity', { roomId, level });
+    }
+
+    if (timestamps.length === 0) {
+      roomActivityTimestamps.delete(roomId);
+      lastEmittedActivityLevel.delete(roomId);
+    }
+  });
+}, 15000);
+
+// Listen on $PORT when it parses to a non-zero number, otherwise on 6001.
+const PORT = Number(process.env.PORT) || 6001;
+const announceListening = (): void => {
+  console.log(`Server on port ${PORT}`);
+};
+httpServer.listen(PORT, announceListening);
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/mongo-models-optimized.ts b/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/mongo-models-optimized.ts
new file mode 100644
index 00000000000..98657ff067f
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/optimized-reference/mongo-models-optimized.ts
@@ -0,0 +1,164 @@
+import mongoose, { Schema, Document } from 'mongoose';
+
+// A chat user. The other schemas in this file refer to users through plain
+// string fields (members, sender, invitedUser, userName, ...).
+export interface IUser extends Document {
+  name: string;
+  online: boolean;
+  socketId?: string;
+  lastSeen: Date;
+  status: 'online' | 'away' | 'dnd' | 'invisible';
+  isAnonymous: boolean;
+}
+
+const UserSchema = new Schema<IUser>({
+  // `unique: true` makes Mongoose declare a unique index on name.
+  name: { type: String, required: true, unique: true, trim: true, maxlength: 32 },
+  online: { type: Boolean, default: false },
+  socketId: { type: String },
+  lastSeen: { type: Date, default: Date.now },
+  status: { type: String, enum: ['online', 'away', 'dnd', 'invisible'], default: 'online' },
+  isAnonymous: { type: Boolean, default: false },
+});
+
+export const User = mongoose.model<IUser>('User', UserSchema);
+
+// A chat room. The people-related fields (createdBy, members, admins, banned,
+// dmUsers) are all stored as strings; `isDM` and `isPrivate` are independent
+// boolean flags that both default to false.
+export interface IRoom extends Document {
+  name: string;
+  createdBy: string;
+  members: string[];
+  admins: string[];
+  banned: string[];
+  isPrivate: boolean;
+  isDM: boolean;
+  dmUsers: string[];
+  createdAt: Date;
+}
+
+const RoomSchema = new Schema<IRoom>({
+  // `unique: true` makes Mongoose declare a unique index on name.
+  name: { type: String, required: true, unique: true, trim: true, maxlength: 128 },
+  createdBy: { type: String, required: true },
+  members: [{ type: String }],
+  admins: [{ type: String }],
+  banned: [{ type: String }],
+  isPrivate: { type: Boolean, default: false },
+  isDM: { type: Boolean, default: false },
+  dmUsers: [{ type: String }],
+  createdAt: { type: Date, default: Date.now },
+});
+
+export const Room = mongoose.model<IRoom>('Room', RoomSchema);
+
+// One emoji on a message together with the list of users recorded for it.
+export interface IReaction {
+  emoji: string;
+  users: string[];
+}
+
+// One entry of a message's edit history: a text and the time of the edit.
+// NOTE(review): presumably `text` is the content before the edit — confirm
+// against the code that appends to editHistory.
+export interface IEditEntry {
+  text: string;
+  editedAt: Date;
+}
+
+// A chat message. `parentId` is null on a message that is not a reply (see the
+// index comment below); the replyCount / lastReply* fields hold thread summary
+// data on the message itself.
+// NOTE(review): expiresAt, parentId, lastReplyPreview and lastReplySender are
+// typed optional here but default to null in the schema, so stored values can
+// be null rather than undefined.
+export interface IMessage extends Document {
+  roomId: mongoose.Types.ObjectId;
+  sender: string;
+  text: string;
+  createdAt: Date;
+  readBy: string[];
+  expiresAt?: Date;
+  reactions: IReaction[];
+  editHistory: IEditEntry[];
+  isEdited: boolean;
+  parentId?: mongoose.Types.ObjectId;
+  replyCount: number;
+  lastReplyPreview?: string;
+  lastReplySender?: string;
+}
+
+const MessageSchema = new Schema<IMessage>({
+  roomId: { type: Schema.Types.ObjectId, ref: 'Room', required: true },
+  sender: { type: String, required: true },
+  text: { type: String, required: true, maxlength: 2000 },
+  createdAt: { type: Date, default: Date.now },
+  readBy: [{ type: String }],
+  expiresAt: { type: Date, default: null },
+  reactions: [{ emoji: { type: String, required: true }, users: [{ type: String }] }],
+  editHistory: [{ text: { type: String, required: true }, editedAt: { type: Date, required: true } }],
+  isEdited: { type: Boolean, default: false },
+  parentId: { type: Schema.Types.ObjectId, ref: 'Message', default: null },
+  replyCount: { type: Number, default: 0 },
+  lastReplyPreview: { type: String, default: null },
+  lastReplySender: { type: String, default: null },
+});
+
+// Original indexes
+MessageSchema.index({ roomId: 1, createdAt: 1 });
+// NOTE(review): expiresAt defaults to null, so the field is present on every
+// document; a sparse index only leaves out documents where the field is
+// missing — confirm `sparse` has the intended effect here.
+MessageSchema.index({ expiresAt: 1 }, { sparse: true });
+MessageSchema.index({ parentId: 1, createdAt: 1 });
+
+// Added: compound index that covers GET /api/rooms/:roomId/messages exactly.
+// That query is: { roomId, parentId: null } ORDER BY createdAt ASC LIMIT 100.
+// The original { roomId, createdAt } index leaves parentId as a post-filter;
+// this index satisfies the full predicate + sort in a single B-tree scan.
+MessageSchema.index({ roomId: 1, parentId: 1, createdAt: 1 });
+
+export const Message = mongoose.model<IMessage>('Message', MessageSchema);
+
+// A message queued for later delivery: `scheduledAt` is the requested time and
+// `sent` (default false) records whether it has been delivered yet.
+export interface IScheduledMessage extends Document {
+  roomId: mongoose.Types.ObjectId;
+  sender: string;
+  text: string;
+  scheduledAt: Date;
+  sent: boolean;
+  createdAt: Date;
+}
+
+const ScheduledMessageSchema = new Schema<IScheduledMessage>({
+  roomId: { type: Schema.Types.ObjectId, ref: 'Room', required: true },
+  sender: { type: String, required: true },
+  text: { type: String, required: true, maxlength: 2000 },
+  scheduledAt: { type: Date, required: true },
+  sent: { type: Boolean, default: false },
+  createdAt: { type: Date, default: Date.now },
+});
+
+// Compound index over the two fields that describe whether a message is due.
+ScheduledMessageSchema.index({ scheduledAt: 1, sent: 1 });
+
+export const ScheduledMessage = mongoose.model<IScheduledMessage>('ScheduledMessage', ScheduledMessageSchema);
+
+// An invitation of `invitedUser` to a room, issued by `invitedBy`. `status`
+// starts as 'pending' and can also be 'accepted' or 'declined'. `roomName` is
+// stored on the invitation itself alongside the room reference.
+export interface IInvitation extends Document {
+  roomId: mongoose.Types.ObjectId;
+  roomName: string;
+  invitedUser: string;
+  invitedBy: string;
+  status: 'pending' | 'accepted' | 'declined';
+  createdAt: Date;
+}
+
+const InvitationSchema = new Schema<IInvitation>({
+  roomId: { type: Schema.Types.ObjectId, ref: 'Room', required: true },
+  roomName: { type: String, required: true },
+  invitedUser: { type: String, required: true },
+  invitedBy: { type: String, required: true },
+  status: { type: String, enum: ['pending', 'accepted', 'declined'], default: 'pending' },
+  createdAt: { type: Date, default: Date.now },
+});
+
+// Supports lookups of one user's invitations filtered by status.
+InvitationSchema.index({ invitedUser: 1, status: 1 });
+
+export const Invitation = mongoose.model<IInvitation>('Invitation', InvitationSchema);
+
+// An unsent message draft. Note that `roomId` is a plain string here, not an
+// ObjectId reference as in the other schemas.
+export interface IDraft extends Document {
+  userName: string;
+  roomId: string;
+  text: string;
+  updatedAt: Date;
+}
+
+const DraftSchema = new Schema<IDraft>({
+  userName: { type: String, required: true },
+  roomId: { type: String, required: true },
+  text: { type: String, required: true, maxlength: 2000 },
+  updatedAt: { type: Date, default: Date.now },
+});
+
+// At most one draft per (user, room) pair.
+DraftSchema.index({ userName: 1, roomId: 1 }, { unique: true });
+
+export const Draft = mongoose.model<IDraft>('Draft', DraftSchema);
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/clients/mongodb-client.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/clients/mongodb-client.ts
new file mode 100644
index 00000000000..57b2f266e89
--- /dev/null
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/clients/mongodb-client.ts
@@ -0,0 +1,120 @@
+// MongoDB chat-app client wrapper for the perf benchmark.
+//
+// The Level 12 generated MongoDB app (MERN: Express + Mongoose + Socket.io)
+// exposes a DIFFERENT contract than the Postgres app — identity is the
+// username string (not a numeric id), and messages are sent over REST:
+//
+//   POST /api/users { name }                       -> { user: { id, name } }
+//   POST /api/rooms { name, createdBy }            -> { room: { _id, ... } }
+//   POST /api/rooms/:roomId/join { userName }
+//   POST /api/rooms/:roomId/messages { sender, text } -> { message: {...} }   // SEND (REST)
+//   socket.emit('authenticate', { userName })       // register presence
+//   socket.emit('join-room', roomId)                // BARE string arg (not {roomId})
+//   socket.on('message', ({ message }) => ...)       // broadcast wrapped in { message }
+//
+// Notes:
+// - The Mongo app's send_message path is REST-only (there is no socket
+//   'send_message' handler), so ack latency is the POST HTTP round-trip
+//   (server inserts + responds). Fan-out latency is measured by a separate
+//   listener socket joined to the room (true server→client broadcast).
+// - IMPORTANT: unlike the PG app, the Mongo app enforces NO per-user send
+//   rate limit. Throughput numbers are therefore not directly comparable to
+//   the PG `stress` scenario (PG caps each writer at ~2 msg/s). The
+//   `realistic` scenario (human cadence, well under any throttle) is the
+//   apples-to-apples comparison.
+
+import { io, type Socket } from 'socket.io-client';
+
+// Connection settings for the Mongo chat app under test. `baseUrl` is used
+// both as the prefix of every REST call and as the Socket.IO endpoint.
+export interface MongoConfig {
+  baseUrl: string; // e.g. http://localhost:6001
+}
+
+// A user as the benchmark tracks it: only the name is kept.
+export interface MongoUser {
+  name: string; // username IS the identity in this app
+}
+
+/**
+ * Registers a user through `POST /api/users { name }`.
+ *
+ * @param cfg  Target server.
+ * @param name User name to create; it is also the identity used afterwards.
+ * @returns `{ name }` — the response payload is not needed by the benchmark.
+ * @throws Error carrying the HTTP status and response text on a non-2xx reply.
+ */
+export async function createMongoUser(cfg: MongoConfig, name: string): Promise<MongoUser> {
+  const res = await fetch(`${cfg.baseUrl}/api/users`, {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: JSON.stringify({ name }),
+  });
+  if (!res.ok) throw new Error(`createMongoUser ${name} failed: ${res.status} ${await res.text()}`);
+  // The payload is unused, but an unread body keeps the underlying connection
+  // occupied until it is garbage collected — release it explicitly.
+  await res.body?.cancel();
+  return { name };
+}
+
+/**
+ * Creates a public room through `POST /api/rooms`.
+ *
+ * @param cfg       Target server.
+ * @param name      Room name.
+ * @param createdBy User name recorded as the creator.
+ * @returns `{ id }`, where `id` is the `_id` of the room in the response.
+ * @throws Error carrying the HTTP status and response text on a non-2xx reply.
+ */
+export async function createMongoRoom(cfg: MongoConfig, name: string, createdBy: string): Promise<{ id: string }> {
+  const endpoint = `${cfg.baseUrl}/api/rooms`;
+  const payload = JSON.stringify({ name, createdBy, isPrivate: false });
+  const res = await fetch(endpoint, {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: payload,
+  });
+  if (!res.ok) {
+    const detail = await res.text();
+    throw new Error(`createMongoRoom ${name} failed: ${res.status} ${detail}`);
+  }
+  const { room } = (await res.json()) as { room: { _id: string } };
+  return { id: room._id };
+}
+
+/**
+ * Adds `userName` to a room through `POST /api/rooms/:roomId/join`.
+ *
+ * Best effort: membership is not required to send (the messages endpoint has
+ * no member check), so a non-2xx reply is deliberately ignored. Network-level
+ * failures of `fetch` itself still reject.
+ *
+ * @param cfg      Target server.
+ * @param roomId   Id of the room to join.
+ * @param userName User joining the room.
+ */
+export async function joinMongoRoom(cfg: MongoConfig, roomId: string, userName: string): Promise<void> {
+  const res = await fetch(`${cfg.baseUrl}/api/rooms/${roomId}/join`, {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: JSON.stringify({ userName }),
+  });
+  // Success or failure, the body is not needed; cancel it so the connection
+  // is released instead of waiting for garbage collection.
+  await res.body?.cancel();
+}
+
+// The fields of a broadcast message that the benchmark reads.
+export interface MongoMessage {
+  sender: string;
+  text: string;
+  roomId: string;
+}
+
+// Handle returned by connectMongoClient: the live socket, the user it was
+// authenticated as, and a close() that disconnects it.
+export interface MongoClientHandle {
+  socket: Socket;
+  userName: string;
+  close(): void;
+}
+
+/**
+ * Opens a websocket-only Socket.IO connection, authenticates as `userName`,
+ * joins `roomId` and forwards every broadcast message to `onMessage`.
+ *
+ * @param cfg       Target server.
+ * @param userName  Name sent with the 'authenticate' event.
+ * @param roomId    Room to join (sent as a bare string argument).
+ * @param onMessage Called with the `message` of each 'message' event.
+ * @returns A handle holding the socket and a `close()` that disconnects it.
+ * @throws The connect error, or `Error('socket connect timeout')` when no
+ *         connection is established within 10 s. In both cases the socket is
+ *         closed before the rejection so nothing is left dangling.
+ */
+export async function connectMongoClient(
+  cfg: MongoConfig,
+  userName: string,
+  roomId: string,
+  onMessage: (msg: MongoMessage) => void,
+): Promise<MongoClientHandle> {
+  const socket = io(cfg.baseUrl, {
+    transports: ['websocket'],
+    reconnection: false,
+    forceNew: true,
+  });
+  await new Promise<void>((resolve, reject) => {
+    let timer: ReturnType<typeof setTimeout> | undefined;
+    const onConnect = (): void => {
+      // Connected: the timeout must not keep the event loop alive for 10 s.
+      clearTimeout(timer);
+      socket.off('connect_error', onFailure);
+      resolve();
+    };
+    const onFailure = (err: Error): void => {
+      clearTimeout(timer);
+      socket.off('connect', onConnect);
+      // Release the half-open socket; the caller never receives a handle to close.
+      socket.disconnect();
+      reject(err);
+    };
+    socket.once('connect', onConnect);
+    socket.once('connect_error', onFailure);
+    timer = setTimeout(() => onFailure(new Error('socket connect timeout')), 10_000);
+  });
+  socket.emit('authenticate', { userName });
+  socket.emit('join-room', roomId); // bare string arg, per the app's handler
+  socket.on('message', (payload: { message: MongoMessage }) => {
+    if (payload && payload.message) onMessage(payload.message);
+  });
+  return {
+    socket,
+    userName,
+    close: () => {
+      try { socket.disconnect(); } catch { /* ignore */ }
+    },
+  };
+}
+
+/**
+ * REST send: `POST /api/rooms/:roomId/messages { sender, text }`.
+ *
+ * @param cfg    Target server.
+ * @param roomId Room to post into.
+ * @param sender User name recorded as the sender.
+ * @param text   Message text.
+ * @returns The parsed response body on success, `null` on a non-2xx reply.
+ *          Network-level failures of `fetch` itself reject.
+ */
+export async function mongoSendRest(cfg: MongoConfig, roomId: string, sender: string, text: string): Promise<unknown> {
+  const res = await fetch(`${cfg.baseUrl}/api/rooms/${roomId}/messages`, {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: JSON.stringify({ sender, text }),
+  });
+  if (!res.ok) {
+    // Discard the error body so the connection is released right away; under
+    // load, unread bodies would otherwise pile up until garbage collection.
+    await res.body?.cancel();
+    return null;
+  }
+  return res.json();
+}
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/main.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/main.ts
index 34a07018ad4..2595b44f5ee 100644
--- a/tools/llm-sequential-upgrade/perf-benchmark/src/main.ts
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/main.ts
@@ -14,19 +14,20 @@ import { fileURLToPath } from 'node:url';
 import {
   runStressPostgres,
   runStressSpacetime,
-  type StressOpts,
+  runStressMongo,
 } from './scenarios/stress-throughput.ts';
 import {
   runRealisticPostgres,
   runRealisticSpacetime,
-  type RealisticOpts,
+  runRealisticMongo,
 } from './scenarios/realistic-chat.ts';
 import type { ScenarioResult } from './metrics.ts';
 
 interface CliArgs {
-  backend: 'pg' | 'stdb';
+  backend: 'pg' | 'stdb' | 'mongo';
   scenario: 'stress' | 'realistic' | 'all';
   pgUrl: string;
+  mongoUrl: string;
   stdbUri: string;
   stdbModule: string;
   writers: number;
@@ -40,6 +41,7 @@ function parseArgs(argv: string[]): CliArgs {
     backend: 'pg',
     scenario: 'stress',
     pgUrl: 'http://localhost:6001',
+    mongoUrl: 'http://localhost:6001',
     stdbUri: 'ws://localhost:3000',
     stdbModule: '',
     writers: 20,
@@ -51,9 +53,10 @@ function parseArgs(argv: string[]): CliArgs {
     const k = argv[i];
     const v = argv[i + 1];
     switch (k) {
-      case '--backend': a.backend = v as 'pg' | 'stdb'; i++; break;
+      case '--backend': a.backend = v as CliArgs['backend']; i++; break;
       case '--scenario': a.scenario = v as CliArgs['scenario']; i++; break;
       case '--pg-url': a.pgUrl = v!; i++; break;
+      case '--mongo-url': a.mongoUrl = v!; i++; break;
       case '--stdb-uri': a.stdbUri = v!; i++; break;
       case '--module': a.stdbModule = v!; i++; break;
       case '--writers': a.writers = parseInt(v!); i++; break;
@@ -70,6 +73,10 @@ async function runOne(args: CliArgs, scenario: 'stress' | 'realistic'): Promise<
     const cfg = { baseUrl: args.pgUrl };
     if (scenario === 'stress') return runStressPostgres(cfg, { writers: args.writers, durationSec: args.duration });
     return runRealisticPostgres(cfg, { users: args.users, durationSec: args.duration, minIntervalMs: 5000, maxIntervalMs: 15000 });
+  } else if (args.backend === 'mongo') {
+    const cfg = { baseUrl: args.mongoUrl };
+    if (scenario === 'stress') return runStressMongo(cfg, { writers: args.writers, durationSec: args.duration });
+    return runRealisticMongo(cfg, { users: args.users, durationSec: args.duration, minIntervalMs: 5000, maxIntervalMs: 15000 });
   } else {
     if (!args.stdbModule) throw new Error('--module is required for stdb');
     const cfg = { uri: args.stdbUri, moduleName: args.stdbModule };
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/metrics.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/metrics.ts
index c41a5f16a03..4ce3cad6898 100644
--- a/tools/llm-sequential-upgrade/perf-benchmark/src/metrics.ts
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/metrics.ts
@@ -45,7 +45,7 @@ export interface LatencySummary {
 
 export interface ScenarioResult {
   scenario: string;
-  backend: 'postgres' | 'spacetime';
+  backend: 'postgres' | 'spacetime' | 'mongodb';
   startedAt: string;
   durationSec: number;
   writers: number;
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/scenarios/realistic-chat.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/scenarios/realistic-chat.ts
index cf98b37554c..a85d7221d9c 100644
--- a/tools/llm-sequential-upgrade/perf-benchmark/src/scenarios/realistic-chat.ts
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/scenarios/realistic-chat.ts
@@ -25,6 +25,14 @@ import {
   stdbSendMessage,
   stdbSetName,
 } from '../clients/spacetime-client.ts';
+import {
+  type MongoConfig,
+  createMongoUser,
+  createMongoRoom,
+  joinMongoRoom,
+  connectMongoClient,
+  mongoSendRest,
+} from '../clients/mongodb-client.ts';
 
 export interface RealisticOpts {
   users: number;
@@ -99,6 +107,67 @@ export async function runRealisticPostgres(cfg: PgConfig, opts: RealisticOpts):
   };
 }
 
+/**
+ * Realistic-chat scenario against the Mongo app: `opts.users` users post into
+ * one public room over REST at human cadence while a separate listener socket
+ * measures server→client fan-out latency.
+ *
+ * Counters in the result:
+ *  - `sent`     sends the server accepted (REST call returned a body)
+ *  - `errors`   sends that were rejected (non-2xx) or failed at network level
+ *  - `received` stamped messages observed by the listener while measuring
+ * `ackLatencyMs` is left empty; this scenario does not time the REST call.
+ *
+ * @param cfg  Target server.
+ * @param opts User count, duration and the send-interval jitter bounds.
+ * @returns The scenario summary for the `mongodb` backend.
+ * @throws If user/room setup or the listener connection fails.
+ */
+export async function runRealisticMongo(cfg: MongoConfig, opts: RealisticOpts): Promise<ScenarioResult> {
+  const tag = `mr${Date.now().toString(36)}`;
+  const userNames = Array.from({ length: opts.users }, (_, i) => `${tag}_u${i}`);
+  await Promise.all(userNames.map((n) => createMongoUser(cfg, n)));
+  const listenerName = `${tag}_listener`;
+  await createMongoUser(cfg, listenerName);
+  const room = await createMongoRoom(cfg, tag, listenerName);
+  await Promise.all(userNames.map((n) => joinMongoRoom(cfg, room.id, n)));
+
+  const fanout = new LatencyHistogram();
+  let received = 0;
+  let measuring = false;
+
+  // Listener measures true server→client fan-out latency under human-cadence load.
+  const listener = await connectMongoClient(cfg, listenerName, room.id, (msg) => {
+    if (!measuring) return;
+    const stamp = parseStamp(msg.text);
+    if (!stamp) return;
+    received += 1;
+    fanout.record(nsToMs(process.hrtime.bigint() - stamp.sentNs));
+  });
+
+  let startedAt = '';
+  let seq = 1;
+  let sent = 0;
+  let errors = 0;
+
+  try {
+    measuring = true;
+    startedAt = new Date().toISOString();
+    const endTime = Date.now() + opts.durationSec * 1000;
+
+    // Users send via REST at human cadence (5-15s jitter). Well under any
+    // throttle, so this is the apples-to-apples comparison vs PG/STDB.
+    const userLoop = async (name: string): Promise<void> => {
+      while (Date.now() < endTime) {
+        try {
+          // mongoSendRest reports a rejected send as null rather than throwing.
+          const result = await mongoSendRest(cfg, room.id, name, stampMessage(seq++));
+          if (result === null) errors += 1;
+          else sent += 1;
+        } catch {
+          // Network-level failure: count it and keep the user loop alive.
+          errors += 1;
+        }
+        await new Promise((r) => setTimeout(r, jitter(opts.minIntervalMs, opts.maxIntervalMs)));
+      }
+    };
+    await Promise.all(userNames.map((name) => userLoop(name)));
+
+    // Grace period so broadcasts still in flight reach the listener.
+    await new Promise((r) => setTimeout(r, 2000));
+  } finally {
+    // Always release the listener socket, even if the run aborted.
+    measuring = false;
+    listener.close();
+  }
+
+  return {
+    scenario: 'realistic-chat',
+    backend: 'mongodb',
+    startedAt,
+    durationSec: opts.durationSec,
+    writers: opts.users,
+    sent,
+    received,
+    errors,
+    msgsPerSec: received / opts.durationSec,
+    ackLatencyMs: new LatencyHistogram().summary(),
+    fanoutLatencyMs: fanout.summary(),
+    notes: `${opts.users} users, jitter ${opts.minIntervalMs}-${opts.maxIntervalMs}ms (REST send, fan-out via listener socket)`,
+  };
+}
+
 export async function runRealisticSpacetime(cfg: StdbConfig, opts: RealisticOpts): Promise<ScenarioResult> {
   const tag = `sr${Date.now().toString(36)}`;
 
diff --git a/tools/llm-sequential-upgrade/perf-benchmark/src/scenarios/stress-throughput.ts b/tools/llm-sequential-upgrade/perf-benchmark/src/scenarios/stress-throughput.ts
index 5c8ee982003..be910493adc 100644
--- a/tools/llm-sequential-upgrade/perf-benchmark/src/scenarios/stress-throughput.ts
+++ b/tools/llm-sequential-upgrade/perf-benchmark/src/scenarios/stress-throughput.ts
@@ -29,6 +29,14 @@ import {
   stdbSendMessage,
   stdbSetName,
 } from '../clients/spacetime-client.ts';
+import {
+  type MongoConfig,
+  createMongoUser,
+  createMongoRoom,
+  joinMongoRoom,
+  connectMongoClient,
+  mongoSendRest,
+} from '../clients/mongodb-client.ts';
 
 export interface StressOpts {
   writers: number;
@@ -165,6 +173,85 @@ export async function runStressPostgres(cfg: PgConfig, opts: StressOpts): Promis
   };
 }
 
+/** Stress scenario (MongoDB): writers POST stamped messages over REST back-to-back while one
+ *  socket listener in the same room records fan-out latency. The listener is always closed. */
+export async function runStressMongo(cfg: MongoConfig, opts: StressOpts): Promise<ScenarioResult> {
+  const tag = `ms${Date.now().toString(36)}`;
+  // N writers + 1 listener; one public room they all join.
+  const writerNames = Array.from({ length: opts.writers }, (_, i) => `${tag}_w${i}`);
+  await Promise.all(writerNames.map((n) => createMongoUser(cfg, n)));
+  const listenerName = `${tag}_listener`;
+  await createMongoUser(cfg, listenerName);
+  const room = await createMongoRoom(cfg, tag, listenerName);
+  await Promise.all(writerNames.map((n) => joinMongoRoom(cfg, room.id, n)));
+
+  const ack = new LatencyHistogram();   // POST HTTP round-trip (server insert + respond)
+  const fanout = new LatencyHistogram(); // writer send → listener observes broadcast
+  let received = 0;     // delivered to listener (true fan-out)
+  let ackedSends = 0;   // HTTP-confirmed sends
+  let sent = 0;         // attempted sends, acked or not
+  let measuring = false; // listener ignores traffic (e.g. the warmup) while this is false
+
+  // Listener: joined to the room over a socket; measures true fan-out latency.
+  const listener = await connectMongoClient(cfg, listenerName, room.id, (msg) => {
+    if (!measuring) return;
+    const stamp = parseStamp(msg.text);
+    if (!stamp) return;
+    received += 1;
+    fanout.record(nsToMs(process.hrtime.bigint() - stamp.sentNs));
+  });
+
+  try {
+    // Warmup
+    await mongoSendRest(cfg, room.id, writerNames[0]!, `__bench:${process.hrtime.bigint()}:0:warmup`);
+    await new Promise((r) => setTimeout(r, 500));
+    measuring = true;
+    const startedAt = new Date().toISOString();
+    const endTime = Date.now() + opts.durationSec * 1000;
+    let seq = 1;
+    // Each writer posts as fast as its REST round-trip allows; throughput scales
+    // via concurrent writers. (No app-level rate limit in the Mongo app.)
+    const writerLoop = async (name: string): Promise<void> => {
+      while (Date.now() < endTime) {
+        const s = seq++;
+        const t0 = process.hrtime.bigint();
+        sent += 1;
+        try {
+          const resp = await mongoSendRest(cfg, room.id, name, stampMessage(s));
+          if (resp) {
+            ackedSends += 1;
+            ack.record(nsToMs(process.hrtime.bigint() - t0));
+          }
+        } catch { /* not rethrown: a failed send is reported via errors = sent - ackedSends */ }
+      }
+    };
+    await Promise.all(writerNames.map(writerLoop));
+
+    // Drain in-flight broadcasts
+    await new Promise((r) => setTimeout(r, 3000));
+    measuring = false;
+    // Throughput: prefer true delivered (listener). Falls back to acked sends only
+    // when the listener observed nothing at all (received === 0).
+    const deliveredForRate = received > 0 ? received : ackedSends;
+    return {
+      scenario: 'stress-throughput',
+      backend: 'mongodb',
+      startedAt,
+      durationSec: opts.durationSec,
+      writers: opts.writers,
+      sent,
+      received,
+      errors: sent - ackedSends,
+      msgsPerSec: deliveredForRate / opts.durationSec,
+      ackLatencyMs: ack.summary(),
+      fanoutLatencyMs: fanout.summary(),
+      notes: `${opts.writers} writers REST-POST as fast as possible; ack=HTTP round-trip, fanout=listener socket. NOTE: Mongo app has NO per-user send rate limit (the PG app throttles 500ms/user) — stress throughput is NOT directly comparable to PG; use the realistic scenario for that.`,
+    };
+  } finally { // close the listener socket even when the warmup send throws
+    listener.close();
+  }
+}
+
 export async function runStressSpacetime(cfg: StdbConfig, opts: StressOpts): Promise<ScenarioResult> {
   const tag = `ss${Date.now().toString(36)}`;
 
diff --git a/tools/llm-sequential-upgrade/reset-app.sh b/tools/llm-sequential-upgrade/reset-app.sh
index f52df842379..fcda35d0eab 100644
--- a/tools/llm-sequential-upgrade/reset-app.sh
+++ b/tools/llm-sequential-upgrade/reset-app.sh
@@ -5,7 +5,7 @@
 # Usage:
 #   ./reset-app.sh <app-dir>
 #
-# This gives Playwright a clean slate — no leftover users, rooms, or messages.
+# This gives grading a clean slate — no leftover users, rooms, or messages.
 
 set -euo pipefail
 
@@ -26,11 +26,15 @@ if [[ -d "/c/Users/$_USER/AppData/Local/SpacetimeDB" ]]; then
   export PATH="$PATH:/c/Users/$_USER/AppData/Local/SpacetimeDB"
 fi
 
-# Auto-detect backend
-if [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
+# Auto-detect backend. Prefer the explicit marker written by run.sh at generate
+# time; fall back to directory shape for legacy apps. The marker is the only
+# reliable way to tell postgres and mongodb apart (both use a server/ dir).
+if [[ -f "$APP_DIR/.benchmark-backend" ]]; then
+  BACKEND="$(tr -d '[:space:]' < "$APP_DIR/.benchmark-backend")"
+elif [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
   BACKEND="spacetime"
 elif [[ -d "$APP_DIR/server" ]]; then
-  BACKEND="postgres"
+  BACKEND="postgres"  # legacy fallback; mongodb apps carry the marker
 else
   echo "ERROR: Cannot detect backend in $APP_DIR"
   exit 1
@@ -98,6 +102,27 @@ elif [[ "$BACKEND" == "postgres" ]]; then
   npx drizzle-kit push 2>&1 | tail -3
   cd - > /dev/null
 
+  echo "  Database reset complete."
+
+elif [[ "$BACKEND" == "mongodb" ]]; then
+  echo "Resetting MongoDB database..."
+
+  MONGO_CONTAINER="${MONGO_CONTAINER:-llm-sequential-upgrade-mongodb-1}"
+  DB_NAME="chat-app"
+
+  # DB name = path segment of the server's DATABASE_URL (mongodb://host:port/<db>[?opts]); a missing key or path keeps the default (`|| true` stops set -euo pipefail aborting).
+  SERVER_DIR="$APP_DIR/server"
+  if [[ -f "$SERVER_DIR/.env" ]]; then
+    DB_URL=$(grep DATABASE_URL "$SERVER_DIR/.env" | head -1 | cut -d= -f2- | tr -d "\"'\r" || true)
+    DB_FROM_URL=$(echo "$DB_URL" | sed -n 's|^[^:]*://[^/]*/\([^?]*\).*|\1|p'); if [[ -n "$DB_FROM_URL" ]]; then DB_NAME="$DB_FROM_URL"; fi
+  fi
+
+  # Drop the whole database. Mongoose is schemaless and recreates collections on
+  # the next write (and indexes when the model is next initialized), so there is
+  # no migration / push step to run afterwards.
+  echo "  Dropping database $DB_NAME..."
+  docker exec "$MONGO_CONTAINER" mongosh "$DB_NAME" --quiet --eval "db.dropDatabase()" 2>&1 | tail -1
+
   echo "  Database reset complete."
 fi
 
diff --git a/tools/llm-sequential-upgrade/run-loop.sh b/tools/llm-sequential-upgrade/run-loop.sh
index dc7176de711..0fc0f6d3631 100644
--- a/tools/llm-sequential-upgrade/run-loop.sh
+++ b/tools/llm-sequential-upgrade/run-loop.sh
@@ -25,7 +25,6 @@ BACKEND="spacetime"
 VARIANT="one-shot"
 LEVEL=7
 RULES="guided"
-TEST_MODE=""
 RUN_INDEX=0
 MAX_FIX_ITERATIONS=5
 
@@ -35,18 +34,12 @@ while [[ $# -gt 0 ]]; do
     --variant) VARIANT="$2"; shift 2 ;;
     --level) LEVEL="$2"; shift 2 ;;
     --rules) RULES="$2"; shift 2 ;;
-    --test) TEST_MODE="$2"; shift 2 ;;
     --run-index) RUN_INDEX="$2"; shift 2 ;;
     --max-fixes) MAX_FIX_ITERATIONS="$2"; shift 2 ;;
     *) echo "Unknown option: $1"; exit 1 ;;
   esac
 done
 
-TEST_FLAG=""
-if [[ -n "$TEST_MODE" ]]; then
-  TEST_FLAG="--test $TEST_MODE"
-fi
-
 LOCK_FILE="$SCRIPT_DIR/.grade-lock"
 LOG_PREFIX="[run-$RUN_INDEX/$BACKEND]"
 
@@ -112,7 +105,6 @@ fix_bugs() {
     --fix "$app_dir" \
     --variant "$VARIANT" \
     --rules "$RULES" \
-    $TEST_FLAG \
     --run-index "$RUN_INDEX" \
     --level "$LEVEL" \
     --resume-session \
@@ -128,7 +120,6 @@ if [[ "$VARIANT" == "one-shot" ]]; then
   "$SCRIPT_DIR/run.sh" \
     --variant "$VARIANT" \
     --rules "$RULES" \
-    $TEST_FLAG \
     --backend "$BACKEND" \
     --run-index "$RUN_INDEX" \
     --level "$LEVEL"
@@ -202,7 +193,6 @@ else
     "$SCRIPT_DIR/run.sh" \
       --variant "$VARIANT" \
       --rules "$RULES" \
-      $TEST_FLAG \
       --backend "$BACKEND" \
       --run-index "$RUN_INDEX" \
       --upgrade "$APP_DIR" \
diff --git a/tools/llm-sequential-upgrade/run.sh b/tools/llm-sequential-upgrade/run.sh
index 02bc2b924fe..34e58c3d9b4 100644
--- a/tools/llm-sequential-upgrade/run.sh
+++ b/tools/llm-sequential-upgrade/run.sh
@@ -9,6 +9,7 @@
 #   ./run.sh --level 5 --backend postgres       # generate from scratch at level 5
 #   ./run.sh --variant one-shot --backend spacetime  # one-shot: all features in one prompt
 #   ./run.sh --rules standard --backend spacetime   # standard: SDK rules only, no templates
+#   ./run.sh --model claude-sonnet-4-6 --backend mongodb  # pin the model (parity)
 #   ./run.sh --run-index 1 --backend spacetime      # parallel run with offset ports
 #   ./run.sh --fix <app-dir>                    # fix bugs in existing app (reads BUG_REPORT.md)
 #   ./run.sh --upgrade <app-dir> --level 3      # add level 3 features to existing level 2 app (incremental feature file)
@@ -24,8 +25,26 @@ set -euo pipefail
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 
-# Configurable container name for PostgreSQL backend
+# Configurable container names for the Docker-backed databases
 POSTGRES_CONTAINER="${POSTGRES_CONTAINER:-llm-sequential-upgrade-postgres-1}"
+MONGO_CONTAINER="${MONGO_CONTAINER:-llm-sequential-upgrade-mongodb-1}"
+
+# Detect a generated app's backend via the .benchmark-backend marker, falling back to
+# directory shape. The marker is required since postgres and mongodb both use a server/ dir.
+detect_backend() {
+  # Prints the backend for app dir $1. A blank marker file is ignored so callers never receive "".
+  local app_dir="$1" marker=""
+  [[ -f "$app_dir/.benchmark-backend" ]] && marker="$(tr -d '[:space:]' < "$app_dir/.benchmark-backend")"
+  if [[ -n "$marker" ]]; then
+    echo "$marker"
+  elif [[ -d "$app_dir/backend/spacetimedb" ]]; then
+    echo "spacetime"
+  elif [[ -d "$app_dir/server" ]]; then
+    echo "postgres"  # legacy fallback; mongodb apps must carry the marker
+  else
+    echo "unknown"
+  fi
+}
 
 # ─── Parse arguments ─────────────────────────────────────────────────────────
 
@@ -34,7 +53,8 @@ LEVEL_EXPLICIT=""
 BACKEND="spacetime"
 VARIANT="sequential-upgrade"
 RULES="guided"
-TEST_MODE=""  # playwright | chrome-mcp | (empty = no automated testing)
+# Pin the canonical model so unpinned runs don't inherit the CLI default. Override with --model.
+MODEL="${ANTHROPIC_MODEL:-claude-sonnet-4-6}"
 RUN_INDEX=0
 FIX_MODE=""
 FIX_APP_DIR=""
@@ -48,7 +68,7 @@ while [[ $# -gt 0 ]]; do
     --backend) BACKEND="$2"; shift 2 ;;
     --variant) VARIANT="$2"; shift 2 ;;
     --rules) RULES="$2"; shift 2 ;;
-    --test) TEST_MODE="$2"; shift 2 ;;
+    --model) MODEL="$2"; shift 2 ;;
     --run-index) RUN_INDEX="$2"; shift 2 ;;
     --fix) FIX_MODE=1; FIX_APP_DIR="$2"; shift 2 ;;
     --upgrade) UPGRADE_MODE=1; UPGRADE_APP_DIR="$2"; shift 2 ;;
@@ -68,18 +88,26 @@ esac
 # Each backend has a 100-port range. Run-index offsets within that range.
 #   SpacetimeDB: 6173 + run-index  (6173, 6174, 6175, ...)
 #   PostgreSQL:  6273 + run-index  (6273, 6274, 6275, ...)
-#   Express:     6001 + run-index  (6001, 6002, 6003, ...)
+#   MongoDB:     6373 + run-index  (6373, 6374, 6375, ...)
+#   Express:     6001 + run-index  (6001, 6002, 6003, ...)  [postgres & mongodb]
 VITE_PORT_STDB=$((6173 + RUN_INDEX))
 VITE_PORT_PG=$((6273 + RUN_INDEX))
+VITE_PORT_MONGO=$((6373 + RUN_INDEX))
 EXPRESS_PORT=$((6001 + RUN_INDEX))
 PG_PORT=6432  # Shared container, isolation via per-run database names
+MONGO_PORT=6437  # Shared container, isolation via per-run database names
 STDB_PORT=3000  # SpacetimeDB server is shared, modules are isolated by name
 
-if [[ "$BACKEND" == "spacetime" ]]; then
-  VITE_PORT=$VITE_PORT_STDB
-else
-  VITE_PORT=$VITE_PORT_PG
-fi
+# Select VITE_PORT for the current $BACKEND. Called again after fix/upgrade
+# backend detection, since $BACKEND can change once the app dir is inspected.
+select_vite_port() {
+  case "$BACKEND" in
+    spacetime) VITE_PORT=$VITE_PORT_STDB ;;
+    mongodb)   VITE_PORT=$VITE_PORT_MONGO ;;
+    *)         VITE_PORT=$VITE_PORT_PG ;;  # postgres
+  esac
+}
+select_vite_port
 
 # Variant-specific defaults
 if [[ "$VARIANT" == "one-shot" ]]; then
@@ -158,6 +186,8 @@ fi
 
 PG_DATABASE="spacetime"
 PG_CONNECTION_URL="postgresql://spacetime:spacetime@localhost:6432/spacetime"
+MONGO_DATABASE="chat-app"
+MONGO_CONNECTION_URL="mongodb://localhost:6437/chat-app"
 
 if [[ "$BACKEND" == "spacetime" ]]; then
   if spacetime server ping local &>/dev/null; then
@@ -189,6 +219,25 @@ elif [[ "$BACKEND" == "postgres" ]]; then
     echo "[OK] PostgreSQL database: $PG_DATABASE (default)"
   fi
   PG_CONNECTION_URL="postgresql://spacetime:spacetime@localhost:6432/$PG_DATABASE"
+elif [[ "$BACKEND" == "mongodb" ]]; then
+  if docker exec "$MONGO_CONTAINER" mongosh --quiet --eval "db.runCommand({ping:1})" &>/dev/null; then
+    echo "[OK] MongoDB container is running"
+  else
+    echo "[FAIL] MongoDB is not reachable. Check Docker container $MONGO_CONTAINER."
+    exit 1
+  fi
+
+  # Per-run database isolation: each run-index gets its own database.
+  # MongoDB creates databases lazily on first write, so there's nothing to
+  # pre-create — just pick a distinct name. Run 0 uses "chat-app".
+  if [[ $RUN_INDEX -gt 0 ]]; then
+    MONGO_DATABASE="chat-app_run${RUN_INDEX}"
+    echo "[OK] MongoDB database: $MONGO_DATABASE (run-index $RUN_INDEX)"
+  else
+    MONGO_DATABASE="chat-app"
+    echo "[OK] MongoDB database: $MONGO_DATABASE (default)"
+  fi
+  MONGO_CONNECTION_URL="mongodb://localhost:6437/$MONGO_DATABASE"
 fi
 
 if ! docker info &>/dev/null; then
@@ -236,14 +285,13 @@ else
   exit 1
 fi
 
-# Strip UI contracts from prompt if not using Playwright testing
-if [[ "$TEST_MODE" != "playwright" ]]; then
-  STRIPPED_PROMPT="/tmp/seq-upgrade-prompt-${RUN_INDEX}-$(basename "$PROMPT_FILE")"
-  # Remove **UI contract:** blocks (from the line through the next blank line or next ###)
-  sed '/^\*\*UI contract:\*\*/,/^$/d; /^\*\*Important:\*\* Each feature below includes/d' "$PROMPT_FILE" > "$STRIPPED_PROMPT"
-  PROMPT_FILE="$STRIPPED_PROMPT"
-  echo "[OK] UI contracts stripped (test=$TEST_MODE)"
-fi
+# Strip UI contracts from the prompt. They exist only for deterministic automated
+# UI assertions, which we don't use — grading is manual/in-browser.
+STRIPPED_PROMPT="/tmp/seq-upgrade-prompt-${RUN_INDEX}-$(basename "$PROMPT_FILE")"
+# Remove **UI contract:** blocks (from that line through the next blank line) and the "**Important:** Each feature below includes" line
+sed '/^\*\*UI contract:\*\*/,/^$/d; /^\*\*Important:\*\* Each feature below includes/d' "$PROMPT_FILE" > "$STRIPPED_PROMPT"
+PROMPT_FILE="$STRIPPED_PROMPT"
+echo "[OK] UI contracts stripped"
 
 echo ""
 
@@ -272,13 +320,13 @@ if [[ -n "$UPGRADE_MODE" || -n "$FIX_MODE" ]]; then
   else
     APP_DIR="$FIX_APP_DIR"
   fi
-  # Detect backend from app directory structure BEFORE deriving paths.
-  # Must happen here so $BACKEND is correct for TELEMETRY_DIR assignment below.
-  if [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
-    BACKEND="spacetime"
-  elif [[ -d "$APP_DIR/server" ]]; then
-    BACKEND="postgres"
-  fi
+  # Detect backend from the app's marker (or directory shape) BEFORE deriving
+  # paths. Must happen here so $BACKEND is correct for TELEMETRY_DIR below.
+  _detected="$(detect_backend "$APP_DIR")"
+  [[ "$_detected" != "unknown" ]] && BACKEND="$_detected"
+  # Backend may have changed — recompute the Vite port so fix/upgrade prompts
+  # reference the correct one (e.g. mongodb 6373, not the pre-detection default).
+  select_vite_port
   # Walk up from app dir: chat-app-* → results → <backend> → <variant>-DATE
   RUN_BASE_DIR="$(cd "$APP_DIR/../../.." 2>/dev/null && pwd)"
   # Validate it looks like a run base dir (has a backend subdirectory)
@@ -318,6 +366,9 @@ else
   RUN_ID="$BACKEND-level$LEVEL-$TIMESTAMP"
   APP_DIR="$RESULTS_DIR/chat-app-$TIMESTAMP"
   mkdir -p "$APP_DIR"
+  # Marker so fix/upgrade mode can reliably re-detect the backend later
+  # (postgres and mongodb both use a server/ dir; this disambiguates them).
+  echo "$BACKEND" > "$APP_DIR/.benchmark-backend"
 fi
 
 RUN_DIR="$TELEMETRY_DIR/$RUN_ID"
@@ -337,6 +388,7 @@ fi
 echo "=== Sequential Upgrade: ${MODE_LABEL^} ==="
 echo "  Variant:   $VARIANT"
 echo "  Rules:     $RULES"
+echo "  Model:     ${MODEL:-(CLI default)}"
 echo "  Level:     $LEVEL"
 echo "  Backend:   $BACKEND"
 echo "  Run index: $RUN_INDEX (Vite=$VITE_PORT)"
@@ -352,6 +404,10 @@ echo ""
 unset CLAUDE_CODE_PROVIDER_MANAGED_BY_HOST
 unset CLAUDE_CODE_ENTRYPOINT
 
+# Force 5-min cache tier + freeze CLI version for consistent cost billing across runs.
+export FORCE_PROMPT_CACHING_5M=1
+export DISABLE_AUTOUPDATER=1
+
 export CLAUDE_CODE_ENABLE_TELEMETRY=1
 export OTEL_LOGS_EXPORTER=otlp
 export OTEL_METRICS_EXPORTER=otlp
@@ -389,11 +445,12 @@ cat > "$RUN_DIR/metadata.json" <<EOF
   "phase": "$MODE_LABEL",
   "variant": "$VARIANT",
   "rules": "$RULES",
-  "testMode": "${TEST_MODE:-none}",
+  "model": "${MODEL:-default}",
   "runIndex": $RUN_INDEX,
   "vitePort": $VITE_PORT,
   "expressPort": $EXPRESS_PORT,
   "pgDatabase": "${PG_DATABASE:-}",
+  "mongoDatabase": "${MONGO_DATABASE:-}",
   "sessionId": "$SESSION_ID"
 }
 EOF
@@ -420,7 +477,8 @@ snapshot_inputs() {
   # Backend specs (only relevant backend)
   cp "$SCRIPT_DIR/backends/$BACKEND.md" "$INPUTS_DIR/backends/" 2>/dev/null || true
   if [[ "$BACKEND" == "spacetime" ]]; then
-    cp "$SCRIPT_DIR/backends/spacetime-sdk-rules.md" "$INPUTS_DIR/backends/" 2>/dev/null || true
+    cp "$SCRIPT_DIR/../../skills/typescript-server/SKILL.md" "$INPUTS_DIR/backends/typescript-server-SKILL.md" 2>/dev/null || true
+    cp "$SCRIPT_DIR/../../skills/typescript-client/SKILL.md" "$INPUTS_DIR/backends/typescript-client-SKILL.md" 2>/dev/null || true
     cp "$SCRIPT_DIR/backends/spacetime-templates.md" "$INPUTS_DIR/backends/" 2>/dev/null || true
   fi
 
@@ -464,14 +522,8 @@ if [[ -n "$FIX_MODE" ]]; then
   echo "  Bug report: $APP_DIR_NATIVE/BUG_REPORT.md"
   echo ""
 
-  # Detect backend from existing app directory structure
-  if [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
-    FIX_BACKEND="spacetime"
-  elif [[ -d "$APP_DIR/server" ]]; then
-    FIX_BACKEND="postgres"
-  else
-    FIX_BACKEND="unknown"
-  fi
+  # Detect backend from the app's marker (or directory shape)
+  FIX_BACKEND="$(detect_backend "$APP_DIR")"
 
   PROMPT=$(cat <<PROMPT_EOF
 Fix the bugs in the sequential upgrade app.
@@ -532,14 +584,8 @@ elif [[ -n "$UPGRADE_MODE" ]]; then
     echo "  Saved to $SNAPSHOT_DIR"
   fi
 
-  # Detect backend from existing app directory structure
-  if [[ -d "$APP_DIR/backend/spacetimedb" ]]; then
-    UPGRADE_BACKEND="spacetime"
-  elif [[ -d "$APP_DIR/server" ]]; then
-    UPGRADE_BACKEND="postgres"
-  else
-    UPGRADE_BACKEND="unknown"
-  fi
+  # Detect backend from the app's marker (or directory shape)
+  UPGRADE_BACKEND="$(detect_backend "$APP_DIR")"
 
   # Resolve prompt file path
   if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
@@ -678,44 +724,67 @@ if [[ -z "$FIX_MODE" && -z "$UPGRADE_MODE" ]]; then
   #   standard: SDK rules only (no templates, no step-by-step phases)
   #   minimal:  just the tech stack name (least prescriptive)
   if [[ "$RULES" == "minimal" ]]; then
-    if [[ "$BACKEND" == "spacetime" ]]; then
-      echo "Build this app using the SpacetimeDB TypeScript SDK (npm package: spacetimedb)." > "$APP_DIR/CLAUDE.md"
-      echo "Server module in backend/spacetimedb/, React client in client/." >> "$APP_DIR/CLAUDE.md"
-      echo "Vite dev server port: $VITE_PORT" >> "$APP_DIR/CLAUDE.md"
-    else
-      echo "Build this app using PostgreSQL + Express + Socket.io + Drizzle ORM." > "$APP_DIR/CLAUDE.md"
-      echo "Express server in server/, React client in client/." >> "$APP_DIR/CLAUDE.md"
-      echo "PostgreSQL connection: $PG_CONNECTION_URL" >> "$APP_DIR/CLAUDE.md"
-      echo "Express port: $EXPRESS_PORT | Vite port: $VITE_PORT" >> "$APP_DIR/CLAUDE.md"
-    fi
+    case "$BACKEND" in
+      spacetime)
+        echo "Build this app using the SpacetimeDB TypeScript SDK (npm package: spacetimedb)." > "$APP_DIR/CLAUDE.md"
+        echo "Server module in backend/spacetimedb/, React client in client/." >> "$APP_DIR/CLAUDE.md"
+        echo "Vite dev server port: $VITE_PORT" >> "$APP_DIR/CLAUDE.md"
+        ;;
+      mongodb)
+        echo "Build this app using MongoDB + Express + Socket.io + Mongoose." > "$APP_DIR/CLAUDE.md"
+        echo "Express server in server/, React client in client/." >> "$APP_DIR/CLAUDE.md"
+        echo "MongoDB connection: $MONGO_CONNECTION_URL" >> "$APP_DIR/CLAUDE.md"
+        echo "Express port: $EXPRESS_PORT | Vite port: $VITE_PORT" >> "$APP_DIR/CLAUDE.md"
+        ;;
+      *)  # postgres
+        echo "Build this app using PostgreSQL + Express + Socket.io + Drizzle ORM." > "$APP_DIR/CLAUDE.md"
+        echo "Express server in server/, React client in client/." >> "$APP_DIR/CLAUDE.md"
+        echo "PostgreSQL connection: $PG_CONNECTION_URL" >> "$APP_DIR/CLAUDE.md"
+        echo "Express port: $EXPRESS_PORT | Vite port: $VITE_PORT" >> "$APP_DIR/CLAUDE.md"
+        ;;
+    esac
     echo "Assembled minimal CLAUDE.md (rules=$RULES)"
   elif [[ "$RULES" == "standard" ]]; then
-    if [[ "$BACKEND" == "spacetime" ]]; then
-      cat "$SCRIPT_DIR/backends/spacetime-sdk-rules.md" > "$APP_DIR/CLAUDE.md"
-    else
-      echo "# PostgreSQL Backend" > "$APP_DIR/CLAUDE.md"
-      echo "" >> "$APP_DIR/CLAUDE.md"
-      echo "PostgreSQL connection: \`$PG_CONNECTION_URL\`" >> "$APP_DIR/CLAUDE.md"
-      echo "" >> "$APP_DIR/CLAUDE.md"
-      echo "Use Express (port $EXPRESS_PORT) + Socket.io + Drizzle ORM. Server in \`server/\`, client in \`client/\`." >> "$APP_DIR/CLAUDE.md"
-      echo "Vite dev server port: $VITE_PORT" >> "$APP_DIR/CLAUDE.md"
-    fi
+    case "$BACKEND" in
+      spacetime)
+        _strip='NR==1 && /^---$/ {fm=1; next} fm && /^---$/ {fm=0; next} !fm {print}'
+        { awk "$_strip" "$SCRIPT_DIR/../../skills/typescript-server/SKILL.md"
+          echo ""; echo "---"; echo ""
+          awk "$_strip" "$SCRIPT_DIR/../../skills/typescript-client/SKILL.md"
+        } > "$APP_DIR/CLAUDE.md"
+        ;;
+      mongodb)
+        echo "# MongoDB Backend" > "$APP_DIR/CLAUDE.md"
+        echo "" >> "$APP_DIR/CLAUDE.md"
+        echo "MongoDB connection: \`$MONGO_CONNECTION_URL\`" >> "$APP_DIR/CLAUDE.md"
+        echo "" >> "$APP_DIR/CLAUDE.md"
+        echo "Use Express (port $EXPRESS_PORT) + Socket.io + Mongoose. Server in \`server/\`, client in \`client/\`." >> "$APP_DIR/CLAUDE.md"
+        echo "Vite dev server port: $VITE_PORT" >> "$APP_DIR/CLAUDE.md"
+        ;;
+      *)  # postgres
+        echo "# PostgreSQL Backend" > "$APP_DIR/CLAUDE.md"
+        echo "" >> "$APP_DIR/CLAUDE.md"
+        echo "PostgreSQL connection: \`$PG_CONNECTION_URL\`" >> "$APP_DIR/CLAUDE.md"
+        echo "" >> "$APP_DIR/CLAUDE.md"
+        echo "Use Express (port $EXPRESS_PORT) + Socket.io + Drizzle ORM. Server in \`server/\`, client in \`client/\`." >> "$APP_DIR/CLAUDE.md"
+        echo "Vite dev server port: $VITE_PORT" >> "$APP_DIR/CLAUDE.md"
+        ;;
+    esac
     echo "Assembled standard CLAUDE.md (rules=$RULES)"
   else
-    # guided (default) — full phases + SDK rules + templates
+    # guided (default) — phases + the official customer SDK skills + templates.
     if [[ "$BACKEND" == "spacetime" ]]; then
+      _strip_fm() { awk 'NR==1 && /^---$/ {fm=1; next} fm && /^---$/ {fm=0; next} !fm {print}' "$1"; }
       {
         cat "$SCRIPT_DIR/backends/spacetime.md"
-        echo ""
-        echo "---"
-        echo ""
-        cat "$SCRIPT_DIR/backends/spacetime-sdk-rules.md"
-        echo ""
-        echo "---"
-        echo ""
+        echo ""; echo "---"; echo ""
+        _strip_fm "$SCRIPT_DIR/../../skills/typescript-server/SKILL.md"
+        echo ""; echo "---"; echo ""
+        _strip_fm "$SCRIPT_DIR/../../skills/typescript-client/SKILL.md"
+        echo ""; echo "---"; echo ""
         cat "$SCRIPT_DIR/backends/spacetime-templates.md"
       } > "$APP_DIR/CLAUDE.md"
-      echo "Assembled guided CLAUDE.md from spacetime.md + sdk-rules + templates"
+      echo "Assembled guided CLAUDE.md from spacetime.md + official skills + templates"
     else
       cp "$SCRIPT_DIR/backends/$BACKEND.md" "$APP_DIR/CLAUDE.md"
       echo "Copied backends/$BACKEND.md → app CLAUDE.md"
@@ -731,12 +800,15 @@ if [[ -z "$FIX_MODE" && -z "$UPGRADE_MODE" ]]; then
     sed -i \
       -e "s/6173/$VITE_PORT_STDB/g" \
       -e "s/6273/$VITE_PORT_PG/g" \
+      -e "s/6373/$VITE_PORT_MONGO/g" \
       -e "s/:6001/:$EXPRESS_PORT/g" \
       -e "s/localhost:6001/localhost:$EXPRESS_PORT/g" \
       -e "s|localhost:6432/spacetime|localhost:6432/$PG_DATABASE|g" \
       -e "s|spacetime:spacetime@localhost:6432/spacetime|spacetime:spacetime@localhost:6432/$PG_DATABASE|g" \
+      -e "s|localhost:6437/chat-app|localhost:6437/$MONGO_DATABASE|g" \
       "$APP_DIR/CLAUDE.md"
-    echo "  Patched for run-index=$RUN_INDEX (Vite=$VITE_PORT, Express=$EXPRESS_PORT, DB=$PG_DATABASE)"
+    if [[ "$BACKEND" == "mongodb" ]]; then _DB_LABEL="$MONGO_DATABASE"; else _DB_LABEL="$PG_DATABASE"; fi
+    echo "  Patched for run-index=$RUN_INDEX (Vite=$VITE_PORT, Express=$EXPRESS_PORT, DB=$_DB_LABEL)"
   fi
 fi
 
@@ -780,12 +852,25 @@ if [[ -n "$RESUME_SESSION" && -n "$UPGRADE_MODE" ]]; then
   fi
 fi
 
-# --fork-session creates a new session branched from the prior one (keeps context)
-$CLAUDE_CMD --print --verbose --output-format text --dangerously-skip-permissions \
-  --add-dir "$APP_DIR" \
-  --add-dir "$SCRIPT_DIR" \
-  --add-dir "$SCRIPT_DIR/../llm-oneshot/apps/chat-app/prompts" \
-  --session-id "$SESSION_ID" $RESUME_FLAG -p "$PROMPT"
+# Pin the model when one is set (via --model or $ANTHROPIC_MODEL); otherwise the
+# CLI default is used. Same model across backends/levels = fair comparison.
+#
+# Build args as an array so empty optional flags (model/resume) can't break the
+# invocation: an option that is not set adds no words at all, and every value
+# is passed as exactly one argument.
+CLAUDE_ARGS=(
+  --print --verbose --output-format text --dangerously-skip-permissions
+  --add-dir "$APP_DIR"
+  --add-dir "$SCRIPT_DIR"
+  --add-dir "$SCRIPT_DIR/../llm-oneshot/apps/chat-app/prompts"
+  --session-id "$SESSION_ID"
+)
+# Optional flags are appended only when their value is non-empty.
+[[ -n "$MODEL" ]] && CLAUDE_ARGS+=(--model "$MODEL")
+# --fork-session creates a new session branched from the prior one (keeps context)
+[[ -n "${PREV_SESSION_ID:-}" ]] && CLAUDE_ARGS+=(--resume "$PREV_SESSION_ID" --fork-session)
+CLAUDE_ARGS+=(-p "$PROMPT")
+$CLAUDE_CMD "${CLAUDE_ARGS[@]}"
 EXIT_CODE=$?
 
 echo ""
@@ -813,6 +898,17 @@ m.sessionId = '$SESSION_ID';
 fs.writeFileSync(f, JSON.stringify(m, null, 2));
 " -- "$METADATA_FILE_NATIVE" || echo "WARNING: Failed to update metadata with end time"
 
+# ─── Capture Claude Code session transcript ──────────────────────────────────
+# Best-effort copy of the transcript (located by session id) next to the telemetry; `|| true` keeps a failed lookup from aborting the run.
+TRANSCRIPT_SRC=$(find "$HOME/.claude/projects" -name "$SESSION_ID.jsonl" 2>/dev/null | head -1 || true)
+if [[ -z "$TRANSCRIPT_SRC" || ! -f "$TRANSCRIPT_SRC" ]]; then
+  echo "NOTE: session transcript for $SESSION_ID not found under ~/.claude/projects"
+elif cp "$TRANSCRIPT_SRC" "$RUN_DIR/session-transcript.jsonl" 2>/dev/null; then
+  echo "Saved session transcript -> $RUN_DIR/session-transcript.jsonl"
+else
+  echo "WARNING: Failed to copy session transcript from $TRANSCRIPT_SRC"
+fi
+
 # ─── Snapshot completed level (upgrade mode) ─────────────────────────────────
 
 if [[ -n "$UPGRADE_MODE" && $EXIT_CODE -eq 0 ]]; then
@@ -879,75 +975,3 @@ else
   echo "WARNING: Telemetry parsing failed. Raw logs at: $SHARED_TELEMETRY_DIR/logs.jsonl"
 fi
 
-# ─── Auto-grade with Playwright (if installed) ──────────────────────────────
-
-PLAYWRIGHT_DIR="$SCRIPT_DIR/test-plans/playwright"
-if [[ $EXIT_CODE -eq 0 && "$TEST_MODE" == "playwright" && -f "$PLAYWRIGHT_DIR/node_modules/.bin/playwright" ]]; then
-  echo ""
-  echo "=== Auto-grading with Playwright ==="
-  echo "  App URL: http://localhost:$VITE_PORT"
-
-  # Wait for dev server to be ready
-  READY=0
-  for i in $(seq 1 30); do
-    if curl -s -o /dev/null -w "%{http_code}" "http://localhost:$VITE_PORT" 2>/dev/null | grep -q "200"; then
-      READY=1
-      break
-    fi
-    sleep 1
-  done
-
-  if [[ $READY -eq 1 ]]; then
-    # Reset backend state for a clean test (fresh module or DB)
-    echo "Resetting backend state for clean test..."
-    "$SCRIPT_DIR/reset-app.sh" "$APP_DIR" || echo "WARNING: Backend reset failed — tests may use stale state"
-
-    # Wait for the app to reconnect after reset
-    sleep 3
-
-    # Determine which feature specs to run based on prompt level
-    # Level → max feature number mapping:
-    #   1=4, 2=5, 3=6, 4=7, 5=8, 6=9, 7=10, 8=11, 9=12, 10=13, 11=14, 12=15,
-    #   13=16, 14=17, 15=18, 16=19, 17=20, 18=21, 19=22
-    MAX_FEATURE=$((LEVEL + 3))
-    if [[ $MAX_FEATURE -gt 22 ]]; then MAX_FEATURE=22; fi
-
-    PW_SPEC_FILES=""
-    for feat_num in $(seq 1 $MAX_FEATURE); do
-      FEAT_PAD=$(printf '%02d' "$feat_num")
-      SPEC_FILE=$(ls "$PLAYWRIGHT_DIR/specs/feature-${FEAT_PAD}-"*.spec.ts 2>/dev/null | head -1)
-      if [[ -n "$SPEC_FILE" ]]; then
-        PW_SPEC_FILES="$PW_SPEC_FILES $SPEC_FILE"
-      fi
-    done
-    echo "  Testing features 1-$MAX_FEATURE ($LEVEL prompt level)"
-
-    mkdir -p /tmp/pw-results-$RUN_INDEX
-    cd "$PLAYWRIGHT_DIR"
-    APP_URL="http://localhost:$VITE_PORT" npx playwright test $PW_SPEC_FILES --reporter=json \
-      1>/tmp/pw-results-$RUN_INDEX/results.json 2>/dev/null || true
-    cd "$APP_DIR"
-
-    RESULTS_SIZE=$(wc -c < /tmp/pw-results-$RUN_INDEX/results.json 2>/dev/null || echo "0")
-    if [[ "$RESULTS_SIZE" -gt 100 ]]; then
-      PW_RESULTS="/tmp/pw-results-$RUN_INDEX/results.json"
-      if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" ]]; then
-        PW_RESULTS=$(cygpath -w "$PW_RESULTS")
-      fi
-      node "$SCRIPT_DIR_NATIVE/parse-playwright-results.mjs" "$PW_RESULTS" "$APP_DIR_NATIVE" "$BACKEND"
-      # Copy raw results into telemetry dir for archival
-      cp /tmp/pw-results-$RUN_INDEX/results.json "$RUN_DIR/playwright-results.json" 2>/dev/null || true
-    else
-      echo "WARNING: Playwright produced no results (app may not have loaded)"
-    fi
-  else
-    echo "WARNING: Dev server not responding on port $VITE_PORT — skipping Playwright grading"
-  fi
-elif [[ $EXIT_CODE -eq 0 && "$TEST_MODE" == "agents" ]]; then
-  echo ""
-  echo "=== Auto-grading with Playwright Agents ==="
-  "$SCRIPT_DIR/grade-agents.sh" "$APP_DIR" 2>&1 || echo "WARNING: Agent grading failed"
-elif [[ $EXIT_CODE -ne 0 ]]; then
-  echo "Skipping auto-grade — code generation failed (exit $EXIT_CODE)"
-fi
-
diff --git a/tools/llm-sequential-upgrade/templates/BUG_REPORT.template.md b/tools/llm-sequential-upgrade/templates/BUG_REPORT.template.md
new file mode 100644
index 00000000000..150e06484a1
--- /dev/null
+++ b/tools/llm-sequential-upgrade/templates/BUG_REPORT.template.md
@@ -0,0 +1,50 @@
+<!--
+  BUG_REPORT.md — TEMPLATE
+  ========================
+  Written by the GRADER (you), filed into the app directory:
+    <variant>/<variant>-DATE/<backend>/results/chat-app-<ts>/BUG_REPORT.md
+
+  The fix agent reads this verbatim (run.sh --fix keys on its existence).
+  When every feature passes, DELETE this file — its absence tells the harness
+  the app is done.
+
+  Conventions (keep these for cross-backend comparability — spacetime / postgres / mongodb):
+    - Write ONLY from observed browser behavior, never from source code.
+    - Reference the FEATURE and user-visible behavior, not the implementation.
+    - One file = all currently-open bugs, numbered `## Bug 1`, `## Bug 2`, ...
+    - Pick ONE body style per bug:
+        (a) Description / Expected / Actual            — state & logic bugs
+        (b) Steps to reproduce / Expected / Actual     — interaction bugs
+    - Optional fields: **Severity:**, **Note:**, **Fix required:** (a pointer for
+      the fix agent, e.g. "check the reducer/subscription path").
+
+  Delete this comment block in the real file.
+-->
+
+# Bug Report
+
+## Bug 1: <one-line title of what is broken>
+
+**Feature:** <Feature Name> (Feature N)
+**Severity:** Critical — feature non-functional   <!-- optional; omit if minor -->
+
+**Description:** <what is wrong, in behavioral terms>
+
+**Expected:** <what should happen>
+**Actual:** <what actually happens>
+
+<!-- Optional pointer for the fix agent: -->
+**Fix required:** <where to look / what to debug>
+
+
+## Bug 2: <one-line title>
+
+**Feature:** <Feature Name>
+
+**Steps to reproduce:**
+1. <step>
+2. <step>
+3. Expected: <expected behavior>
+4. Actual: <actual behavior>
+
+**Note:** <e.g. "Regular (non-ephemeral) messages still work correctly.">
diff --git a/tools/llm-sequential-upgrade/templates/ITERATION_LOG.template.md b/tools/llm-sequential-upgrade/templates/ITERATION_LOG.template.md
new file mode 100644
index 00000000000..21fe29090fa
--- /dev/null
+++ b/tools/llm-sequential-upgrade/templates/ITERATION_LOG.template.md
@@ -0,0 +1,70 @@
+<!--
+  ITERATION_LOG.md — TEMPLATE
+  ===========================
+  Per-iteration fix history, kept in the app directory:
+    <variant>/<variant>-DATE/<backend>/results/chat-app-<ts>/ITERATION_LOG.md
+
+  APPEND only — never overwrite. The fix agent appends a `## Iteration N` block
+  after every fix cycle; the grader may add notes. The reprompt / iteration count
+  here feeds the "iterations to done" benchmark metric, so keep exactly one
+  `## Iteration N` block per fix cycle.
+
+  `**Category:**` is one of:
+    Feature Broken | Compilation/Build | Runtime/Crash | Integration | Data/State
+
+  `**Redeploy:**` is one of: Client only | Server only | Both
+    (spacetime: `spacetime publish` then restart client;
+     postgres/mongodb: restart Express server and/or client)
+
+  Delete this comment block in the real file.
+-->
+
+# Iteration Log
+
+## Run Info
+- **Backend:** <spacetime | postgres | mongodb>
+- **Level:** <N>
+- **Started:** <YYYY-MM-DDThh:mm:ss>
+- **Run ID:** <backend>-level<N>-<timestamp>
+
+---
+
+## Build Notes
+<!-- Environment / build workarounds that are NOT code reprompts. -->
+
+### <build issue title>
+<what happened and how it was worked around>
+
+### Build: PASS
+- Server `tsc --noEmit`: clean
+- Client `tsc --noEmit`: clean
+- Client `vite build`: success
+
+---
+
+## Iteration 0 — Deploy (hh:mm)
+
+**Status:** Deployed successfully
+- Client dev server running at http://localhost:<vite-port>
+- (postgres/mongodb) API server running at http://localhost:6001
+
+**Reprompts:** 0 build reprompts
+
+---
+
+## Iteration 1 — Fix (YYYY-MM-DD)
+
+**Category:** Feature Broken (<count> bugs)
+
+**Bug 1: <title matching the BUG_REPORT bug>**
+- Root cause: <what was actually wrong>
+- Fix: <what changed>
+- Files changed: `<file>` (<function / section>)
+
+**Bug 2: <title>**
+- Root cause: <...>
+- Fix: <...>
+- Files changed: `<file>`
+
+**Redeploy:** Client only | Server only | Both
+**Server status:** Client at http://localhost:<vite-port> ✓ <!-- (+ API at :6001 for postgres/mongodb) -->
diff --git a/tools/llm-sequential-upgrade/templates/README.md b/tools/llm-sequential-upgrade/templates/README.md
new file mode 100644
index 00000000000..a247685a8f0
--- /dev/null
+++ b/tools/llm-sequential-upgrade/templates/README.md
@@ -0,0 +1,25 @@
+# Grading Artifact Templates
+
+Canonical formats for the two grading artifacts produced during a sequential
+upgrade run. They match the published results in
+[`spacetimedb-ai-test-results`](https://github.com/clockworklabs/spacetimedb-ai-test-results)
+so every backend (spacetime / postgres / mongodb) files identically-structured
+artifacts — which is what keeps the runs comparable.
+
+| Template | Written by | Lives at |
+|---|---|---|
+| [`BUG_REPORT.template.md`](BUG_REPORT.template.md) | the grader (manual) | `<backend>/results/chat-app-<ts>/BUG_REPORT.md` |
+| [`ITERATION_LOG.template.md`](ITERATION_LOG.template.md) | the fix agent (appends), grader may annotate | `<backend>/results/chat-app-<ts>/ITERATION_LOG.md` |
+
+## Usage
+
+When grading turns up bugs, copy `BUG_REPORT.template.md` into the app directory
+as `BUG_REPORT.md`, fill it in from observed browser behavior, then run
+`./run.sh --fix <app-dir>`. Delete `BUG_REPORT.md` once every feature passes — the
+harness keys `--fix` on the file's existence, so its absence marks the app as done.
+
+`ITERATION_LOG.md` is append-only; one `## Iteration N` block per fix cycle. Its
+iteration/reprompt count feeds the "iterations to done" benchmark metric.
+
+> Grading is **manual** (graded in-browser by a human), so there is no dependency
+> on the automated Playwright suite for the comparison numbers.