From 122c5eb8530409688063b58b7e995172f2ae6eb9 Mon Sep 17 00:00:00 2001
From: Dong Hyuk Chang <9426164+thomasdhc@users.noreply.github.com>
Date: Tue, 28 Apr 2026 13:38:18 -0400
Subject: [PATCH 1/5] ci: add weekly audit for uv override-dependencies

Signed-off-by: Dong Hyuk Chang <9426164+thomasdhc@users.noreply.github.com>
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .github/scripts/audit_overrides.py    | 234 ++++++++++++++++++++++++++
 .github/workflows/audit-overrides.yml |  70 ++++++++
 2 files changed, 304 insertions(+)
 create mode 100644 .github/scripts/audit_overrides.py
 create mode 100644 .github/workflows/audit-overrides.yml

diff --git a/.github/scripts/audit_overrides.py b/.github/scripts/audit_overrides.py
new file mode 100644
index 0000000000..71cc41f9b2
--- /dev/null
+++ b/.github/scripts/audit_overrides.py
@@ -0,0 +1,234 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Audit `[tool.uv] override-dependencies` in pyproject.toml for staleness.
+
+For each entry, runs a fresh `uv lock` with that entry removed and classifies:
+  - load-bearing: removal breaks resolution (keep the override)
+  - shaping:     resolves cleanly but the locked version violates the override
+                 spec (override is actively pinning -- review whether the
+                 alternate version is acceptable)
+  - stale:       resolves cleanly with a satisfying version, or the package is
+                 not in the lock at all (override is a no-op -- removable)
+
+Outputs a markdown report to stdout and optionally to --output / --json.
+"""
+
+from __future__ import annotations
+
+import argparse
+import contextlib
+import json
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+import tomllib
+from dataclasses import asdict, dataclass
+from pathlib import Path
+
+from packaging.requirements import Requirement
+from packaging.specifiers import SpecifierSet
+from packaging.version import InvalidVersion, Version
+
+IGNORE_PATTERNS = shutil.ignore_patterns(
+    ".git",
+    ".venv",
+    "venv",
+    "__pycache__",
+    "*.egg-info",
+    ".pytest_cache",
+    ".ruff_cache",
+    ".mypy_cache",
+    "node_modules",
+    "build",
+    "dist",
+)
+
+
+@dataclass
+class Result:
+    spec: str
+    category: str  # load-bearing | shaping | stale | error
+    detail: str
+    log_excerpt: str = ""
+
+
+def load_overrides(pyproject: Path) -> list[str]:
+    data = tomllib.loads(pyproject.read_text())
+    return list(data.get("tool", {}).get("uv", {}).get("override-dependencies", []))
+
+
+def remove_override_line(pyproject: Path, spec: str) -> None:
+    """Strip the single line containing the given override spec."""
+    text = pyproject.read_text()
+    pattern = re.compile(
+        r'^[ \t]*"' + re.escape(spec) + r'"[ \t]*,?[ \t]*(#.*)?\n',
+        re.MULTILINE,
+    )
+    new_text, count = pattern.subn("", text, count=1)
+    if count != 1:
+        msg = f"Could not locate override line for {spec!r} in {pyproject}"
+        raise RuntimeError(msg)
+    pyproject.write_text(new_text)
+
+
+def run_uv_lock(workdir: Path, timeout: int) -> tuple[bool, str]:
+    lockfile = workdir / "uv.lock"
+    if lockfile.exists():
+        lockfile.unlink()
+    proc = subprocess.run(
+        ["uv", "lock"],
+        cwd=workdir,
+        capture_output=True,
+        text=True,
+        timeout=timeout,
+        check=False,
+    )
+    return proc.returncode == 0, (proc.stderr + proc.stdout)
+
+
+def locked_version(lockfile: Path, name: str) -> str | None:
+    canonical = name.lower().replace("_", "-")
+    data = tomllib.loads(lockfile.read_text())
+    for pkg in data.get("package", []):
+        pkg_name = pkg.get("name", "").lower().replace("_", "-")
+        if pkg_name == canonical:
+            return pkg.get("version")
+    return None
+
+
+def classify(spec: str, success: bool, log: str, lockfile: Path | None) -> Result:
+    if not success:
+        return Result(spec, "load-bearing", "removing the override breaks resolution", log[-600:])
+
+    assert lockfile is not None
+    try:
+        req = Requirement(spec)
+    except Exception as e:  # noqa: BLE001
+        return Result(spec, "error", f"failed to parse spec: {e}")
+
+    ver = locked_version(lockfile, req.name)
+
+    # "Ban" override (no specifier, e.g. `apex; sys_platform == 'never'`):
+    # The intent is to prevent the package from ever resolving. If removing
+    # the override pulls it into the lock, the override is load-bearing.
+    if not req.specifier:
+        if ver is None:
+            return Result(spec, "stale", f"{req.name} is not in the lock without the override")
+        return Result(spec, "load-bearing", f"removing override pulls {req.name}=={ver} into the lock")
+
+    if ver is None:
+        return Result(spec, "stale", f"{req.name} is not in the lock without the override")
+
+    try:
+        if Version(ver) in SpecifierSet(str(req.specifier)):
+            return Result(spec, "stale", f"resolves to {req.name}=={ver}, already satisfies {req.specifier}")
+    except InvalidVersion:
+        return Result(spec, "error", f"locked version {ver!r} for {req.name} is not PEP 440 compatible")
+
+    return Result(
+        spec,
+        "shaping",
+        f"resolves to {req.name}=={ver} without override (override forces {req.specifier})",
+    )
+
+
+def audit_one(repo: Path, spec: str, timeout: int) -> Result:
+    with tempfile.TemporaryDirectory(prefix="audit-overrides-") as td:
+        dst = Path(td) / "repo"
+        shutil.copytree(repo, dst, ignore=IGNORE_PATTERNS, symlinks=True)
+        remove_override_line(dst / "pyproject.toml", spec)
+        ok, log = run_uv_lock(dst, timeout=timeout)
+        return classify(spec, ok, log, dst / "uv.lock" if ok else None)
+
+
+def render_markdown(results: list[Result]) -> str:
+    buckets = {"stale": [], "shaping": [], "load-bearing": [], "error": []}
+    for r in results:
+        buckets.setdefault(r.category, []).append(r)
+
+    out = ["# Override Dependencies Audit", ""]
+    out.append(
+        f"Audited {len(results)} override(s): "
+        f"{len(buckets['stale'])} stale, "
+        f"{len(buckets['shaping'])} shaping, "
+        f"{len(buckets['load-bearing'])} load-bearing, "
+        f"{len(buckets['error'])} error.",
+    )
+    out.append("")
+
+    sections = [
+        ("stale", "Stale (safe to remove)"),
+        ("shaping", "Shaping (review -- override actively constrains resolution)"),
+        ("load-bearing", "Load-bearing (keep)"),
+        ("error", "Errors"),
+    ]
+    for key, title in sections:
+        if not buckets[key]:
+            continue
+        out.append(f"## {title}")
+        out.append("")
+        for r in buckets[key]:
+            out.append(f"- `{r.spec}` -- {r.detail}")
+        out.append("")
+    return "\n".join(out)
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--repo", type=Path, default=Path.cwd(), help="Path to the Curator repo (default: cwd)")
+    parser.add_argument("--output", type=Path, help="Write markdown report to this path")
+    parser.add_argument("--json", dest="json_out", type=Path, help="Write JSON report to this path")
+    parser.add_argument("--timeout", type=int, default=600, help="Timeout per `uv lock` in seconds")
+    parser.add_argument("--only", action="append", default=[], help="Audit only specs matching substring (repeatable)")
+    parser.add_argument("--fail-on-stale", action="store_true", help="Exit non-zero if any override is stale")
+    args = parser.parse_args()
+
+    repo = args.repo.resolve()
+    overrides = load_overrides(repo / "pyproject.toml")
+    if args.only:
+        overrides = [o for o in overrides if any(s in o for s in args.only)]
+    if not overrides:
+        print("No overrides matched.", file=sys.stderr)
+        return 0
+
+    results: list[Result] = []
+    for i, spec in enumerate(overrides, 1):
+        print(f"[{i}/{len(overrides)}] Auditing: {spec}", file=sys.stderr)
+        try:
+            r = audit_one(repo, spec, timeout=args.timeout)
+        except subprocess.TimeoutExpired:
+            r = Result(spec, "error", f"`uv lock` exceeded timeout of {args.timeout}s")
+        except Exception as e:  # noqa: BLE001
+            r = Result(spec, "error", f"audit failed: {e}")
+        print(f"    -> {r.category}: {r.detail}", file=sys.stderr)
+        results.append(r)
+
+    md = render_markdown(results)
+    print(md)
+    if args.output:
+        args.output.write_text(md)
+    if args.json_out:
+        args.json_out.write_text(json.dumps([asdict(r) for r in results], indent=2))
+
+    if args.fail_on_stale and any(r.category == "stale" for r in results):
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    with contextlib.suppress(KeyboardInterrupt):
+        sys.exit(main())
diff --git a/.github/workflows/audit-overrides.yml b/.github/workflows/audit-overrides.yml
new file mode 100644
index 0000000000..ea96aed0d4
--- /dev/null
+++ b/.github/workflows/audit-overrides.yml
@@ -0,0 +1,70 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Weekly audit of `[tool.uv] override-dependencies` in pyproject.toml.
+# For each override, runs a fresh `uv lock` with that entry removed and
+# reports whether it is stale, shaping, or load-bearing.
+# Informational only -- does not gate PRs.
+
+name: Audit override dependencies
+
+on:
+  schedule:
+    - cron: "0 12 * * 1"  # Monday 12:00 UTC
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  UV_HTTP_TIMEOUT: 300
+  UV_CACHE_DIR: /tmp/uv-cache
+
+jobs:
+  audit:
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: astral-sh/setup-uv@v6
+        with:
+          python-version: "3.13"
+          enable-cache: true
+          cache-dependency-glob: "pyproject.toml"
+
+      - name: Run override audit
+        run: |
+          uv run --with packaging --no-project python .github/scripts/audit_overrides.py \
+            --output audit-report.md \
+            --json audit-report.json
+
+      - name: Append report to job summary
+        if: always()
+        run: |
+          if [ -f audit-report.md ]; then
+            cat audit-report.md >> "$GITHUB_STEP_SUMMARY"
+          else
+            echo "No audit report produced." >> "$GITHUB_STEP_SUMMARY"
+          fi
+
+      - uses: actions/upload-artifact@v6
+        if: always()
+        with:
+          name: override-audit-report
+          path: |
+            audit-report.md
+            audit-report.json
+          if-no-files-found: warn

From e6b57aac9ed7dbbd2729aa2a6e7e31e33aad03ac Mon Sep 17 00:00:00 2001
From: Dong Hyuk Chang <9426164+thomasdhc@users.noreply.github.com>
Date: Tue, 28 Apr 2026 13:41:14 -0400
Subject: [PATCH 2/5] Address ruff

Signed-off-by: Dong Hyuk Chang <9426164+thomasdhc@users.noreply.github.com>
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .github/scripts/audit_overrides.py | 55 +++++++++++++++---------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/.github/scripts/audit_overrides.py b/.github/scripts/audit_overrides.py
index 71cc41f9b2..e998618964 100644
--- a/.github/scripts/audit_overrides.py
+++ b/.github/scripts/audit_overrides.py
@@ -35,10 +35,10 @@
 import subprocess
 import sys
 import tempfile
-import tomllib
 from dataclasses import asdict, dataclass
 from pathlib import Path
 
+import tomllib
 from packaging.requirements import Requirement
 from packaging.specifiers import SpecifierSet
 from packaging.version import InvalidVersion, Version
@@ -90,7 +90,7 @@ def run_uv_lock(workdir: Path, timeout: int) -> tuple[bool, str]:
     if lockfile.exists():
         lockfile.unlink()
     proc = subprocess.run(
-        ["uv", "lock"],
+        ["uv", "lock"],  # noqa: S607 -- uv is provided on PATH by the workflow / dev env
         cwd=workdir,
         capture_output=True,
         text=True,
@@ -110,40 +110,41 @@ def locked_version(lockfile: Path, name: str) -> str | None:
     return None
 
 
-def classify(spec: str, success: bool, log: str, lockfile: Path | None) -> Result:
-    if not success:
-        return Result(spec, "load-bearing", "removing the override breaks resolution", log[-600:])
-
-    assert lockfile is not None
-    try:
-        req = Requirement(spec)
-    except Exception as e:  # noqa: BLE001
-        return Result(spec, "error", f"failed to parse spec: {e}")
-
-    ver = locked_version(lockfile, req.name)
-
+def _categorize(req: Requirement, ver: str | None) -> tuple[str, str]:
     # "Ban" override (no specifier, e.g. `apex; sys_platform == 'never'`):
-    # The intent is to prevent the package from ever resolving. If removing
-    # the override pulls it into the lock, the override is load-bearing.
+    # the intent is to prevent the package from resolving anywhere. If
+    # removing the override pulls it into the lock, the override is load-bearing.
     if not req.specifier:
         if ver is None:
-            return Result(spec, "stale", f"{req.name} is not in the lock without the override")
-        return Result(spec, "load-bearing", f"removing override pulls {req.name}=={ver} into the lock")
+            return "stale", f"{req.name} is not in the lock without the override"
+        return "load-bearing", f"removing override pulls {req.name}=={ver} into the lock"
 
     if ver is None:
-        return Result(spec, "stale", f"{req.name} is not in the lock without the override")
+        return "stale", f"{req.name} is not in the lock without the override"
 
     try:
-        if Version(ver) in SpecifierSet(str(req.specifier)):
-            return Result(spec, "stale", f"resolves to {req.name}=={ver}, already satisfies {req.specifier}")
+        satisfies = Version(ver) in SpecifierSet(str(req.specifier))
     except InvalidVersion:
-        return Result(spec, "error", f"locked version {ver!r} for {req.name} is not PEP 440 compatible")
+        return "error", f"locked version {ver!r} for {req.name} is not PEP 440 compatible"
 
-    return Result(
-        spec,
-        "shaping",
-        f"resolves to {req.name}=={ver} without override (override forces {req.specifier})",
-    )
+    if satisfies:
+        return "stale", f"resolves to {req.name}=={ver}, already satisfies {req.specifier}"
+    return "shaping", f"resolves to {req.name}=={ver} without override (override forces {req.specifier})"
+
+
+def classify(spec: str, success: bool, log: str, lockfile: Path | None) -> Result:
+    if not success:
+        return Result(spec, "load-bearing", "removing the override breaks resolution", log[-600:])
+    if lockfile is None:
+        return Result(spec, "error", "lock succeeded but no lockfile was produced")
+
+    try:
+        req = Requirement(spec)
+    except Exception as e:  # noqa: BLE001
+        return Result(spec, "error", f"failed to parse spec: {e}")
+
+    category, detail = _categorize(req, locked_version(lockfile, req.name))
+    return Result(spec, category, detail)
 
 
 def audit_one(repo: Path, spec: str, timeout: int) -> Result:

From 640b52a433b4a05feb0304cfee93886c7490dab6 Mon Sep 17 00:00:00 2001
From: Dong Hyuk Chang <9426164+thomasdhc@users.noreply.github.com>
Date: Tue, 28 Apr 2026 13:44:19 -0400
Subject: [PATCH 3/5] Add comments to function

Signed-off-by: Dong Hyuk Chang <9426164+thomasdhc@users.noreply.github.com>
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .github/scripts/audit_overrides.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/.github/scripts/audit_overrides.py b/.github/scripts/audit_overrides.py
index e998618964..ca46a54582 100644
--- a/.github/scripts/audit_overrides.py
+++ b/.github/scripts/audit_overrides.py
@@ -67,6 +67,7 @@ class Result:
 
 
 def load_overrides(pyproject: Path) -> list[str]:
+    """Return every spec listed under [tool.uv] override-dependencies."""
     data = tomllib.loads(pyproject.read_text())
     return list(data.get("tool", {}).get("uv", {}).get("override-dependencies", []))
 
@@ -86,6 +87,7 @@ def remove_override_line(pyproject: Path, spec: str) -> None:
 
 
 def run_uv_lock(workdir: Path, timeout: int) -> tuple[bool, str]:
+    """Run a fresh `uv lock` in workdir and return (success, combined output)."""
     lockfile = workdir / "uv.lock"
     if lockfile.exists():
         lockfile.unlink()
@@ -101,6 +103,7 @@ def run_uv_lock(workdir: Path, timeout: int) -> tuple[bool, str]:
 
 
 def locked_version(lockfile: Path, name: str) -> str | None:
+    """Return the resolved version of `name` from uv.lock, or None if absent."""
     canonical = name.lower().replace("_", "-")
     data = tomllib.loads(lockfile.read_text())
     for pkg in data.get("package", []):
@@ -111,6 +114,7 @@ def locked_version(lockfile: Path, name: str) -> str | None:
 
 
 def _categorize(req: Requirement, ver: str | None) -> tuple[str, str]:
+    """Decide (category, detail) given the parsed override and the resolved version."""
     # "Ban" override (no specifier, e.g. `apex; sys_platform == 'never'`):
     # the intent is to prevent the package from resolving anywhere. If
     # removing the override pulls it into the lock, the override is load-bearing.
@@ -133,6 +137,7 @@ def _categorize(req: Requirement, ver: str | None) -> tuple[str, str]:
 
 
 def classify(spec: str, success: bool, log: str, lockfile: Path | None) -> Result:
+    """Wrap a uv lock outcome into a Result with the appropriate category."""
     if not success:
         return Result(spec, "load-bearing", "removing the override breaks resolution", log[-600:])
     if lockfile is None:
@@ -148,6 +153,7 @@ def classify(spec: str, success: bool, log: str, lockfile: Path | None) -> Resul
 
 
 def audit_one(repo: Path, spec: str, timeout: int) -> Result:
+    """Copy the repo to a temp dir, drop one override, re-lock, and classify."""
     with tempfile.TemporaryDirectory(prefix="audit-overrides-") as td:
         dst = Path(td) / "repo"
         shutil.copytree(repo, dst, ignore=IGNORE_PATTERNS, symlinks=True)
@@ -157,6 +163,7 @@ def audit_one(repo: Path, spec: str, timeout: int) -> Result:
 
 
 def render_markdown(results: list[Result]) -> str:
+    """Render the audit results as a categorized markdown report."""
     buckets = {"stale": [], "shaping": [], "load-bearing": [], "error": []}
     for r in results:
         buckets.setdefault(r.category, []).append(r)
@@ -189,6 +196,7 @@ def render_markdown(results: list[Result]) -> str:
 
 
 def main() -> int:
+    """Parse CLI args, run the audit over every override, and emit reports."""
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument("--repo", type=Path, default=Path.cwd(), help="Path to the Curator repo (default: cwd)")
     parser.add_argument("--output", type=Path, help="Write markdown report to this path")

From faea65ed725b2c89f701971810e251e1aa7f1e10 Mon Sep 17 00:00:00 2001
From: Dong Hyuk Chang <9426164+thomasdhc@users.noreply.github.com>
Date: Tue, 28 Apr 2026 14:08:48 -0400
Subject: [PATCH 4/5] feat: Add defensive label for auditing

Signed-off-by: Dong Hyuk Chang <9426164+thomasdhc@users.noreply.github.com>
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .github/scripts/audit_overrides.py | 138 ++++++++++++++++++++++++-----
 1 file changed, 115 insertions(+), 23 deletions(-)

diff --git a/.github/scripts/audit_overrides.py b/.github/scripts/audit_overrides.py
index ca46a54582..1ee3c3d67c 100644
--- a/.github/scripts/audit_overrides.py
+++ b/.github/scripts/audit_overrides.py
@@ -19,8 +19,11 @@
   - shaping:     resolves cleanly but the locked version violates the override
                  spec (override is actively pinning -- review whether the
                  alternate version is acceptable)
-  - stale:       resolves cleanly with a satisfying version, or the package is
-                 not in the lock at all (override is a no-op -- removable)
+  - defensive:   removal still resolves to a satisfying version today, but the
+                 override's *inverse* range is independently resolvable -- so
+                 the override is protecting against an upstream regression
+  - stale:       removal resolves cleanly (satisfying version, or the package is
+                 absent from the lock) AND no inverse range resolves -- a no-op
 
 Outputs a markdown report to stdout and optionally to --output / --json.
 """
@@ -35,33 +38,39 @@
 import subprocess
 import sys
 import tempfile
+from collections.abc import Iterator  # noqa: TC003 -- annotation use only, but the cost is nil
 from dataclasses import asdict, dataclass
 from pathlib import Path
 
 import tomllib
-from packaging.requirements import Requirement
+from packaging.requirements import InvalidRequirement, Requirement
 from packaging.specifiers import SpecifierSet
 from packaging.version import InvalidVersion, Version
 
-IGNORE_PATTERNS = shutil.ignore_patterns(
-    ".git",
-    ".venv",
-    "venv",
-    "__pycache__",
-    "*.egg-info",
-    ".pytest_cache",
-    ".ruff_cache",
-    ".mypy_cache",
-    "node_modules",
-    "build",
-    "dist",
-)
+# What `uv lock` reads from the project: the spec, the package source (for the
+# dynamic version attr in nemo_curator.package_info), and README.md (referenced
+# as the project's `readme`). Anything else (tests, tutorials, docs, .git, ...)
+# is irrelevant to resolution. Update this list if pyproject.toml starts
+# referencing additional files.
+LOCK_INPUTS = ("pyproject.toml", "nemo_curator", "README.md")
+
+
+def stage_repo(repo: Path, dst: Path) -> None:
+    """Copy just the files `uv lock` needs from `repo` into `dst`."""
+    dst.mkdir(parents=True, exist_ok=True)
+    for name in LOCK_INPUTS:
+        src = repo / name
+        target = dst / name
+        if src.is_dir():
+            shutil.copytree(src, target, symlinks=True)
+        else:
+            shutil.copy2(src, target)
 
 
 @dataclass
 class Result:
     spec: str
-    category: str  # load-bearing | shaping | stale | error
+    category: str  # load-bearing | shaping | defensive | stale | error
     detail: str
     log_excerpt: str = ""
 
@@ -86,6 +95,62 @@ def remove_override_line(pyproject: Path, spec: str) -> None:
     pyproject.write_text(new_text)
 
 
+def replace_override_line(pyproject: Path, old_spec: str, new_spec: str) -> None:
+    """Replace the override entry for `old_spec` with `new_spec` in pyproject.toml."""
+    text = pyproject.read_text()
+    pattern = re.compile(
+        r'^([ \t]*)"' + re.escape(old_spec) + r'"([ \t]*,?[ \t]*(?:#.*)?\n)',
+        re.MULTILINE,
+    )
+    new_text, count = pattern.subn(
+        lambda m: f'{m.group(1)}"{new_spec}"{m.group(2)}',
+        text,
+        count=1,
+    )
+    if count != 1:
+        msg = f"Could not locate override line for {old_spec!r} in {pyproject}"
+        raise RuntimeError(msg)
+    pyproject.write_text(new_text)
+
+
+def derive_inverse_specs(spec: str) -> list[str]:
+    """Return inverse specs covering ranges the override currently excludes.
+
+    A single-bound spec like `torchcodec>=0.9.0` yields `["torchcodec<0.9.0"]`.
+    A compound spec like `numpy>=2.0.0,<=2.2.0` yields both
+    `["numpy<2.0.0", "numpy>2.2.0"]`. Markers are preserved.
+    Returns an empty list for unparseable specs and ban overrides (no specifier).
+    `==`, `!=`, `~=` and `===` clauses are skipped: no clean single-clause inverse.
+    """
+    try:
+        req = Requirement(spec)
+    except InvalidRequirement:
+        return []
+    if not req.specifier:
+        return []  # ban override -- already covered by the removal test
+
+    flip = {">=": "<", ">": "<=", "<=": ">", "<": ">="}
+    inverses: list[str] = []
+    for clause in req.specifier:
+        inv_op = flip.get(clause.operator)
+        if inv_op is None:
+            continue  # skip ==, !=, ~=
+        body = f"{req.name}{inv_op}{clause.version}"
+        if req.marker:
+            body = f"{body}; {req.marker}"
+        inverses.append(body)
+    return inverses
+
+
+@contextlib.contextmanager
+def _staged_repo(repo: Path) -> Iterator[Path]:
+    """Yield a temp staging copy of `repo` containing only LOCK_INPUTS."""
+    with tempfile.TemporaryDirectory(prefix="audit-overrides-") as td:
+        dst = Path(td) / "repo"
+        stage_repo(repo, dst)
+        yield dst
+
+
 def run_uv_lock(workdir: Path, timeout: int) -> tuple[bool, str]:
     """Run a fresh `uv lock` in workdir and return (success, combined output)."""
     lockfile = workdir / "uv.lock"
@@ -153,18 +218,43 @@ def classify(spec: str, success: bool, log: str, lockfile: Path | None) -> Resul
 
 
 def audit_one(repo: Path, spec: str, timeout: int) -> Result:
-    """Copy the repo to a temp dir, drop one override, re-lock, and classify."""
-    with tempfile.TemporaryDirectory(prefix="audit-overrides-") as td:
-        dst = Path(td) / "repo"
-        shutil.copytree(repo, dst, ignore=IGNORE_PATTERNS, symlinks=True)
+    """Drop the override, re-lock, classify, and on `stale` run the inverse test."""
+    with _staged_repo(repo) as dst:
         remove_override_line(dst / "pyproject.toml", spec)
         ok, log = run_uv_lock(dst, timeout=timeout)
-        return classify(spec, ok, log, dst / "uv.lock" if ok else None)
+        primary = classify(spec, ok, log, dst / "uv.lock" if ok else None)
+
+    if primary.category != "stale":
+        return primary
+
+    # Differentiate "truly stale" from "defensive": replace the override with
+    # its inverse and check whether any package in the graph would otherwise
+    # allow that range. If yes, the override is protective.
+    inverses = derive_inverse_specs(spec)
+    if not inverses:
+        return primary  # not invertible; leave as stale
+
+    for inverse in inverses:
+        with _staged_repo(repo) as dst:
+            replace_override_line(dst / "pyproject.toml", spec, inverse)
+            inverse_ok, _ = run_uv_lock(dst, timeout=timeout)
+        if inverse_ok:
+            return Result(
+                spec,
+                "defensive",
+                f"natural resolution satisfies the override, but `{inverse}` is also resolvable -- override is protective",
+            )
+
+    return Result(
+        spec,
+        "stale",
+        f"{primary.detail}; inverse range is unsatisfiable -- truly redundant",
+    )
 
 
 def render_markdown(results: list[Result]) -> str:
     """Render the audit results as a categorized markdown report."""
-    buckets = {"stale": [], "shaping": [], "load-bearing": [], "error": []}
+    buckets = {"stale": [], "defensive": [], "shaping": [], "load-bearing": [], "error": []}
     for r in results:
         buckets.setdefault(r.category, []).append(r)
 
@@ -172,6 +262,7 @@ def render_markdown(results: list[Result]) -> str:
     out.append(
         f"Audited {len(results)} override(s): "
         f"{len(buckets['stale'])} stale, "
+        f"{len(buckets['defensive'])} defensive, "
         f"{len(buckets['shaping'])} shaping, "
         f"{len(buckets['load-bearing'])} load-bearing, "
         f"{len(buckets['error'])} error.",
@@ -180,6 +271,7 @@ def render_markdown(results: list[Result]) -> str:
 
     sections = [
         ("stale", "Stale (safe to remove)"),
+        ("defensive", "Defensive (redundant today, protects against upstream regression)"),
         ("shaping", "Shaping (review -- override actively constrains resolution)"),
         ("load-bearing", "Load-bearing (keep)"),
         ("error", "Errors"),

From 437a5ae903b342604bcbebac67d75d5c9472c1ed Mon Sep 17 00:00:00 2001
From: Dong Hyuk Chang <9426164+thomasdhc@users.noreply.github.com>
Date: Tue, 28 Apr 2026 14:16:28 -0400
Subject: [PATCH 5/5] fix: ruff target-version

Signed-off-by: Dong Hyuk Chang <9426164+thomasdhc@users.noreply.github.com>
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .github/scripts/audit_overrides.py | 2 +-
 pyproject.toml                     | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/scripts/audit_overrides.py b/.github/scripts/audit_overrides.py
index 1ee3c3d67c..db64d6b372 100644
--- a/.github/scripts/audit_overrides.py
+++ b/.github/scripts/audit_overrides.py
@@ -38,11 +38,11 @@
 import subprocess
 import sys
 import tempfile
+import tomllib
 from collections.abc import Iterator  # noqa: TC003 -- annotation use only, but the cost is nil
 from dataclasses import asdict, dataclass
 from pathlib import Path
 
-import tomllib
 from packaging.requirements import InvalidRequirement, Requirement
 from packaging.specifiers import SpecifierSet
 from packaging.version import InvalidVersion, Version
diff --git a/pyproject.toml b/pyproject.toml
index 98060acb97..e9b25e21d9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -339,6 +339,7 @@ source = ["nemo_curator", "/opt/Curator/nemo_curator", "/home/runner/work/Curato
 
 [tool.ruff]
 line-length = 119
+target-version = "py311"
 [tool.ruff.lint]
 select = ["ALL"]
 ignore = [