contentauth · tmathern · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026
diff --git a/src/c2pa/c2pa.py b/src/c2pa/c2pa.py
@@ -1812,7 +1812,26 @@ def read_callback(ctx, data, length):
                 if not data or length <= 0:
                     return -1
 
-                buffer = self._file_like_stream.read(length)
+                stream = self._file_like_stream
+                readinto = getattr(stream, "readinto", None)
+                if readinto is not None:
+                    # Zero-copy fast path. from_address wraps the native buffer
+                    # Rust handed us (size == length) without a cast object and
+                    # without allocating a length-sized bytes. readinto fills it
+                    # directly and returns the byte count, so there is no
+                    # intermediate bytes, no len()/min(), and no memmove copy.
+                    # Every binary stream (BytesIO, BufferedReader, FileIO,
+                    # BufferedRandom) implements readinto; only text-mode or
+                    # custom duck-typed streams lack it and fall through below.
+                    # data is a POINTER(c_uint8); addressof(.contents) gives the
+                    # raw int address from_address needs (no cast object).
+                    buf = (ctypes.c_char * length).from_address(
+                        ctypes.addressof(data.contents))
+                    n = readinto(buf)
+                    return n if n else 0
+
+                # Fallback for streams without readinto.
+                buffer = stream.read(length)
                 if not buffer:  # EOF
                     return 0
 
@@ -1846,8 +1865,14 @@ def seek_callback(ctx, offset, whence):
             if not self._initialized or self._closed:
                 return -1
             try:
-                file_stream.seek(offset, whence)
-                return file_stream.tell()
+                # io.IOBase.seek returns the new absolute position, which is
+                # exactly what the Rust seek callback expects (see
+                # c2pa_stream.rs). Use it directly and skip a separate tell()
+                # call, which would allocate another Python int on every seek.
+                # Fall back to tell() only for stream objects that do not honor
+                # the return-value contract and return None.
+                pos = file_stream.seek(offset, whence)
+                return pos if pos is not None else file_stream.tell()
             except Exception:
                 return -1
 

diff --git a/tests/perf/README.md b/tests/perf/README.md
@@ -95,6 +95,8 @@ Why it's useful: temporary allocations are not leaks, since the memory is return
 
 How to read it: wide frames are the biggest sources of throwaway allocations. The view may be sparse or empty for a scenario that does little churn, which is itself a valid result. See [Temporary allocations](#temporary-allocations).
 
+The temporary view is the heaviest to render: memray holds every allocation and free to decide which are short-lived. On a very large capture (a long run, a high `MEMRAY_ITERATIONS`, or a churn-heavy scenario) the render can run out of memory and fail. The run does not abort in that case; it records what failed and keeps going. See [Troubleshooting](#troubleshooting).
+
 ## Running without Docker (if memray is supported and installed locally)
 
 ```bash
@@ -217,3 +219,37 @@ make memory-use-bench PERF_ARGS=--update-baseline
 ```
 
 Commit the updated `baseline.json` alongside the code change, so it becomes the new reference to compare against.
+
+## Troubleshooting
+
+### A flamegraph render fails with `exit -9`
+
+You may see a message like `flamegraph render failed for reader_mp4-...-temporary.html (killed (likely OOM))`. The `-9` is SIGKILL: the operating system's out-of-memory killer terminated the `memray flamegraph` subprocess. The temporary view is the heaviest to render, and on a large capture (a long run, a high `MEMRAY_ITERATIONS`, or a churn-heavy scenario such as `reader_mp4`) it can exhaust available memory.
+
+The run does not abort. The capture and the metrics (`peak_bytes`, `leaked_bytes`, `total_allocations`) are read separately and are still recorded, the baseline is still written, and the run lists every failed render at the end. Only the HTML render is missing, and you have two ways to regenerate it.
+
+#### Option A: rerun the one scenario
+
+A single-scenario run renders one capture at a time with nothing else resident, so it often fits where the full suite did not:
+
+```bash
+make memory-use-bench SCENARIO=reader_mp4
+```
+
+If it still runs out of memory, lower the iteration count to shrink the capture:
+
+```bash
+make memory-use-bench SCENARIO=reader_mp4 MEMRAY_ITERATIONS=20
+```
+
+A lower iteration count makes that scenario's absolute allocation numbers no longer directly comparable to a full 100-iteration run.
+
+#### Option B: re-render the kept capture (no re-profiling)
+
+When a render fails, the run keeps that scenario's capture as `reports/<scenario>-<env>.bin`. Re-render just the failed view from that file with a higher temporary-allocation threshold, which cuts how much memray holds in memory so the render fits. This uses the original run's data, so the result stays comparable to the rest of the run:
+
+```bash
+python3 -m memray flamegraph reports/reader_mp4-python-3.12-slim.bin \
+  -o reports/reader_mp4-python-3.12-slim-temporary.html \
+  --temporary-allocations --temporary-allocation-threshold=10 --force
+```
diff --git a/tests/perf/run_profile.py b/tests/perf/run_profile.py
@@ -27,6 +27,7 @@
 import argparse
 import json
 import os
+import shutil
 import subprocess
 import sys
 import tempfile
@@ -74,7 +75,7 @@ def _run_scenario_under_memray(name: str, bin_path: Path) -> None:
         sys.exit(1)
 
 
-def _generate_flamegraph(bin_path: Path, out_path: Path, mode: str = "peak") -> None:
+def _generate_flamegraph(bin_path: Path, out_path: Path, mode: str = "peak") -> bool:
     """Render one flamegraph view of a capture file.
 
     mode:
@@ -94,8 +95,13 @@ def _generate_flamegraph(bin_path: Path, out_path: Path, mode: str = "peak") ->
     print(f"    flamegraph ({mode})...", flush=True)
     result = subprocess.run(cmd, text=True)
     if result.returncode != 0:
-        print(f"  flamegraph generation failed for {out_path.name} (exit {result.returncode})", file=sys.stderr)
-        sys.exit(1)
+        # -9 is SIGKILL, almost always the OOM killer reaping the heavy
+        # temporary render on a large capture. Do not abort the whole run:
+        # the capture and metrics are recorded separately and still good.
+        reason = "killed (likely OOM)" if result.returncode == -9 else f"exit {result.returncode}"
+        print(f"  flamegraph {mode} render failed for {out_path.name} ({reason})", file=sys.stderr)
+        return False
+    return True
 
 
 # get_allocation_records() yields deallocation records too...
@@ -189,6 +195,7 @@ def main() -> None:
 
     results: dict = {}
     failures: list[str] = []
+    render_failures: list[dict] = []
 
     total = len(scenarios_to_run)
     for idx, name in enumerate(scenarios_to_run, 1):
@@ -197,18 +204,25 @@ def main() -> None:
         with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as tmp:
             bin_path = Path(tmp.name)
 
+        env_tag = f"-{PERF_ENV}" if PERF_ENV else ""
+        scenario_render_failed = False
+        failed_modes: list[dict] = []
         try:
             print(f"  profiling...")
             _run_scenario_under_memray(name, bin_path)
 
-            env_tag = f"-{PERF_ENV}" if PERF_ENV else ""
             peak_html = REPORTS_DIR / f"{name}{env_tag}-peak.html"
             leaks_html = REPORTS_DIR / f"{name}{env_tag}-leaks.html"
             temporary_html = REPORTS_DIR / f"{name}{env_tag}-temporary.html"
             print(f"  generating flamegraphs (peak + leaks + temporary)...")
-            _generate_flamegraph(bin_path, peak_html, mode="peak")
-            _generate_flamegraph(bin_path, leaks_html, mode="leaks")
-            _generate_flamegraph(bin_path, temporary_html, mode="temporary")
+            scenario_render_failed = False
+            failed_modes: list[dict] = []
+            for mode, html in (("peak", peak_html),
+                               ("leaks", leaks_html),
+                               ("temporary", temporary_html)):
+                if not _generate_flamegraph(bin_path, html, mode=mode):
+                    scenario_render_failed = True
+                    failed_modes.append({"name": name, "mode": mode, "html": html.name})
 
             print(f"  reading metrics...", flush=True)
             metrics = _read_metrics(bin_path)
@@ -234,7 +248,17 @@ def main() -> None:
                             f" (+{diff_pct:.1f}%, threshold {(THRESHOLD-1)*100:.0f}%)"
                         )
         finally:
-            bin_path.unlink(missing_ok=True)
+            if scenario_render_failed:
+                # Keep the capture so the failed view can be re-rendered
+                # offline (with a higher --temporary-allocation-threshold)
+                # instead of re-profiling the whole scenario.
+                kept = REPORTS_DIR / f"{name}{env_tag}.bin"
+                shutil.move(str(bin_path), str(kept))
+                for fm in failed_modes:
+                    fm["bin"] = str(kept)
+                render_failures.extend(failed_modes)
+            else:
+                bin_path.unlink(missing_ok=True)
 
     if args.update_baseline or not baseline:
         # When running a single scenario, merge its result into the existing
@@ -250,6 +274,14 @@ def main() -> None:
         verb = "Updated" if baseline else "Created"
         print(f"\n{verb} baseline: {BASELINE_FILE}")
 
+    if render_failures:
+        print("\nFLAMEGRAPH RENDERS FAILED (capture + metrics still recorded):", file=sys.stderr)
+        for r in render_failures:
+            print(f"  {r['name']} [{r['mode']}] -> {r['html']}  (capture kept: {r['bin']})", file=sys.stderr)
+        print("  Recover without re-profiling, e.g.:", file=sys.stderr)
+        print("    python3 -m memray flamegraph <kept.bin> -o <out.html> "
+              "--temporary-allocations --temporary-allocation-threshold=10 --force", file=sys.stderr)
+
     if failures:
         print("\nREGRESSIONS DETECTED:", file=sys.stderr)
         for f in failures: