Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions src/c2pa/c2pa.py
Original file line number Diff line number Diff line change
Expand Up @@ -1812,7 +1812,26 @@ def read_callback(ctx, data, length):
if not data or length <= 0:
return -1

buffer = self._file_like_stream.read(length)
stream = self._file_like_stream
readinto = getattr(stream, "readinto", None)
if readinto is not None:
# Zero-copy fast path. from_address wraps the native buffer
# Rust handed us (size == length) without a cast object and
# without allocating a length-sized bytes. readinto fills it
# directly and returns the byte count, so there is no
# intermediate bytes, no len()/min(), and no memmove copy.
# Every binary stream (BytesIO, BufferedReader, FileIO,
# BufferedRandom) implements readinto; only text-mode or
# custom duck-typed streams lack it and fall through below.
# data is a POINTER(c_uint8); addressof(.contents) gives the
# raw int address from_address needs (no cast object).
buf = (ctypes.c_char * length).from_address(
ctypes.addressof(data.contents))
n = readinto(buf)
return n if n else 0

# Fallback for streams without readinto.
buffer = stream.read(length)
if not buffer: # EOF
return 0

Expand Down Expand Up @@ -1846,8 +1865,14 @@ def seek_callback(ctx, offset, whence):
if not self._initialized or self._closed:
return -1
try:
file_stream.seek(offset, whence)
return file_stream.tell()
# io.IOBase.seek returns the new absolute position, which is
# exactly what the Rust seek callback expects (see
# c2pa_stream.rs). Use it directly and skip a separate tell()
# call, which would allocate another Python int on every seek.
# Fall back to tell() only for stream objects that do not honor
# the return-value contract and return None.
pos = file_stream.seek(offset, whence)
return pos if pos is not None else file_stream.tell()
except Exception:
return -1

Expand Down
36 changes: 36 additions & 0 deletions tests/perf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ Why it's useful: temporary allocations are not leaks, since the memory is return

How to read it: wide frames are the biggest sources of throwaway allocations. The view may be sparse or empty for a scenario that does little churn, which is itself a valid result. See [Temporary allocations](#temporary-allocations).

The temporary view is the heaviest to render: memray holds every allocation and free to decide which are short-lived. On a very large capture (a long run, a high `MEMRAY_ITERATIONS`, or a churn-heavy scenario) the render can run out of memory and fail. The run does not abort in that case; it records what failed and keeps going. See [Troubleshooting](#troubleshooting).

## Running without Docker (if memray is supported and installed locally)

```bash
Expand Down Expand Up @@ -217,3 +219,37 @@ make memory-use-bench PERF_ARGS=--update-baseline
```

Commit the updated `baseline.json` alongside the code change, so it becomes the new reference to compare against.

## Troubleshooting

### A flamegraph render fails with `exit -9`

You may see a message like `flamegraph render failed for reader_mp4-...-temporary.html (killed (likely OOM))`. The `-9` is SIGKILL: the operating system's out-of-memory killer terminated the `memray flamegraph` subprocess. The temporary view is the heaviest to render, and on a large capture (a long run, a high `MEMRAY_ITERATIONS`, or a churn-heavy scenario such as `reader_mp4`) it can exhaust available memory.

The run does not abort. The capture and the metrics (`peak_bytes`, `leaked_bytes`, `total_allocations`) are read separately and are still recorded, the baseline is still written, and the run lists every failed render at the end. Only the HTML render is missing, and you have two ways to regenerate it.

#### Option A: rerun the one scenario

A single-scenario run renders one capture at a time with nothing else resident, so it often fits where the full suite did not:

```bash
make memory-use-bench SCENARIO=reader_mp4
```

If it still runs out of memory, lower the iteration count to shrink the capture:

```bash
make memory-use-bench SCENARIO=reader_mp4 MEMRAY_ITERATIONS=20
```

A lower iteration count makes that scenario's absolute allocation numbers no longer directly comparable to a full 100-iteration run.

#### Option B: re-render the kept capture (no re-profiling)

When a render fails, the run keeps that scenario's capture as `reports/<scenario>-<env>.bin`. Re-render just the failed view from that file with a higher temporary-allocation threshold, which cuts how much memray holds in memory so the render fits. This uses the original run's data, so the result stays comparable to the rest of the run:

```bash
python3 -m memray flamegraph reports/reader_mp4-python-3.12-slim.bin \
-o reports/reader_mp4-python-3.12-slim-temporary.html \
--temporary-allocations --temporary-allocation-threshold=10 --force
```
48 changes: 40 additions & 8 deletions tests/perf/run_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import argparse
import json
import os
import shutil
import subprocess
import sys
import tempfile
Expand Down Expand Up @@ -74,7 +75,7 @@ def _run_scenario_under_memray(name: str, bin_path: Path) -> None:
sys.exit(1)


def _generate_flamegraph(bin_path: Path, out_path: Path, mode: str = "peak") -> None:
def _generate_flamegraph(bin_path: Path, out_path: Path, mode: str = "peak") -> bool:
"""Render one flamegraph view of a capture file.

mode:
Expand All @@ -94,8 +95,13 @@ def _generate_flamegraph(bin_path: Path, out_path: Path, mode: str = "peak") ->
print(f" flamegraph ({mode})...", flush=True)
result = subprocess.run(cmd, text=True)
if result.returncode != 0:
print(f" flamegraph generation failed for {out_path.name} (exit {result.returncode})", file=sys.stderr)
sys.exit(1)
# -9 is SIGKILL, almost always the OOM killer reaping the heavy
# temporary render on a large capture. Do not abort the whole run:
# the capture and metrics are recorded separately and still good.
reason = "killed (likely OOM)" if result.returncode == -9 else f"exit {result.returncode}"
print(f" flamegraph {mode} render failed for {out_path.name} ({reason})", file=sys.stderr)
return False
return True


# get_allocation_records() yields deallocation records too...
Expand Down Expand Up @@ -189,6 +195,7 @@ def main() -> None:

results: dict = {}
failures: list[str] = []
render_failures: list[dict] = []

total = len(scenarios_to_run)
for idx, name in enumerate(scenarios_to_run, 1):
Expand All @@ -197,18 +204,25 @@ def main() -> None:
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as tmp:
bin_path = Path(tmp.name)

env_tag = f"-{PERF_ENV}" if PERF_ENV else ""
scenario_render_failed = False
failed_modes: list[dict] = []
try:
print(f" profiling...")
_run_scenario_under_memray(name, bin_path)

env_tag = f"-{PERF_ENV}" if PERF_ENV else ""
peak_html = REPORTS_DIR / f"{name}{env_tag}-peak.html"
leaks_html = REPORTS_DIR / f"{name}{env_tag}-leaks.html"
temporary_html = REPORTS_DIR / f"{name}{env_tag}-temporary.html"
print(f" generating flamegraphs (peak + leaks + temporary)...")
_generate_flamegraph(bin_path, peak_html, mode="peak")
_generate_flamegraph(bin_path, leaks_html, mode="leaks")
_generate_flamegraph(bin_path, temporary_html, mode="temporary")
scenario_render_failed = False
failed_modes: list[dict] = []
for mode, html in (("peak", peak_html),
("leaks", leaks_html),
("temporary", temporary_html)):
if not _generate_flamegraph(bin_path, html, mode=mode):
scenario_render_failed = True
failed_modes.append({"name": name, "mode": mode, "html": html.name})

print(f" reading metrics...", flush=True)
metrics = _read_metrics(bin_path)
Expand All @@ -234,7 +248,17 @@ def main() -> None:
f" (+{diff_pct:.1f}%, threshold {(THRESHOLD-1)*100:.0f}%)"
)
finally:
bin_path.unlink(missing_ok=True)
if scenario_render_failed:
# Keep the capture so the failed view can be re-rendered
# offline (with a higher --temporary-allocation-threshold)
# instead of re-profiling the whole scenario.
kept = REPORTS_DIR / f"{name}{env_tag}.bin"
shutil.move(str(bin_path), str(kept))
for fm in failed_modes:
fm["bin"] = str(kept)
render_failures.extend(failed_modes)
else:
bin_path.unlink(missing_ok=True)

if args.update_baseline or not baseline:
# When running a single scenario, merge its result into the existing
Expand All @@ -250,6 +274,14 @@ def main() -> None:
verb = "Updated" if baseline else "Created"
print(f"\n{verb} baseline: {BASELINE_FILE}")

if render_failures:
print("\nFLAMEGRAPH RENDERS FAILED (capture + metrics still recorded):", file=sys.stderr)
for r in render_failures:
print(f" {r['name']} [{r['mode']}] -> {r['html']} (capture kept: {r['bin']})", file=sys.stderr)
print(" Recover without re-profiling, e.g.:", file=sys.stderr)
print(" python3 -m memray flamegraph <kept.bin> -o <out.html> "
"--temporary-allocations --temporary-allocation-threshold=10 --force", file=sys.stderr)

if failures:
print("\nREGRESSIONS DETECTED:", file=sys.stderr)
for f in failures:
Expand Down
Loading