Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ Breaking:

Features:

- Nothing (yet)
- Add an ``out`` parameter to :meth:`.VideoFrame.to_ndarray` to write into a preallocated array and avoid a per-frame allocation.
- Add :meth:`.VideoPlane.to_ndarray` to read one single-component plane (e.g. just the luma plane of a planar YUV frame) as a 2D array, with an optional ``out`` buffer.

Fixes:

Expand Down
122 changes: 92 additions & 30 deletions av/video/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,18 @@ def byteswap_array(array, big_endian: cython.bint):
return array


@cython.cfunc
def _check_out(out, shape, dtype):
    # Validate a caller-supplied ``out`` array: it must be C-contiguous and
    # match the expected ``shape`` and ``dtype`` exactly. Raises ValueError on
    # the first mismatch; returns nothing when ``out`` is acceptable.
    import numpy as np

    is_c_contiguous = out.flags["C_CONTIGUOUS"]
    if not is_c_contiguous:
        raise ValueError("out must be a C-contiguous array")

    # Normalise both shapes to tuples so lists and tuples compare equal.
    actual_shape = tuple(out.shape)
    wanted_shape = tuple(shape)
    if actual_shape != wanted_shape:
        raise ValueError(f"out must have shape {wanted_shape}, got {actual_shape}")

    # ``dtype`` may be given as a string or a dtype object.
    actual_dtype = out.dtype
    wanted_dtype = np.dtype(dtype)
    if actual_dtype != wanted_dtype:
        raise ValueError(f"out must have dtype {wanted_dtype}, got {actual_dtype}")


@cython.cfunc
def copy_bytes_to_plane(
img_bytes,
Expand Down Expand Up @@ -736,7 +748,7 @@ def to_image(self, **kwargs):
"RGB", (plane.width, plane.height), bytes(o_buf), "raw", "RGB", 0, 1
)

def to_ndarray(self, channel_last=False, **kwargs):
def to_ndarray(self, channel_last=False, out=None, **kwargs):
"""Get a numpy array of this frame.

Any ``**kwargs`` are passed to :meth:`.VideoReformatter.reformat`.
Expand All @@ -746,6 +758,12 @@ def to_ndarray(self, channel_last=False, **kwargs):
:param bool channel_last: If True, the shape of array will be
(height, width, channels) rather than (channels, height, width) for
the "yuv444p" and "yuvj444p" formats.
:param out: An optional, preallocated, C-contiguous numpy array to copy
the result into. It must have exactly the shape and dtype that this
method would otherwise allocate; ``out`` is then returned in place of
a freshly allocated array. This lets callers reuse a buffer and avoid
a per-frame allocation. Not supported for the ``pal8`` format, which
returns a tuple.

.. note:: Numpy must be installed.

Expand Down Expand Up @@ -784,34 +802,80 @@ def to_ndarray(self, channel_last=False, **kwargs):
itemsize: cython.uint
itemsize, dtype = _np_pix_fmt_dtypes[format_name]
num_planes: cython.size_t = len(planes)
if num_planes == 1: # shortcut, avoid memory copy
array = useful_array(planes[0], itemsize, dtype)
else: # general case
array = np.empty(
(frame.ptr.height, frame.ptr.width, num_planes), dtype=dtype
big_endian: cython.bint = format_name.endswith("be")
transpose: cython.bint = not channel_last and format_name in {
"yuv444p",
"yuvj444p",
}

if num_planes == 1: # shortcut, avoid a memory copy when out is None
array = byteswap_array(
useful_array(planes[0], itemsize, dtype), big_endian
)
if out is None:
return array
_check_out(out, array.shape, dtype)
out[...] = array
return out

shape = (frame.ptr.height, frame.ptr.width, num_planes)
if transpose:
expected = (num_planes, frame.ptr.height, frame.ptr.width)
else:
expected = shape
if out is not None:
_check_out(out, expected, dtype)

# Fill the channels straight into `out` when its layout matches;
# only the (channel, height, width) transpose needs a scratch array.
array = np.empty(shape, dtype=dtype) if (out is None or transpose) else out
if format_name.startswith("gbr"):
plane_indices = (2, 0, 1, *range(3, num_planes))
else:
plane_indices = range(num_planes)
for i, p_idx in enumerate(plane_indices):
array[:, :, i] = byteswap_array(
useful_array(planes[p_idx], itemsize, dtype), big_endian
)
if format_name.startswith("gbr"):
plane_indices = (2, 0, 1, *range(3, num_planes))
else:
plane_indices = range(num_planes)
for i, p_idx in enumerate(plane_indices):
array[:, :, i] = useful_array(planes[p_idx], itemsize, dtype)
array = byteswap_array(array, format_name.endswith("be"))
if not channel_last and format_name in {"yuv444p", "yuvj444p"}:
array = np.moveaxis(array, 2, 0)
return array

if not transpose:
return array
if out is None:
return np.moveaxis(array, 2, 0)
out[...] = np.moveaxis(array, 2, 0)
return out
Comment thread
hmaarrfk marked this conversation as resolved.

# special cases
if format_name in {"yuv420p", "yuvj420p", "yuv422p"}:

# Planar formats we expose as a single (height * k, width) array by
# flattening and concatenating the planes. With `out`, copy each plane
# into its slice of `out` directly and skip the hstack allocation.
if format_name in {"yuv420p", "yuvj420p", "yuv422p", "nv12"}:
assert frame.ptr.width % 2 == 0, "width has to be even for this format"
assert frame.ptr.height % 2 == 0, "height has to be even for this format"
return np.hstack(
[
if format_name == "nv12":
flats = [
useful_array(planes[0]).reshape(-1),
useful_array(planes[1], 2).reshape(-1),
]
else:
flats = [
useful_array(planes[0]).reshape(-1),
useful_array(planes[1]).reshape(-1),
useful_array(planes[2]).reshape(-1),
]
).reshape(-1, frame.ptr.width)
if out is None:
return np.hstack(flats).reshape(-1, frame.ptr.width)
total: cython.size_t = sum(flat.shape[0] for flat in flats)
# _check_out enforces C-contiguity, so reshape(-1) stays a view and
# the per-plane writes below land in `out`.
_check_out(out, (total // frame.ptr.width, frame.ptr.width), "uint8")
flat_out = out.reshape(-1)
offset: cython.size_t = 0
for flat in flats:
flat_out[offset : offset + flat.shape[0]] = flat
offset += flat.shape[0]
return out
if format_name == "yuv422p10le":
assert frame.ptr.width % 2 == 0, "width has to be even for this format"
assert frame.ptr.height % 2 == 0, "height has to be even for this format"
Expand All @@ -823,10 +887,15 @@ def to_ndarray(self, channel_last=False, **kwargs):
# Double the width of U and V by repeating each value
u_full = np.repeat(u, 2, axis=1)
v_full = np.repeat(v, 2, axis=1)
if channel_last:
return np.stack([y, u_full, v_full], axis=2)
return np.stack([y, u_full, v_full], axis=0)
array = np.stack([y, u_full, v_full], axis=2 if channel_last else 0)
if out is None:
return array
_check_out(out, array.shape, "uint16")
out[...] = array
return out
if format_name == "pal8":
if out is not None:
raise ValueError("out is not supported for the pal8 format")
image = useful_array(planes[0])
Comment thread
hmaarrfk marked this conversation as resolved.
palette = (
np.frombuffer(planes[1], "i4")
Expand All @@ -835,13 +904,6 @@ def to_ndarray(self, channel_last=False, **kwargs):
.view(np.uint8)
)
return image, palette
if format_name == "nv12":
return np.hstack(
[
useful_array(planes[0]).reshape(-1),
useful_array(planes[1], 2).reshape(-1),
]
).reshape(-1, frame.ptr.width)

raise ValueError(
f"Conversion to numpy array with format `{format_name}` is not yet supported"
Expand Down
5 changes: 4 additions & 1 deletion av/video/frame.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ class VideoFrame(Frame):
def save(self, filepath: str | Path) -> None: ...
def to_image(self, **kwargs): ...
def to_ndarray(
self, channel_last: bool = False, **kwargs: Any
self,
channel_last: bool = False,
out: _SupportedNDarray | None = None,
**kwargs: Any,
) -> _SupportedNDarray: ...
@staticmethod
def from_image(img): ...
Expand Down
81 changes: 81 additions & 0 deletions av/video/plane.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,87 @@ def _buffer_writable(self) -> cython.bint:
return False
return True

def to_ndarray(self, out=None):
    """Get a numpy array of this single plane, with line padding removed.

    Unlike :meth:`.VideoFrame.to_ndarray`, which assembles every plane into
    one array, this returns just this plane as a ``(height, width)`` array.
    That makes it cheap to read a single component -- for example the luma
    (Y) plane of a planar YUV frame as a grayscale image -- without ever
    touching the chroma planes.

    Only single-component planes are supported (the Y/U/V planes of planar
    formats, or ``gray``). Packed or semi-planar planes (e.g. ``rgb24`` or
    the interleaved chroma plane of ``nv12``) hold more than one component
    per sample; use :meth:`.VideoFrame.to_ndarray` for those.

    :param out: An optional, preallocated ``(height, width)`` numpy array of
        the matching dtype to copy into; it is returned in place of a freshly
        allocated array. No intermediate array is allocated, even when the
        plane's byte order differs from the host's. When ``out`` is ``None``
        and the plane is 8-bit or already in host byte order, a zero-copy
        view onto the plane's buffer is returned: it shares memory with the
        frame, so writing to it mutates the frame, and it is only valid until
        the frame's buffer is reused (e.g. by decoding the next frame). When
        ``out`` is ``None`` and the bytes must be swapped, a byte-swapped
        *copy* is returned instead.
    :return: ``out`` if given, otherwise a ``uint8`` or ``uint16`` array of
        shape ``(height, width)``.
    :raises ValueError: If the plane holds zero or several components, if the
        component depth exceeds 16 bits, or if ``out`` has the wrong shape or
        dtype.

    .. note:: Numpy must be installed.
    """
    import sys

    import numpy as np

    fmt = self.frame.format
    components = [c for c in fmt.components if c.plane == self.index]
    if len(components) != 1:
        raise ValueError(
            "VideoPlane.to_ndarray() only supports single-component planes; "
            f"plane {self.index} of format {fmt.name!r} has "
            f"{len(components)} component(s). Use VideoFrame.to_ndarray() "
            "for packed or interleaved planes."
        )

    depth: cython.int = components[0].bits
    if depth <= 8:
        dtype = np.dtype("uint8")
    elif depth <= 16:
        dtype = np.dtype("uint16")
    else:
        raise ValueError(
            f"Unsupported component depth {depth} for VideoPlane.to_ndarray()"
        )

    itemsize: cython.int = dtype.itemsize
    line_size: cython.int = self.line_size
    total_line_size: cython.int = abs(line_size)
    shape = (self.height, self.width)

    # Validate `out` before doing any work on the plane data.
    if out is not None:
        if out.shape != shape:
            raise ValueError(
                f"out has shape {out.shape}, but this plane has shape {shape}"
            )
        if out.dtype != dtype:
            raise ValueError(
                f"out has dtype {out.dtype}, but this plane has dtype {dtype}"
            )

    if line_size < 0:
        # A negative line size means rows are stored bottom-up: start the
        # view at the last stored line and walk backwards through the buffer.
        offset = (self.height - 1) * total_line_size
        array = np.ndarray(
            shape,
            dtype=dtype,
            buffer=self,
            offset=offset,
            strides=(-total_line_size, itemsize),
        )
    else:
        array = np.ndarray(
            shape, dtype=dtype, buffer=self, strides=(total_line_size, itemsize)
        )

    # Multi-byte samples need swapping when the format's "be"/"le" suffix
    # disagrees with the host byte order.
    needs_swap = itemsize > 1 and fmt.name.endswith("be") != (sys.byteorder == "big")

    if out is None:
        # byteswap() returns a new array, so this path is a copy, not a view.
        return array.byteswap() if needs_swap else array

    if needs_swap:
        # Reinterpret the source with the opposite byte order (still a view);
        # numpy then swaps while copying into `out`, with no temporary array.
        array = array.view(dtype.newbyteorder())
    out[...] = array
    return out

def __getbuffer__(self, view: cython.pointer[Py_buffer], flags: cython.int):
if self.frame.ptr.hw_frames_ctx:
raise TypeError(
Expand Down
11 changes: 11 additions & 0 deletions av/video/plane.pyi
Original file line number Diff line number Diff line change
@@ -1,15 +1,26 @@
from types import CapsuleType
from typing import Any, Union

import numpy as np

from av.plane import Plane

from .frame import VideoFrame

_SupportedNDarray = Union[
np.ndarray[Any, np.dtype[np.uint8]],
np.ndarray[Any, np.dtype[np.uint16]],
]

class VideoPlane(Plane):
    # Type stub for a single plane of a video frame.
    # Integer attributes exposed on the plane.
    line_size: int
    width: int
    height: int
    buffer_size: int

    # A plane is built from its owning frame and its index within that frame.
    def __init__(self, frame: VideoFrame, index: int) -> None: ...
    # ``out`` is an optional uint8/uint16 array (see ``_SupportedNDarray``);
    # the result is likewise a uint8 or uint16 array.
    def to_ndarray(
        self, out: _SupportedNDarray | None = None
    ) -> _SupportedNDarray: ...
    # DLPack protocol hooks; ``stream`` is keyword-only.
    def __dlpack_device__(self) -> tuple[int, int]: ...
    def __dlpack__(self, *, stream: int | None = None) -> CapsuleType: ...
Loading
Loading