diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 8364a5bad..a18a60f07 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -37,7 +37,7 @@ Breaking: Features: -- Nothing (yet) +- Add ``options`` parameter to ``AudioResampler`` for passing ``libswresample`` options (e.g. ``resampler``, ``filter_size``, ``cutoff``) by :gh-user:`WyattBlue` (:issue:`2262`). Fixes: diff --git a/av/audio/resampler.pxd b/av/audio/resampler.pxd index 20b74186e..2846bdfb6 100644 --- a/av/audio/resampler.pxd +++ b/av/audio/resampler.pxd @@ -13,6 +13,7 @@ cdef class AudioResampler: cdef readonly AudioLayout layout cdef readonly int rate cdef readonly unsigned int frame_size + cdef readonly dict options cdef Graph graph cpdef list resample(self, AudioFrame) diff --git a/av/audio/resampler.py b/av/audio/resampler.py index fb6d98910..785e522b9 100644 --- a/av/audio/resampler.py +++ b/av/audio/resampler.py @@ -9,16 +9,23 @@ @cython.final @cython.cclass class AudioResampler: - """AudioResampler(format=None, layout=None, rate=None) + """AudioResampler(format=None, layout=None, rate=None, frame_size=None, options=None) :param AudioFormat format: The target format, or string that parses to one (e.g. ``"s16"``). :param AudioLayout layout: The target layout, or an int/string that parses to one (e.g. ``"stereo"``). :param int rate: The target sample rate. + :param int frame_size: The number of samples per output frame. + :param dict options: ``libswresample`` options passed to the underlying + ``aresample`` filter (e.g. ``{"resampler": "soxr", "precision": "28"}``). + See the `FFmpeg resampler documentation + <https://ffmpeg.org/ffmpeg-resampler.html>`_ for the full list. 
""" - def __cinit__(self, format=None, layout=None, rate=None, frame_size=None): + def __cinit__( + self, format=None, layout=None, rate=None, frame_size=None, options=None + ): if format is not None: self.format = ( format if isinstance(format, AudioFormat) else AudioFormat(format) @@ -29,6 +36,7 @@ def __cinit__(self, format=None, layout=None, rate=None, frame_size=None): self.rate = int(rate) if rate else 0 self.frame_size = int(frame_size) if frame_size else 0 + self.options = {str(k): str(v) for k, v in options.items()} if options else {} self.graph = None @cython.ccall @@ -91,7 +99,17 @@ def resample(self, frame: AudioFrame | None) -> list: channel_layouts=self.layout.name, ) abuffersink = self.graph.add("abuffersink") - abuffer.link_to(aformat) + + # When libswresample options are given, do the conversion with an + # explicit aresample filter (which owns the SwrContext) instead of + # relying on the one FFmpeg auto-inserts before aformat. + if self.options: + aresample = self.graph.add("aresample", **self.options) + abuffer.link_to(aresample) + aresample.link_to(aformat) + else: + abuffer.link_to(aformat) + aformat.link_to(abuffersink) self.graph.configure() diff --git a/av/audio/resampler.pyi b/av/audio/resampler.pyi index cbf2134aa..80c16cf08 100644 --- a/av/audio/resampler.pyi +++ b/av/audio/resampler.pyi @@ -8,6 +8,8 @@ class AudioResampler: rate: int frame_size: int format: AudioFormat + layout: AudioLayout + options: dict[str, str] graph: Graph | None def __init__( @@ -16,5 +18,6 @@ class AudioResampler: layout: str | int | AudioLayout | None = None, rate: int | None = None, frame_size: int | None = None, + options: dict[str, str] | None = None, ) -> None: ... def resample(self, frame: AudioFrame | None) -> list[AudioFrame]: ... 
diff --git a/tests/test_audioresampler.py b/tests/test_audioresampler.py index cad8ebe34..65fdfdddb 100644 --- a/tests/test_audioresampler.py +++ b/tests/test_audioresampler.py @@ -269,6 +269,51 @@ def test_pts_missing_time_base() -> None: assert oframe.samples == 16 +def test_swr_options() -> None: + """ + libswresample options are passed through to the underlying aresample filter. + """ + resampler = AudioResampler( + "fltp", + "mono", + 16000, + options={"filter_size": "32", "phase_shift": "12", "cutoff": "0.95"}, + ) + assert resampler.options == { + "filter_size": "32", + "phase_shift": "12", + "cutoff": "0.95", + } + + iframe = AudioFrame("s16", "stereo", 1024) + iframe.sample_rate = 48000 + iframe.time_base = Fraction(1, 48000) + iframe.pts = 0 + + oframes = resampler.resample(iframe) + assert len(oframes) == 1 + + oframe = oframes[0] + assert oframe.sample_rate == 16000 + assert oframe.format.name == "fltp" + assert oframe.layout.name == "mono" + + +def test_swr_options_invalid() -> None: + """ + An unknown option is reported rather than silently ignored. + """ + resampler = AudioResampler("s16", "mono", 44100, options={"not_a_real_option": "1"}) + + iframe = AudioFrame("s16", "stereo", 1024) + iframe.sample_rate = 48000 + iframe.time_base = Fraction(1, 48000) + iframe.pts = 0 + + with pytest.raises(ValueError, match="unused config: not_a_real_option"): + resampler.resample(iframe) + + def test_mismatched_input() -> None: """ Consecutive frames must have the same layout, sample format and sample rate.