From 2273834aaf5a51003c3b83ee0952e7ab1da40deb Mon Sep 17 00:00:00 2001
From: Max Heimbrock <43608204+MaxHeimbrock@users.noreply.github.com>
Date: Fri, 12 Jun 2026 16:16:43 +0200
Subject: [PATCH 1/5] Fix microphone capture: device-true source format +
 fragment-aware clip reading

Publishing the microphone with a Bluetooth HFP headset on macOS produced
"sample_rate and num_channels don't match" errors from the native source and,
beyond that, persistently choppy or garbled audio on receivers.

Two root causes, both fixed here:

1. The native (Rust) audio source was created with a hardcoded format
   (48000Hz/2ch) while captured frames arrive at whatever format the device
   actually delivers. The native source rejects mismatched frames (it does
   not resample). RtcAudioSource now has two constructors: a device-mode one
   that resolves the format from Unity's output configuration, and an
   explicit-format one for sources that know their exact rate/channels.
   Frames that still mismatch are dropped with a throttled warning instead of
   erroring natively.

2. On macOS with a Bluetooth HFP headset, Unity's Microphone clip buffer is
   fragmented: FMOD writes each real 20ms packet of clip.frequency audio,
   then advances Microphone.GetPosition as if it had written ~3.2x as much,
   zero-filling the skipped range. A raw buffer dump showed valid fragments
   of exactly 320 samples at a stride of exactly 1024 (320/1024 = 1/k, where
   k is the counter inflation factor), with the fragments joining
   continuously - the stream is intact, just scattered. Every playback-based
   capture strategy therefore chops the audio (31% voice, 69% padding) and
   counter-paced reading garbles it.

   MicrophoneSource now reads the clip ring buffer directly (no AudioSource,
   no OnAudioFilterRead - which also decouples capture from the output
   device's clock). A short pre-roll measures the counter rate
   (k = counterRate / clip.frequency) and the counter's smallest discrete
   jump (the stride). Healthy devices (k ~ 1) use a plain contiguous read;
   fragmented devices (k > 1.05) read only the first stride/k samples of
   each stride - exactly the valid fragments. Captured audio is downmixed to
   mono and resampled from clip.frequency to a fixed 48kHz native source,
   preserving the publish-before-start contract. Backlog beyond 200ms after
   a stall is dropped, stride-aligned, to avoid overrunning the native queue.

Also removes the redundant Microphone.Start in the Meet sample and lets the
test sine source declare its exact format explicitly.

Verified end-to-end: macOS publisher with the Bluetooth headset microphone to
an Android receiver now sounds clean and correct-pitch; healthy microphones
take the contiguous path unchanged.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 Runtime/Scripts/BasicAudioSource.cs         |   6 +-
 Runtime/Scripts/MicrophoneSource.cs         | 256 +++++++++++++++-----
 Runtime/Scripts/RtcAudioSource.cs           |  81 ++++++-
 Samples~/Meet/Assets/Runtime/MeetManager.cs |   3 +-
 Tests/PlayMode/Utils/SineWaveAudioSource.cs |   2 +-
 5 files changed, 273 insertions(+), 75 deletions(-)
diff --git a/Runtime/Scripts/BasicAudioSource.cs b/Runtime/Scripts/BasicAudioSource.cs
index 3b63680b..8193090d 100644
--- a/Runtime/Scripts/BasicAudioSource.cs
+++ b/Runtime/Scripts/BasicAudioSource.cs
@@ -19,9 +19,11 @@ sealed public class BasicAudioSource : RtcAudioSource
         /// Creates a new basic audio source for the given <see cref="AudioSource"/> in the scene.
         /// </summary>
         /// <param name="source">The <see cref="AudioSource"/> to capture from.</param>
-        /// <param name="channels">The number of channels to capture.</param>
         /// <param name="sourceType">The type of audio source.</param>
-        public BasicAudioSource(AudioSource source, int channels = 2, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(channels, sourceType)
+        /// <remarks>
+        /// The sample rate and channel count are taken from Unity's audio configuration.
+        /// </remarks>
+        public BasicAudioSource(AudioSource source, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(sourceType)
         {
             _source = source;
         }
diff --git a/Runtime/Scripts/MicrophoneSource.cs b/Runtime/Scripts/MicrophoneSource.cs
index 904b8da7..1a1823f0 100644
--- a/Runtime/Scripts/MicrophoneSource.cs
+++ b/Runtime/Scripts/MicrophoneSource.cs
@@ -1,5 +1,6 @@
 using System;
 using System.Collections;
+using System.Collections.Generic;
 using UnityEngine;
 using LiveKit.Internal;
 
@@ -13,25 +14,59 @@ namespace LiveKit
     /// </remarks>
     sealed public class MicrophoneSource : RtcAudioSource
     {
-        private readonly GameObject _sourceObject;
+        // --- Capture design ---
+        // The microphone clip's ring buffer is read directly (no AudioSource playback, no
+        // OnAudioFilterRead), so capture is decoupled from the output device's clock.
+        //
+        // Microphone.GetPosition cannot be trusted as a sample position on every platform. On
+        // macOS with a Bluetooth HFP headset, FMOD writes each real 20ms packet of clip.frequency
+        // audio, then advances the position counter ~3.2x too far and zero-fills the skipped
+        // range. The buffer then holds valid fragments of N samples at a stride J (measured: 320
+        // of every 1024) and the counter rate is k = J/N times the data rate. Inspection of a raw
+        // buffer dump showed the fragments are consecutive speech that joins continuously, so the
+        // stream is reconstructed losslessly by reading only the first N = J/k samples of each
+        // stride. Healthy devices have k ~ 1 and use a plain contiguous read.
+        //
+        // The clip's data rate is clip.frequency (verified: fragments play at correct pitch), so
+        // captured samples are resampled from clip.frequency to the fixed native-source rate.
+        private const uint TargetSampleRate = 48000;
+        private const float PreRollSeconds = 0.3f;
+        private const double FragmentedKThreshold = 1.05;
+        private const float MaxBacklogSeconds = 0.2f; // drop backlog beyond this after a stall
+
         private readonly string _deviceName;
 
         public override event Action<float[], int, int> AudioRead;
 
         private bool _disposed = false;
         private bool _started = false;
+        private volatile bool _capturing = false;
+
+        // Streaming linear-resampler state (input = clip.frequency, output = TargetSampleRate).
+        private double _resamplePos;
+        private float _resamplePrev;
 
         /// <summary>
         /// Creates a new microphone source for the given device.
         /// </summary>
         /// <param name="deviceName">The name of the device to capture from. Use <see cref="Microphone.devices"/> to
         /// get the list of available devices.</param>
-        /// <param name="sourceObject">The GameObject to attach the AudioSource to. The object must be kept in the scene
-        /// for the duration of the source's lifetime.</param>
-        public MicrophoneSource(string deviceName, GameObject sourceObject) : base(2, RtcAudioSourceType.AudioSourceMicrophone)
+        /// <param name="sourceObject">Unused; retained for compatibility. The microphone clip is read
+        /// directly, so no scene GameObject/AudioSource is required.</param>
+        public MicrophoneSource(string deviceName, GameObject sourceObject)
+            : base(RtcAudioSourceType.AudioSourceMicrophone, TargetSampleRate, 1)
         {
             _deviceName = deviceName;
-            _sourceObject = sourceObject;
+        }
+
+        // The rate requested from Microphone.Start (a hint the platform may not honor), clamped to
+        // the device's reported range. The authoritative data rate is clip.frequency afterwards.
+        private static int ResolveRequestedSampleRate(string deviceName)
+        {
+            Microphone.GetDeviceCaps(deviceName, out int minFreq, out int maxFreq);
+            if (minFreq == 0 && maxFreq == 0)
+                return (int)TargetSampleRate;
+            return Mathf.Clamp((int)TargetSampleRate, minFreq, maxFreq);
         }
 
         /// <summary>
@@ -49,7 +84,6 @@ public override void Start()
             base.Start();
             if (_started) return;
 
-
             if (!Application.HasUserAuthorization(mode: UserAuthorization.Microphone))
                 throw new InvalidOperationException("Microphone access not authorized");
 
@@ -61,13 +95,6 @@ public override void Start()
 
         private IEnumerator StartMicrophone()
         {
-            // Validate that the GameObject is still valid before starting
-            if (_sourceObject == null)
-            {
-                Utils.Error("MicrophoneSource: GameObject is null, cannot start microphone");
-                yield break;
-            }
-
             // Verify microphone is still authorized (could change during background)
             if (!Application.HasUserAuthorization(UserAuthorization.Microphone))
             {
@@ -76,13 +103,14 @@ private IEnumerator StartMicrophone()
             }
 
             AudioClip clip = null;
+            int requestedRate = ResolveRequestedSampleRate(_deviceName);
             try
             {
                 clip = Microphone.Start(
                     _deviceName,
                     loop: true,
-                    lengthSec: 1,
-                    frequency: (int)DefaultMicrophoneSampleRate
+                    lengthSec: 2,
+                    frequency: requestedRate
                 );
             }
             catch (Exception e)
@@ -97,29 +125,6 @@ private IEnumerator StartMicrophone()
                 yield break;
             }
 
-            // Ensure no duplicate components exist before adding new ones.
-            // This is important during app resume on iOS where components might not be
-            // fully destroyed yet due to Unity's deferred Destroy().
-            var existingSource = _sourceObject.GetComponent<AudioSource>();
-            if (existingSource != null)
-                UnityEngine.Object.DestroyImmediate(existingSource);
-
-            var existingProbe = _sourceObject.GetComponent<AudioProbe>();
-            if (existingProbe != null)
-            {
-                existingProbe.AudioRead -= OnAudioRead;
-                UnityEngine.Object.DestroyImmediate(existingProbe);
-            }
-
-            var source = _sourceObject.AddComponent<AudioSource>();
-            source.clip = clip;
-            source.loop = true;
-
-            var probe = _sourceObject.AddComponent<AudioProbe>();
-            // Clear the audio data after it is read as to not play it through the speaker locally.
-            probe.ClearAfterInvocation();
-            probe.AudioRead += OnAudioRead;
-
             // Wait for microphone to actually start producing data with a timeout
             const float timeout = 2f;
             float elapsed = 0f;
@@ -135,8 +140,155 @@ private IEnumerator StartMicrophone()
                 yield break;
             }
 
-            source.Play();
-            Utils.Debug($"MicrophoneSource device='{_deviceName}' started successfully");
+            Utils.Info($"MicrophoneSource device='{_deviceName}' clip={clip.frequency}Hz/{clip.channels}ch samples={clip.samples} requested={requestedRate}Hz target={TargetSampleRate}Hz");
+
+            _capturing = true;
+            MonoBehaviourContext.RunCoroutine(CaptureLoop(clip));
+        }
+
+        // Reads new samples from the clip's ring buffer each frame and pushes them to the native
+        // source via AudioRead. Runs on the main thread; the native source's queue absorbs the
+        // per-frame pacing jitter.
+        private IEnumerator CaptureLoop(AudioClip clip)
+        {
+            int clipFrames = clip.samples;
+            int channels = clip.channels;
+            int dataRate = clip.frequency > 0 ? clip.frequency : (int)DefaultMicrophoneSampleRate;
+
+            // Pre-roll: measure how fast the position counter advances (its average is steady even
+            // when individual values jump) and the size of its smallest discrete jump.
+            int prevCounter = Microphone.GetPosition(_deviceName);
+            long advance = 0;
+            long minJump = long.MaxValue;
+            var preRoll = System.Diagnostics.Stopwatch.StartNew();
+            while (preRoll.Elapsed.TotalSeconds < PreRollSeconds)
+            {
+                if (!_capturing || _disposed) yield break;
+                yield return null;
+                int c = Microphone.GetPosition(_deviceName);
+                long d = ((c - prevCounter) % clipFrames + clipFrames) % clipFrames;
+                prevCounter = c;
+                advance += d;
+                if (d > 0 && d < minJump) minJump = d;
+            }
+            if (!_capturing || _disposed) yield break;
+
+            double counterRate = advance > 0 ? advance / preRoll.Elapsed.TotalSeconds : dataRate;
+            double k = counterRate / dataRate;
+
+            // Fragmented mode: the counter advances in jumps of `stride`, but only the first
+            // `validPerStride` samples of each stride contain data; the rest is zero padding.
+            bool fragmented = k > FragmentedKThreshold && minJump != long.MaxValue && minJump > 1;
+            int stride = fragmented ? (int)minJump : 0;
+            int validPerStride = fragmented ? Math.Max(1, (int)Math.Round(stride / k)) : 0;
+
+            if (fragmented)
+                Utils.Info($"MicrophoneSource: fragmented clip detected (k={k:F2}); reading {validPerStride} of every {stride} samples at {dataRate}Hz");
+            else
+                Utils.Info($"MicrophoneSource: contiguous capture (k={k:F2}) at {dataRate}Hz");
+
+            _resamplePos = 0.0;
+            _resamplePrev = 0f;
+            long maxBacklog = (long)(counterRate * MaxBacklogSeconds);
+            int readPos = prevCounter; // counter values land on jump boundaries
+            long pending = 0;
+
+            while (_capturing && !_disposed)
+            {
+                yield return null;
+
+                int c = Microphone.GetPosition(_deviceName);
+                long d = ((c - prevCounter) % clipFrames + clipFrames) % clipFrames;
+                prevCounter = c;
+                pending += d;
+
+                // After a long stall, drop the oldest backlog instead of pushing a burst that
+                // would overrun the native source's queue.
+                if (pending > maxBacklog)
+                {
+                    long drop = pending - maxBacklog;
+                    if (fragmented) drop -= drop % stride; // preserve stride alignment
+                    readPos = (int)((readPos + drop) % clipFrames);
+                    pending -= drop;
+                    Utils.Warning($"MicrophoneSource: dropped {drop} buffered samples after a stall");
+                }
+
+                if (fragmented)
+                {
+                    while (pending >= stride)
+                    {
+                        EmitClipRange(clip, channels, dataRate, readPos, validPerStride, clipFrames);
+                        readPos = (readPos + stride) % clipFrames;
+                        pending -= stride;
+                    }
+                }
+                else if (pending > 0)
+                {
+                    EmitClipRange(clip, channels, dataRate, readPos, (int)pending, clipFrames);
+                    readPos = (int)((readPos + pending) % clipFrames);
+                    pending = 0;
+                }
+            }
+        }
+
+        // Reads `count` frames starting at `start`, splitting at the ring wrap so each GetData
+        // read is contiguous.
+        private void EmitClipRange(AudioClip clip, int channels, int dataRate, int start, int count, int clipFrames)
+        {
+            if (count <= 0) return;
+            int first = Math.Min(count, clipFrames - start);
+            ReadAndPush(clip, channels, dataRate, start, first);
+            if (count > first)
+                ReadAndPush(clip, channels, dataRate, 0, count - first);
+        }
+
+        // Reads a contiguous range, downmixes to mono, resamples dataRate -> TargetSampleRate
+        // (streaming linear interpolation carrying state across calls, so fragment junctions stay
+        // continuous), and fires AudioRead.
+        private void ReadAndPush(AudioClip clip, int channels, int dataRate, int start, int count)
+        {
+            if (count <= 0) return;
+
+            var interleaved = new float[count * channels];
+            clip.GetData(interleaved, start);
+
+            float[] mono;
+            if (channels == 1)
+            {
+                mono = interleaved;
+            }
+            else
+            {
+                mono = new float[count];
+                for (int f = 0; f < count; f++)
+                {
+                    float sum = 0f;
+                    for (int ch = 0; ch < channels; ch++)
+                        sum += interleaved[f * channels + ch];
+                    mono[f] = sum / channels;
+                }
+            }
+
+            double step = (double)dataRate / TargetSampleRate;
+            var output = new List<float>((int)(count / step) + 2);
+
+            // Index -1 maps to the carried last sample of the previous chunk so interpolation is
+            // continuous across chunk boundaries. pos stays >= -1.
+            double pos = _resamplePos;
+            while (pos < count - 1)
+            {
+                int i0 = (int)Math.Floor(pos);
+                float a = i0 < 0 ? _resamplePrev : mono[i0];
+                float b = mono[i0 + 1];
+                float frac = (float)(pos - i0);
+                output.Add(a * (1f - frac) + b * frac);
+                pos += step;
+            }
+            _resamplePrev = mono[count - 1];
+            _resamplePos = pos - count;
+
+            if (output.Count > 0)
+                AudioRead?.Invoke(output.ToArray(), 1, (int)TargetSampleRate);
         }
 
         /// <summary>
@@ -152,33 +304,15 @@ public override void Stop()
 
         private IEnumerator StopMicrophone()
         {
+            _capturing = false;
+
             if (Microphone.IsRecording(_deviceName))
                 Microphone.End(_deviceName);
 
-            // Check if GameObject is still valid before trying to access components
-            if (_sourceObject != null)
-            {
-                var probe = _sourceObject.GetComponent<AudioProbe>();
-                if (probe != null)
-                {
-                    probe.AudioRead -= OnAudioRead;
-                    UnityEngine.Object.Destroy(probe);
-                }
-
-                var source = _sourceObject.GetComponent<AudioSource>();
-                if (source != null)
-                    UnityEngine.Object.Destroy(source);
-            }
-
             Utils.Debug($"MicrophoneSource device='{_deviceName}' stopped");
             yield return null;
         }
 
-        private void OnAudioRead(float[] data, int channels, int sampleRate)
-        {
-            AudioRead?.Invoke(data, channels, sampleRate);
-        }
-
         private void OnApplicationPause(bool pause)
         {
             if (!_started)
@@ -246,4 +380,4 @@ protected override void Dispose(bool disposing)
             Dispose(false);
         }
     }
-}
\ No newline at end of file
+}
diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs
index a9af8a0a..43f5c102 100644
--- a/Runtime/Scripts/RtcAudioSource.cs
+++ b/Runtime/Scripts/RtcAudioSource.cs
@@ -83,20 +83,33 @@ private sealed class PendingAudioFrame
         private volatile bool _disposed = false;
         private int _audioReadCount = 0;
 
-        protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = RtcAudioSourceType.AudioSourceCustom)
+        // Device-capture sources (microphone, AudioSource taps) don't know their format ahead of
+        // time — it is whatever Unity's audio graph delivers. They use this constructor, which
+        // configures the native source from Unity's current output configuration.
+        protected RtcAudioSource(RtcAudioSourceType audioSourceType)
+            : this(audioSourceType, 0, 0) { }
+
+        // Sources that generate a fixed, known format (e.g. test signal generators) declare it
+        // directly. Passing 0 for either value falls back to the device configuration.
+        protected RtcAudioSource(RtcAudioSourceType audioSourceType, uint sampleRate, uint channels)
         {
             _sourceType = audioSourceType;
-            _expectedChannels = (uint)channels;
+
+            if (sampleRate > 0 && channels > 0)
+            {
+                _expectedSampleRate = sampleRate;
+                _expectedChannels = channels;
+            }
+            else
+            {
+                (_expectedSampleRate, _expectedChannels) = ResolveDeviceFormat();
+            }
 
             using var request = FFIBridge.Instance.NewRequest<NewAudioSourceRequest>();
             var newAudioSource = request.request;
             newAudioSource.Type = AudioSourceType.AudioSourceNative;
-            newAudioSource.NumChannels = (uint)channels;
-            newAudioSource.SampleRate = _sourceType == RtcAudioSourceType.AudioSourceMicrophone ?
-                DefaultMicrophoneSampleRate : DefaultSampleRate;
-            _expectedSampleRate = newAudioSource.SampleRate;
-
-            Utils.Debug($"NewAudioSource: {newAudioSource.NumChannels} {newAudioSource.SampleRate}");
+            newAudioSource.NumChannels = _expectedChannels;
+            newAudioSource.SampleRate = _expectedSampleRate;
 
             newAudioSource.Options = request.TempResource<AudioSourceOptions>();
             newAudioSource.Options.EchoCancellation = true;
@@ -109,6 +122,49 @@ protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType =
             Utils.Debug($"{DebugTag} created handle={Handle.DangerousGetHandle()} expectedRate={_expectedSampleRate} expectedChannels={_expectedChannels} sourceType={_sourceType}");
         }
 
+        // Reads Unity's actual output audio configuration. The capture path delivers buffers at the
+        // DSP output rate/channel count (see AudioProbe), so this is the format the native source
+        // must match. Falls back to the platform defaults when Unity cannot report a configuration
+        // (e.g. batch mode without an audio device).
+        private (uint sampleRate, uint channels) ResolveDeviceFormat()
+        {
+            uint sampleRate = _sourceType == RtcAudioSourceType.AudioSourceMicrophone
+                ? DefaultMicrophoneSampleRate
+                : DefaultSampleRate;
+            uint channels = DefaultChannels;
+
+            try
+            {
+                var config = UnityEngine.AudioSettings.GetConfiguration();
+                if (config.sampleRate > 0)
+                    sampleRate = (uint)config.sampleRate;
+                var configuredChannels = SpeakerModeChannels(config.speakerMode);
+                if (configuredChannels > 0)
+                    channels = configuredChannels;
+            }
+            catch (Exception e)
+            {
+                Utils.Warning($"{DebugTag} could not read Unity audio configuration, using defaults: {e.Message}");
+            }
+
+            return (sampleRate, channels);
+        }
+
+        private static uint SpeakerModeChannels(UnityEngine.AudioSpeakerMode mode)
+        {
+            switch (mode)
+            {
+                case UnityEngine.AudioSpeakerMode.Mono: return 1;
+                case UnityEngine.AudioSpeakerMode.Stereo: return 2;
+                case UnityEngine.AudioSpeakerMode.Quad: return 4;
+                case UnityEngine.AudioSpeakerMode.Surround: return 5;
+                case UnityEngine.AudioSpeakerMode.Mode5point1: return 6;
+                case UnityEngine.AudioSpeakerMode.Mode7point1: return 8;
+                case UnityEngine.AudioSpeakerMode.Prologic: return 2;
+                default: return 0;
+            }
+        }
+
         /// <summary>
         /// Begin capturing audio samples from the underlying source.
         /// </summary>
@@ -153,9 +209,16 @@ private void OnAudioRead(float[] data, int channels, int sampleRate)
                 return;
             }
 
+            // The native source rejects frames whose rate/channels differ from how it was
+            // configured (it does not resample). This should not happen now that sources declare
+            // or resolve their real format, but if Unity reports an inconsistent format — or the
+            // output configuration changes at runtime — we drop the frame instead of sending a
+            // mismatch the native side would error on.
             if ((uint)sampleRate != _expectedSampleRate || (uint)channels != _expectedChannels)
             {
-                Utils.Warning($"{DebugTag} audio frame #{frameIndex} metadata mismatch actualRate={sampleRate} actualChannels={channels} expectedRate={_expectedSampleRate} expectedChannels={_expectedChannels} sourceType={_sourceType}");
+                if (frameIndex == 1 || frameIndex % 100 == 0)
+                    Utils.Warning($"{DebugTag} dropping audio frame #{frameIndex}: format {sampleRate}/{channels} does not match source {_expectedSampleRate}/{_expectedChannels} (sourceType={_sourceType})");
+                return;
             }
 
             var pendingBeforeSend = PendingFrameCount();
diff --git a/Samples~/Meet/Assets/Runtime/MeetManager.cs b/Samples~/Meet/Assets/Runtime/MeetManager.cs
index 225c7a0c..97b2cb70 100644
--- a/Samples~/Meet/Assets/Runtime/MeetManager.cs
+++ b/Samples~/Meet/Assets/Runtime/MeetManager.cs
@@ -453,8 +453,7 @@ private IEnumerator PublishLocalMicrophone()
     {
         if (_audioObjects.ContainsKey(LocalAudioTrackName)) yield break;
 
-        Microphone.Start(null, true, 10, 44100);
-
+        // MicrophoneSource starts the device itself, so we only need the device name here.
         var audioObject = new GameObject($"My Microphone: {Microphone.devices[0]}");
         audioObject.transform.SetParent(_audioTrackParent);
 
diff --git a/Tests/PlayMode/Utils/SineWaveAudioSource.cs b/Tests/PlayMode/Utils/SineWaveAudioSource.cs
index 907e9ccc..2337615b 100644
--- a/Tests/PlayMode/Utils/SineWaveAudioSource.cs
+++ b/Tests/PlayMode/Utils/SineWaveAudioSource.cs
@@ -31,7 +31,7 @@ public SineWaveAudioSource(
             int sampleRate = 48000,
             double frequencyHz = 440.0,
             float amplitude = 0.1f)
-            : base(channels, RtcAudioSourceType.AudioSourceCustom)
+            : base(RtcAudioSourceType.AudioSourceCustom, (uint)sampleRate, (uint)channels)
         {
             _channels = channels;
             _sampleRate = sampleRate;

From 35031cc647393c6e583c47bc1cc5d8424eba418f Mon Sep 17 00:00:00 2001
From: Max Heimbrock <43608204+MaxHeimbrock@users.noreply.github.com>
Date: Fri, 12 Jun 2026 16:39:12 +0200
Subject: [PATCH 2/5] Extract mic clip reading logic into testable classes +
 EditMode tests

The fragment-aware capture logic is subtle and was painful to diagnose, but
most of it is pure logic that doesn't need a microphone. Extract it from
MicrophoneSource into two UnityEngine-free internal classes:

- MicClipReader: pre-roll measurement (counter rate k, smallest jump =
  stride), contiguous vs fragmented mode selection, per-stride valid-range
  emission, ring-wrap splitting, and stride-aligned backlog dropping.
- StreamingResampler: the streaming linear resampler (state carries across
  chunks so fragment junctions stay continuous).

MicrophoneSource.CaptureLoop becomes a thin Unity shell: poll GetPosition,
feed the reader, GetData the emitted ranges, downmix, resample, push.
Behavior is unchanged.

Add EditMode tests covering: healthy contiguous capture (k~1, every sample
emitted), fragmented detection (k=3.2, stride 1024, valid 320 - the exact
structure dumped from the Sony MDR-1000X on macOS), lossless reconstruction
of a synthetic fragmented buffer across multiple ring laps (strictly
sequential output, no gaps/repeats/padding), stride-aligned backlog drops
bounded by the limit, pre-roll emitting nothing, resampler frequency/length
preservation, and chunked-equals-whole resampling (1-sample tail tolerance
for float boundary rounding).

Logic verified by executing all test scenarios in a standalone harness
(mono) in addition to compiling the Unity assemblies.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 Runtime/Scripts/Internal/MicClipReader.cs     | 160 ++++++++++++
 .../Scripts/Internal/StreamingResampler.cs    |  54 ++++
 Runtime/Scripts/MicrophoneSource.cs           | 142 +++-------
 Tests/EditMode/MicClipReaderTests.cs          | 243 ++++++++++++++++++
 4 files changed, 490 insertions(+), 109 deletions(-)
 create mode 100644 Runtime/Scripts/Internal/MicClipReader.cs
 create mode 100644 Runtime/Scripts/Internal/StreamingResampler.cs
 create mode 100644 Tests/EditMode/MicClipReaderTests.cs

diff --git a/Runtime/Scripts/Internal/MicClipReader.cs b/Runtime/Scripts/Internal/MicClipReader.cs
new file mode 100644
index 00000000..cb04b9fa
--- /dev/null
+++ b/Runtime/Scripts/Internal/MicClipReader.cs
@@ -0,0 +1,160 @@
+using System;
+using System.Collections.Generic;
+
+namespace LiveKit.Internal
+{
+    /// <summary>
+    /// Pure logic for reading a looping microphone clip ring buffer whose position counter may be
+    /// unreliable. Free of UnityEngine dependencies so it can be unit tested.
+    /// </summary>
+    /// <remarks>
+    /// On most devices the position counter advances at the clip's data rate and capture is a
+    /// plain contiguous read. On macOS with a Bluetooth HFP headset, however, FMOD writes each
+    /// real packet of clip.frequency audio and then advances the counter k (~3.2) times too far,
+    /// zero-filling the skipped range: the buffer holds valid fragments of N samples at a stride
+    /// J (measured: 320 of every 1024), the fragments join continuously, and the counter rate is
+    /// k = J/N times the data rate. The reader measures the counter rate and its smallest
+    /// discrete jump during a pre-roll; when the rate is inflated it emits only the first J/k
+    /// samples of each stride, reconstructing the contiguous stream.
+    /// </remarks>
+    internal sealed class MicClipReader
+    {
+        public struct ReadRange
+        {
+            public int Start; // first clip frame of the range
+            public int Count; // number of frames to read from Start; ranges are pre-split at the ring wrap
+        }
+
+        private readonly int _clipFrames; // ring buffer length in frames; the counter wraps modulo this
+        private readonly int _dataRate; // sample rate of the real audio data (denominator of K)
+        private readonly double _preRollSeconds; // length of the measurement window
+        private readonly double _fragmentedKThreshold; // K above this selects fragmented mode
+        private readonly double _maxBacklogSeconds; // converted to counter samples when the pre-roll ends
+
+        private bool _hasFirstSample;
+        private int _prevCounter; // counter value seen by the previous Update
+        private double _preRollStart; // elapsedSeconds of the first Update
+        private long _preRollAdvance; // total wrapped counter advance during the pre-roll
+        private long _minJump = long.MaxValue; // smallest non-zero advance seen during the pre-roll
+
+        private long _maxBacklog; // backlog limit in counter samples (set when the pre-roll ends)
+        private int _readPos; // next clip frame to read
+        private long _pending; // counter samples advanced but not yet emitted or dropped
+
+        /// <summary>False during the pre-roll measurement window; no ranges are emitted until ready.</summary>
+        public bool Ready { get; private set; }
+
+        /// <summary>True when the counter rate is inflated and only part of each stride holds data.</summary>
+        public bool Fragmented { get; private set; }
+
+        /// <summary>Counter samples per fragment cycle (0 when not fragmented).</summary>
+        public int Stride { get; private set; }
+
+        /// <summary>Valid data samples at the start of each stride (0 when not fragmented).</summary>
+        public int ValidPerStride { get; private set; }
+
+        /// <summary>Measured counter advance per second.</summary>
+        public double CounterRate { get; private set; }
+
+        /// <summary>Counter inflation factor: CounterRate / dataRate (~1 on healthy devices).</summary>
+        public double K { get; private set; }
+
+        /// <summary>Total counter samples discarded by backlog drops (e.g. after a stall).</summary>
+        public long TotalDropped { get; private set; }
+
+        public MicClipReader(int clipFrames, int dataRate,
+            double preRollSeconds = 0.3, double fragmentedKThreshold = 1.05, double maxBacklogSeconds = 0.2)
+        {
+            if (clipFrames <= 0) throw new ArgumentOutOfRangeException(nameof(clipFrames));
+            if (dataRate <= 0) throw new ArgumentOutOfRangeException(nameof(dataRate));
+            _clipFrames = clipFrames;
+            _dataRate = dataRate;
+            _preRollSeconds = preRollSeconds;
+            _fragmentedKThreshold = fragmentedKThreshold;
+            _maxBacklogSeconds = maxBacklogSeconds;
+        }
+
+        /// <summary>
+        /// Feeds the current counter position at a monotonic time and appends the ranges that
+        /// should be read from the clip (already split at the ring wrap) to <paramref name="ranges"/>.
+        /// </summary>
+        public void Update(int counterPosition, double elapsedSeconds, List<ReadRange> ranges)
+        {
+            if (!_hasFirstSample)
+            {
+                _hasFirstSample = true;
+                _prevCounter = counterPosition;
+                _preRollStart = elapsedSeconds;
+                return; // the first call only records the baseline
+            }
+
+            long d = ((counterPosition - _prevCounter) % _clipFrames + _clipFrames) % _clipFrames; // wrapped forward advance in [0, _clipFrames)
+            _prevCounter = counterPosition;
+
+            if (!Ready)
+            {
+                _preRollAdvance += d;
+                if (d > 0 && d < _minJump) _minJump = d;
+                double window = elapsedSeconds - _preRollStart;
+                if (window >= _preRollSeconds)
+                    FinishPreRoll(window);
+                return; // nothing is emitted on the call that completes the pre-roll
+            }
+
+            _pending += d;
+
+            // After a long stall, drop the oldest backlog instead of pushing a burst that would
+            // overrun the consumer. (A stall longer than one counter lap aliases the unwrapped
+            // advance and silently loses whole laps; this bound covers everything observable.)
+            if (_pending > _maxBacklog)
+            {
+                long drop = _pending - _maxBacklog;
+                if (Fragmented) drop -= drop % Stride; // preserve stride alignment
+                if (drop > 0)
+                {
+                    _readPos = (int)((_readPos + drop) % _clipFrames);
+                    _pending -= drop;
+                    TotalDropped += drop;
+                }
+            }
+
+            if (Fragmented)
+            {
+                while (_pending >= Stride) // emit whole strides only; a partial stride stays pending
+                {
+                    EmitSplit(_readPos, ValidPerStride, ranges);
+                    _readPos = (_readPos + Stride) % _clipFrames;
+                    _pending -= Stride;
+                }
+            }
+            else if (_pending > 0)
+            {
+                EmitSplit(_readPos, (int)_pending, ranges);
+                _readPos = (int)((_readPos + _pending) % _clipFrames);
+                _pending = 0;
+            }
+        }
+
+        private void FinishPreRoll(double windowSeconds)
+        {
+            CounterRate = _preRollAdvance > 0 && windowSeconds > 0 ? _preRollAdvance / windowSeconds : _dataRate; // a non-positive window (preRollSeconds <= 0) must not divide by zero
+            K = CounterRate / _dataRate;
+            Fragmented = K > _fragmentedKThreshold && _minJump != long.MaxValue && _minJump > 1;
+            Stride = Fragmented ? (int)_minJump : 0;
+            ValidPerStride = Fragmented ? Math.Max(1, (int)Math.Round(Stride / K)) : 0;
+            _maxBacklog = (long)(CounterRate * _maxBacklogSeconds);
+            _readPos = _prevCounter; // counter values land on jump boundaries
+            _pending = 0;
+            Ready = true;
+        }
+
+        private void EmitSplit(int start, int count, List<ReadRange> ranges)
+        {
+            if (count <= 0) return;
+            int first = Math.Min(count, _clipFrames - start); // frames available before the ring wrap
+            ranges.Add(new ReadRange { Start = start, Count = first });
+            if (count > first)
+                ranges.Add(new ReadRange { Start = 0, Count = count - first }); // remainder continues from frame 0
+        }
+    }
+}
diff --git a/Runtime/Scripts/Internal/StreamingResampler.cs b/Runtime/Scripts/Internal/StreamingResampler.cs
new file mode 100644
index 00000000..dd9be2d6
--- /dev/null
+++ b/Runtime/Scripts/Internal/StreamingResampler.cs
@@ -0,0 +1,54 @@
+using System;
+using System.Collections.Generic;
+
+namespace LiveKit.Internal
+{
+    /// <summary>
+    /// Streaming linear resampler for mono audio. Interpolation state carries across chunks, so a
+    /// stream processed in arbitrary slices produces the same output as processing it whole.
+    /// Free of UnityEngine dependencies so it can be unit tested.
+    /// </summary>
+    internal sealed class StreamingResampler
+    {
+        private readonly double _step; // input samples advanced per output sample (inputRate / outputRate)
+        private double _pos;           // fractional read position; >= -1, where -1 maps to _prev
+        private float _prev;           // last sample of the previous chunk
+
+        public StreamingResampler(int inputRate, int outputRate)
+        {
+            if (inputRate <= 0) throw new ArgumentOutOfRangeException(nameof(inputRate));
+            if (outputRate <= 0) throw new ArgumentOutOfRangeException(nameof(outputRate));
+            _step = (double)inputRate / outputRate;
+        }
+
+        public void Reset() // returns to the freshly constructed state: position 0, carried sample 0
+        {
+            _pos = 0.0;
+            _prev = 0f;
+        }
+
+        /// <summary>
+        /// Resamples the first <paramref name="count"/> samples of <paramref name="input"/> and
+        /// returns the produced output samples (possibly empty for very small chunks).
+        /// </summary>
+        public float[] Process(float[] input, int count)
+        {
+            if (count <= 0) return Array.Empty<float>(); // empty chunk: carried state is left untouched
+
+            var output = new List<float>((int)(count / _step) + 2); // capacity hint: about count / _step outputs plus slack
+            double pos = _pos;
+            while (pos < count - 1) // input[i0 + 1] must exist, so stop one sample short of the chunk end
+            {
+                int i0 = (int)Math.Floor(pos);
+                float a = i0 < 0 ? _prev : input[i0]; // a negative index interpolates from the previous chunk's last sample
+                float b = input[i0 + 1];
+                float frac = (float)(pos - i0);
+                output.Add(a * (1f - frac) + b * frac);
+                pos += _step;
+            }
+            _prev = input[count - 1]; // carried into the next call
+            _pos = pos - count;       // re-base the position onto the next chunk
+            return output.ToArray();
+        }
+    }
+}
diff --git a/Runtime/Scripts/MicrophoneSource.cs b/Runtime/Scripts/MicrophoneSource.cs
index 1a1823f0..89f3f68f 100644
--- a/Runtime/Scripts/MicrophoneSource.cs
+++ b/Runtime/Scripts/MicrophoneSource.cs
@@ -18,14 +18,9 @@ sealed public class MicrophoneSource : RtcAudioSource
         // The microphone clip's ring buffer is read directly (no AudioSource playback, no
         // OnAudioFilterRead), so capture is decoupled from the output device's clock.
         //
-        // Microphone.GetPosition cannot be trusted as a sample position on every platform. On
-        // macOS with a Bluetooth HFP headset, FMOD writes each real 20ms packet of clip.frequency
-        // audio, then advances the position counter ~3.2x too far and zero-fills the skipped
-        // range. The buffer then holds valid fragments of N samples at a stride J (measured: 320
-        // of every 1024) and the counter rate is k = J/N times the data rate. Inspection of a raw
-        // buffer dump showed the fragments are consecutive speech that joins continuously, so the
-        // stream is reconstructed losslessly by reading only the first N = J/k samples of each
-        // stride. Healthy devices have k ~ 1 and use a plain contiguous read.
+        // Microphone.GetPosition cannot be trusted as a sample position on every platform; see
+        // MicClipReader for the fragmented-buffer model (macOS + Bluetooth HFP) and how the
+        // contiguous stream is reconstructed from it.
         //
         // The clip's data rate is clip.frequency (verified: fragments play at correct pitch), so
         // captured samples are resampled from clip.frequency to the fixed native-source rate.
@@ -42,9 +37,7 @@ sealed public class MicrophoneSource : RtcAudioSource
         private bool _started = false;
         private volatile bool _capturing = false;
 
-        // Streaming linear-resampler state (input = clip.frequency, output = TargetSampleRate).
-        private double _resamplePos;
-        private float _resamplePrev;
+        private StreamingResampler _resampler;
 
         /// <summary>
         /// Creates a new microphone source for the given device.
@@ -147,105 +140,53 @@ private IEnumerator StartMicrophone()
         }
 
         // Reads new samples from the clip's ring buffer each frame and pushes them to the native
-        // source via AudioRead. Runs on the main thread; the native source's queue absorbs the
-        // per-frame pacing jitter.
+        // source via AudioRead. MicClipReader decides what to read (including reconstructing
+        // fragmented buffers); this loop is the thin Unity shell around it. Runs on the main
+        // thread; the native source's queue absorbs the per-frame pacing jitter.
         private IEnumerator CaptureLoop(AudioClip clip)
         {
             int clipFrames = clip.samples;
             int channels = clip.channels;
             int dataRate = clip.frequency > 0 ? clip.frequency : (int)DefaultMicrophoneSampleRate;
 
-            // Pre-roll: measure how fast the position counter advances (its average is steady even
-            // when individual values jump) and the size of its smallest discrete jump.
-            int prevCounter = Microphone.GetPosition(_deviceName);
-            long advance = 0;
-            long minJump = long.MaxValue;
-            var preRoll = System.Diagnostics.Stopwatch.StartNew();
-            while (preRoll.Elapsed.TotalSeconds < PreRollSeconds)
-            {
-                if (!_capturing || _disposed) yield break;
-                yield return null;
-                int c = Microphone.GetPosition(_deviceName);
-                long d = ((c - prevCounter) % clipFrames + clipFrames) % clipFrames;
-                prevCounter = c;
-                advance += d;
-                if (d > 0 && d < minJump) minJump = d;
-            }
-            if (!_capturing || _disposed) yield break;
-
-            double counterRate = advance > 0 ? advance / preRoll.Elapsed.TotalSeconds : dataRate;
-            double k = counterRate / dataRate;
-
-            // Fragmented mode: the counter advances in jumps of `stride`, but only the first
-            // `validPerStride` samples of each stride contain data; the rest is zero padding.
-            bool fragmented = k > FragmentedKThreshold && minJump != long.MaxValue && minJump > 1;
-            int stride = fragmented ? (int)minJump : 0;
-            int validPerStride = fragmented ? Math.Max(1, (int)Math.Round(stride / k)) : 0;
-
-            if (fragmented)
-                Utils.Info($"MicrophoneSource: fragmented clip detected (k={k:F2}); reading {validPerStride} of every {stride} samples at {dataRate}Hz");
-            else
-                Utils.Info($"MicrophoneSource: contiguous capture (k={k:F2}) at {dataRate}Hz");
-
-            _resamplePos = 0.0;
-            _resamplePrev = 0f;
-            long maxBacklog = (long)(counterRate * MaxBacklogSeconds);
-            int readPos = prevCounter; // counter values land on jump boundaries
-            long pending = 0;
+            var reader = new MicClipReader(clipFrames, dataRate, PreRollSeconds, FragmentedKThreshold, MaxBacklogSeconds);
+            _resampler = new StreamingResampler(dataRate, (int)TargetSampleRate);
+            var ranges = new List<MicClipReader.ReadRange>();
+            var clock = System.Diagnostics.Stopwatch.StartNew();
+            bool announced = false;
+            long reportedDrops = 0;
 
             while (_capturing && !_disposed)
             {
                 yield return null;
 
-                int c = Microphone.GetPosition(_deviceName);
-                long d = ((c - prevCounter) % clipFrames + clipFrames) % clipFrames;
-                prevCounter = c;
-                pending += d;
+                ranges.Clear();
+                reader.Update(Microphone.GetPosition(_deviceName), clock.Elapsed.TotalSeconds, ranges);
 
-                // After a long stall, drop the oldest backlog instead of pushing a burst that
-                // would overrun the native source's queue.
-                if (pending > maxBacklog)
+                if (!announced && reader.Ready)
                 {
-                    long drop = pending - maxBacklog;
-                    if (fragmented) drop -= drop % stride; // preserve stride alignment
-                    readPos = (int)((readPos + drop) % clipFrames);
-                    pending -= drop;
-                    Utils.Warning($"MicrophoneSource: dropped {drop} buffered samples after a stall");
+                    announced = true;
+                    if (reader.Fragmented)
+                        Utils.Info($"MicrophoneSource: fragmented clip detected (k={reader.K:F2}); reading {reader.ValidPerStride} of every {reader.Stride} samples at {dataRate}Hz");
+                    else
+                        Utils.Info($"MicrophoneSource: contiguous capture (k={reader.K:F2}) at {dataRate}Hz");
                 }
 
-                if (fragmented)
-                {
-                    while (pending >= stride)
-                    {
-                        EmitClipRange(clip, channels, dataRate, readPos, validPerStride, clipFrames);
-                        readPos = (readPos + stride) % clipFrames;
-                        pending -= stride;
-                    }
-                }
-                else if (pending > 0)
+                if (reader.TotalDropped > reportedDrops)
                 {
-                    EmitClipRange(clip, channels, dataRate, readPos, (int)pending, clipFrames);
-                    readPos = (int)((readPos + pending) % clipFrames);
-                    pending = 0;
+                    Utils.Warning($"MicrophoneSource: dropped {reader.TotalDropped - reportedDrops} buffered samples after a stall");
+                    reportedDrops = reader.TotalDropped;
                 }
-            }
-        }
 
-        // Reads `count` frames starting at `start`, splitting at the ring wrap so each GetData
-        // read is contiguous.
-        private void EmitClipRange(AudioClip clip, int channels, int dataRate, int start, int count, int clipFrames)
-        {
-            if (count <= 0) return;
-            int first = Math.Min(count, clipFrames - start);
-            ReadAndPush(clip, channels, dataRate, start, first);
-            if (count > first)
-                ReadAndPush(clip, channels, dataRate, 0, count - first);
+                for (int i = 0; i < ranges.Count; i++)
+                    ReadAndPush(clip, channels, ranges[i].Start, ranges[i].Count);
+            }
         }
 
-        // Reads a contiguous range, downmixes to mono, resamples dataRate -> TargetSampleRate
-        // (streaming linear interpolation carrying state across calls, so fragment junctions stay
+        // Reads a contiguous range, downmixes to mono, resamples clip.frequency ->
+        // TargetSampleRate (the resampler carries state across calls, so fragment junctions stay
         // continuous), and fires AudioRead.
-        private void ReadAndPush(AudioClip clip, int channels, int dataRate, int start, int count)
+        private void ReadAndPush(AudioClip clip, int channels, int start, int count)
         {
             if (count <= 0) return;
 
@@ -269,26 +210,9 @@ private void ReadAndPush(AudioClip clip, int channels, int dataRate, int start,
                 }
             }
 
-            double step = (double)dataRate / TargetSampleRate;
-            var output = new List<float>((int)(count / step) + 2);
-
-            // Index -1 maps to the carried last sample of the previous chunk so interpolation is
-            // continuous across chunk boundaries. pos stays >= -1.
-            double pos = _resamplePos;
-            while (pos < count - 1)
-            {
-                int i0 = (int)Math.Floor(pos);
-                float a = i0 < 0 ? _resamplePrev : mono[i0];
-                float b = mono[i0 + 1];
-                float frac = (float)(pos - i0);
-                output.Add(a * (1f - frac) + b * frac);
-                pos += step;
-            }
-            _resamplePrev = mono[count - 1];
-            _resamplePos = pos - count;
-
-            if (output.Count > 0)
-                AudioRead?.Invoke(output.ToArray(), 1, (int)TargetSampleRate);
+            var output = _resampler.Process(mono, count);
+            if (output.Length > 0)
+                AudioRead?.Invoke(output, 1, (int)TargetSampleRate);
         }
 
         /// <summary>
diff --git a/Tests/EditMode/MicClipReaderTests.cs b/Tests/EditMode/MicClipReaderTests.cs
new file mode 100644
index 00000000..54b3a534
--- /dev/null
+++ b/Tests/EditMode/MicClipReaderTests.cs
@@ -0,0 +1,243 @@
+using System;
+using System.Collections.Generic;
+using NUnit.Framework;
+using LiveKit.Internal;
+
+namespace LiveKit.EditModeTests
+{
+    /// <summary>
+    /// Tests for the microphone clip reading logic, including reconstruction of the fragmented
+    /// buffers produced by macOS with Bluetooth HFP headsets (valid fragments of 320 samples at a
+    /// 1024-sample stride with zero padding, position counter inflated k=3.2x; structure taken
+    /// from a raw buffer dump of a Sony MDR-1000X).
+    /// </summary>
+    public class MicClipReaderTests
+    {
+        const double PreRoll = 0.3; // pre-roll window passed to every reader under test
+
+        static List<MicClipReader.ReadRange> Drain(MicClipReader reader, int counter, double t) // one Update call; returns only the ranges it emitted
+        {
+            var ranges = new List<MicClipReader.ReadRange>();
+            reader.Update(counter, t, ranges);
+            return ranges;
+        }
+
+        // Feeds Update(0, 0) and then one tick of (advancePerTick, dt) at a time until the reader is Ready; returns the final (counter, time).
+        static (int counter, double t) RunPreRoll(MicClipReader reader, int clipFrames, int advancePerTick, double dt)
+        {
+            int counter = 0;
+            double t = 0;
+            reader.Update(counter, t, new List<MicClipReader.ReadRange>());
+            while (!reader.Ready)
+            {
+                t += dt;
+                counter = (counter + advancePerTick) % clipFrames;
+                reader.Update(counter, t, new List<MicClipReader.ReadRange>());
+            }
+            return (counter, t);
+        }
+
+        [Test]
+        public void HealthyDevice_UsesContiguousMode_AndEmitsAllSamples()
+        {
+            const int clipFrames = 96000; // 2s @ 48k
+            const int rate = 48000;
+            const int perTick = 480;      // 10ms ticks at the data rate
+            const double dt = 0.01;
+
+            var reader = new MicClipReader(clipFrames, rate, PreRoll);
+            var (counter, t) = RunPreRoll(reader, clipFrames, perTick, dt);
+
+            Assert.IsFalse(reader.Fragmented);
+            Assert.AreEqual(1.0, reader.K, 0.02);
+
+            long emitted = 0;
+            for (int i = 0; i < 100; i++)
+            {
+                t += dt;
+                counter = (counter + perTick) % clipFrames;
+                foreach (var r in Drain(reader, counter, t))
+                {
+                    Assert.LessOrEqual(r.Start + r.Count, clipFrames, "range must not cross the ring wrap");
+                    emitted += r.Count;
+                }
+            }
+            Assert.AreEqual(100L * perTick, emitted, "contiguous mode must emit every written sample");
+        }
+
+        [Test]
+        public void FragmentedDevice_DetectsStrideAndValidCount()
+        {
+            const int clipFrames = 32000; // 2s @ 16k
+            const int rate = 16000;
+            const int stride = 1024;      // one counter jump per real 20ms packet
+            const double dt = 0.02;       // 1024 counter samples per 20ms = 51200/s, i.e. k = 3.2 at 16kHz
+
+            var reader = new MicClipReader(clipFrames, rate, PreRoll);
+            RunPreRoll(reader, clipFrames, stride, dt);
+
+            Assert.IsTrue(reader.Fragmented);
+            Assert.AreEqual(3.2, reader.K, 0.05);
+            Assert.AreEqual(stride, reader.Stride);
+            Assert.AreEqual(320, reader.ValidPerStride); // 1024 / 3.2
+        }
+
+        [Test]
+        public void FragmentedDevice_ReconstructsContiguousStream()
+        {
+            const int clipFrames = 32000;
+            const int rate = 16000;
+            const int stride = 1024;
+            const int valid = 320;
+            const double dt = 0.02;
+
+            var reader = new MicClipReader(clipFrames, rate, PreRoll);
+
+            // Simulated clip: each tick the writer stores `valid` sequential marker values at the
+            // counter's previous position and zero-fills the rest of the stride, exactly like the
+            // dumped MDR-1000X buffer.
+            var clip = new float[clipFrames];
+            float marker = 1f;
+            int counter = 0;
+            double t = 0;
+            reader.Update(counter, t, new List<MicClipReader.ReadRange>());
+
+            void WriteFragment()
+            {
+                for (int i = 0; i < stride; i++)
+                    clip[(counter + i) % clipFrames] = i < valid ? marker + i : 0f;
+                marker += valid; // the next fragment continues the marker sequence without a gap
+                counter = (counter + stride) % clipFrames;
+            }
+
+            while (!reader.Ready)
+            {
+                t += dt;
+                WriteFragment();
+                reader.Update(counter, t, new List<MicClipReader.ReadRange>());
+            }
+
+            // Capture for several buffer laps and verify the emitted stream is the unbroken
+            // marker sequence: lossless reconstruction with no gaps, repeats, or padding.
+            var collected = new List<float>();
+            for (int tick = 0; tick < 200; tick++)
+            {
+                t += dt;
+                WriteFragment();
+                foreach (var r in Drain(reader, counter, t))
+                {
+                    Assert.LessOrEqual(r.Start + r.Count, clipFrames, "range must not cross the ring wrap");
+                    for (int i = 0; i < r.Count; i++)
+                        collected.Add(clip[r.Start + i]);
+                }
+            }
+
+            Assert.AreEqual(200 * valid, collected.Count, "every valid fragment must be emitted exactly once");
+            for (int i = 1; i < collected.Count; i++)
+                Assert.AreEqual(collected[i - 1] + 1f, collected[i], $"stream must be contiguous at index {i}");
+        }
+
+        [Test]
+        public void FragmentedDevice_DropsStaleBacklogStrideAligned()
+        {
+            const int clipFrames = 32000;
+            const int rate = 16000;
+            const int stride = 1024;
+            const double dt = 0.02;
+            const double maxBacklogSec = 0.2;
+
+            var reader = new MicClipReader(clipFrames, rate, PreRoll, 1.05, maxBacklogSec);
+            var (counter, t) = RunPreRoll(reader, clipFrames, stride, dt);
+
+            // One giant advance (a main-thread stall): 25 strides at once.
+            const int stalledStrides = 25;
+            counter = (counter + stalledStrides * stride) % clipFrames; // 25600 < clipFrames, so the advance does not alias
+            t += stalledStrides * dt;
+            var ranges = Drain(reader, counter, t);
+
+            Assert.Greater(reader.TotalDropped, 0, "stall backlog must be dropped");
+            Assert.AreEqual(0, reader.TotalDropped % stride, "drop must preserve stride alignment");
+
+            // Emitted + dropped must account for the whole advance (in counter units).
+            long emittedStrides = 0;
+            foreach (var r in ranges) emittedStrides += r.Count;
+            emittedStrides /= reader.ValidPerStride; // each emitted stride contributes ValidPerStride samples
+            Assert.AreEqual(stalledStrides, emittedStrides + reader.TotalDropped / stride);
+
+            // The bounded burst must not exceed the backlog limit.
+            Assert.LessOrEqual(emittedStrides * stride, (long)(reader.CounterRate * maxBacklogSec));
+        }
+
+        [Test]
+        public void NoRangesAreEmittedDuringPreRoll()
+        {
+            const int clipFrames = 96000;
+            var reader = new MicClipReader(clipFrames, 48000, PreRoll);
+            var ranges = new List<MicClipReader.ReadRange>();
+            reader.Update(0, 0.0, ranges);
+            reader.Update(480, 0.01, ranges);
+            reader.Update(960, 0.02, ranges); // only 20ms elapsed, well short of the 0.3s pre-roll
+            Assert.IsFalse(reader.Ready);
+            Assert.IsEmpty(ranges);
+        }
+    }
+
+    public class StreamingResamplerTests
+    {
+        static float[] Sine(int count, double freqHz, int rate) // count samples of a sine at freqHz, sampled at rate
+        {
+            var s = new float[count];
+            for (int i = 0; i < count; i++)
+                s[i] = (float)Math.Sin(2.0 * Math.PI * freqHz * i / rate);
+            return s;
+        }
+
+        static int ZeroCrossings(IReadOnlyList<float> s) // counts sign changes between consecutive samples
+        {
+            int n = 0;
+            for (int i = 1; i < s.Count; i++)
+                if ((s[i - 1] < 0f) != (s[i] < 0f)) n++;
+            return n;
+        }
+
+        [Test]
+        public void Upsample16kTo48k_PreservesFrequencyAndLength()
+        {
+            const int inRate = 16000, outRate = 48000;
+            var input = Sine(16000, 200.0, inRate); // 1s of 200Hz
+            var resampler = new StreamingResampler(inRate, outRate);
+            var output = resampler.Process(input, input.Length);
+
+            Assert.AreEqual(outRate, output.Length, outRate / 100, "1s in should be ~1s out at the new rate");
+            // 200Hz over ~1s crosses zero ~400 times regardless of sample rate.
+            Assert.AreEqual(ZeroCrossings(input), ZeroCrossings(output), 4);
+        }
+
+        [Test]
+        public void ChunkedProcessing_MatchesWholeProcessing()
+        {
+            const int inRate = 16000, outRate = 48000;
+            var input = Sine(3200, 250.0, inRate);
+
+            var whole = new StreamingResampler(inRate, outRate).Process(input, input.Length);
+
+            // Process the same stream in 320-sample fragments (the MDR-1000X packet size).
+            var chunked = new List<float>();
+            var resampler = new StreamingResampler(inRate, outRate);
+            for (int off = 0; off < input.Length; off += 320) // 3200 / 320 = 10 full chunks, no remainder
+            {
+                var chunk = new float[320];
+                Array.Copy(input, off, chunk, 0, 320);
+                chunked.AddRange(resampler.Process(chunk, 320));
+            }
+
+            // Accumulated floating-point rounding differs by an ulp between the two paths (the
+            // chunked position is renormalized per chunk), which can flip the final boundary
+            // sample — allow a 1-sample tail difference, but the overlap must match exactly.
+            Assert.AreEqual(whole.Length, chunked.Count, 1, "chunking must not change the output length (±1 tail sample)");
+            int overlap = Math.Min(whole.Length, chunked.Count);
+            for (int i = 0; i < overlap; i++)
+                Assert.AreEqual(whole[i], chunked[i], 1e-4f, $"chunked output diverges at {i}");
+        }
+    }
+}

From d92df2d68f2a9be0c3be71b81870ef1a53c14aa6 Mon Sep 17 00:00:00 2001
From: Max Heimbrock <43608204+MaxHeimbrock@users.noreply.github.com>
Date: Fri, 12 Jun 2026 16:49:48 +0200
Subject: [PATCH 3/5] Fix MissingComponentException when unpublishing the
 microphone

MicrophoneSource no longer attaches an AudioSource to its GameObject (it
reads the mic clip directly), but the Meet sample still called
GetComponent<AudioSource>()?.Stop() on unpublish. The ?. operator bypasses
Unity's overloaded null-check on the editor's missing-component stub, so
Stop() ran on the stub and threw MissingComponentException. Remove the
obsolete call.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 Samples~/Meet/Assets/Runtime/MeetManager.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Samples~/Meet/Assets/Runtime/MeetManager.cs b/Samples~/Meet/Assets/Runtime/MeetManager.cs
index 97b2cb70..baa70837 100644
--- a/Samples~/Meet/Assets/Runtime/MeetManager.cs
+++ b/Samples~/Meet/Assets/Runtime/MeetManager.cs
@@ -487,7 +487,7 @@ private void UnpublishLocalMicrophone()
 
         if (_audioObjects.TryGetValue(LocalAudioTrackName, out var obj))
         {
-            obj.GetComponent<AudioSource>()?.Stop();
+            // MicrophoneSource reads the mic clip directly; no AudioSource is attached anymore.
             Destroy(obj);
             _audioObjects.Remove(LocalAudioTrackName);
         }

From 9b63c7556b35f2539ce3001602036a12020a40ad Mon Sep 17 00:00:00 2001
From: Max Heimbrock <43608204+MaxHeimbrock@users.noreply.github.com>
Date: Fri, 12 Jun 2026 17:19:55 +0200
Subject: [PATCH 4/5] Prevent false fragmented-mode detection; fix remaining
 AudioSource cleanup

Field testing device transitions surfaced a false positive: right after
recovering onto the healthy MacBook microphone, the pre-roll measured k=1.07
(counter startup burst while driver buffers flush) which crossed the old 1.05
threshold and engaged fragmented mode - silently discarding ~6% of real audio
(heard as choppiness) until the next re-measurement.

Engaging fragmented mode discards (stride - valid) samples per stride, so a
false positive guarantees audio loss while a false negative only risks mild
artifacts. Fix both sides of the measurement:

- Raise the fragmented threshold from 1.05 to 1.5: the observed pathological
  device measures k=3.2, healthy devices ~1.0 plus a few percent of noise -
  keep a wide margin between the two.
- Add a 100ms settle window that discards the counter's startup burst before
  the rate measurement begins.

Add a regression test for the borderline case (k=1.07 must stay contiguous).

Also fix the second AudioSource null-propagation site (CleanUpAllTracks via
OnDestroy) with TryGetComponent - same MissingComponentException class as the
unpublish path, hit because the local mic object no longer carries an
AudioSource.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 Runtime/Scripts/Internal/MicClipReader.cs   | 25 +++++++++++++++++----
 Runtime/Scripts/MicrophoneSource.cs         |  9 ++++++--
 Samples~/Meet/Assets/Runtime/MeetManager.cs |  5 ++++-
 Tests/EditMode/MicClipReaderTests.cs        | 18 +++++++++++++++
 4 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/Runtime/Scripts/Internal/MicClipReader.cs b/Runtime/Scripts/Internal/MicClipReader.cs
index cb04b9fa..99a49bbb 100644
--- a/Runtime/Scripts/Internal/MicClipReader.cs
+++ b/Runtime/Scripts/Internal/MicClipReader.cs
@@ -30,10 +30,12 @@ public struct ReadRange
         private readonly double _preRollSeconds;
         private readonly double _fragmentedKThreshold;
         private readonly double _maxBacklogSeconds;
+        private readonly double _settleSeconds;
 
         private bool _hasFirstSample;
         private int _prevCounter;
-        private double _preRollStart;
+        private double _firstSampleTime;
+        private double _measureStart = double.NaN;
         private long _preRollAdvance;
         private long _minJump = long.MaxValue;
 
@@ -63,7 +65,8 @@ public struct ReadRange
         public long TotalDropped { get; private set; }
 
         public MicClipReader(int clipFrames, int dataRate,
-            double preRollSeconds = 0.3, double fragmentedKThreshold = 1.05, double maxBacklogSeconds = 0.2)
+            double preRollSeconds = 0.3, double fragmentedKThreshold = 1.5, double maxBacklogSeconds = 0.2,
+            double settleSeconds = 0.1)
         {
             if (clipFrames <= 0) throw new ArgumentOutOfRangeException(nameof(clipFrames));
             if (dataRate <= 0) throw new ArgumentOutOfRangeException(nameof(dataRate));
@@ -72,6 +75,7 @@ public MicClipReader(int clipFrames, int dataRate,
             _preRollSeconds = preRollSeconds;
             _fragmentedKThreshold = fragmentedKThreshold;
             _maxBacklogSeconds = maxBacklogSeconds;
+            _settleSeconds = settleSeconds;
         }
 
         /// <summary>
@@ -84,7 +88,7 @@ public void Update(int counterPosition, double elapsedSeconds, List<ReadRange> r
             {
                 _hasFirstSample = true;
                 _prevCounter = counterPosition;
-                _preRollStart = elapsedSeconds;
+                _firstSampleTime = elapsedSeconds;
                 return;
             }
 
@@ -93,9 +97,22 @@ public void Update(int counterPosition, double elapsedSeconds, List<ReadRange> r
 
             if (!Ready)
             {
+                // Discard the settle window entirely: right after a device starts, the counter can
+                // burst ahead while driver buffers flush, which would inflate the measured rate
+                // (observed: a healthy device measuring k=1.07 right after a device transition).
+                if (elapsedSeconds - _firstSampleTime < _settleSeconds)
+                    return;
+                if (double.IsNaN(_measureStart))
+                {
+                    // Anchor the measurement window here; the delta spanning the settle boundary
+                    // is discarded with the settle period.
+                    _measureStart = elapsedSeconds;
+                    return;
+                }
+
                 _preRollAdvance += d;
                 if (d > 0 && d < _minJump) _minJump = d;
-                double window = elapsedSeconds - _preRollStart;
+                double window = elapsedSeconds - _measureStart;
                 if (window >= _preRollSeconds)
                     FinishPreRoll(window);
                 return;
diff --git a/Runtime/Scripts/MicrophoneSource.cs b/Runtime/Scripts/MicrophoneSource.cs
index 89f3f68f..b424e2a5 100644
--- a/Runtime/Scripts/MicrophoneSource.cs
+++ b/Runtime/Scripts/MicrophoneSource.cs
@@ -26,7 +26,12 @@ sealed public class MicrophoneSource : RtcAudioSource
         // captured samples are resampled from clip.frequency to the fixed native-source rate.
         private const uint TargetSampleRate = 48000;
         private const float PreRollSeconds = 0.3f;
-        private const double FragmentedKThreshold = 1.05;
+        private const float SettleSeconds = 0.1f;     // discard the counter's startup burst before measuring
+        // Engaging fragmented mode discards (stride - valid) samples per stride, so a false
+        // positive guarantees audio loss while a false negative only risks mild artifacts. The
+        // observed pathological device measures k=3.2; healthy devices measure ~1.0 with up to a
+        // few percent of startup noise. Keep a wide margin between the two.
+        private const double FragmentedKThreshold = 1.5;
         private const float MaxBacklogSeconds = 0.2f; // drop backlog beyond this after a stall
 
         private readonly string _deviceName;
@@ -149,7 +154,7 @@ private IEnumerator CaptureLoop(AudioClip clip)
             int channels = clip.channels;
             int dataRate = clip.frequency > 0 ? clip.frequency : (int)DefaultMicrophoneSampleRate;
 
-            var reader = new MicClipReader(clipFrames, dataRate, PreRollSeconds, FragmentedKThreshold, MaxBacklogSeconds);
+            var reader = new MicClipReader(clipFrames, dataRate, PreRollSeconds, FragmentedKThreshold, MaxBacklogSeconds, SettleSeconds);
             _resampler = new StreamingResampler(dataRate, (int)TargetSampleRate);
             var ranges = new List<MicClipReader.ReadRange>();
             var clock = System.Diagnostics.Stopwatch.StartNew();
diff --git a/Samples~/Meet/Assets/Runtime/MeetManager.cs b/Samples~/Meet/Assets/Runtime/MeetManager.cs
index baa70837..532aa319 100644
--- a/Samples~/Meet/Assets/Runtime/MeetManager.cs
+++ b/Samples~/Meet/Assets/Runtime/MeetManager.cs
@@ -566,7 +566,10 @@ private void CleanUpAllTracks()
         foreach (var obj in _audioObjects.Values)
         {
             if (obj == null) continue;
-            obj.GetComponent<AudioSource>()?.Stop();
+            // Not every audio object has an AudioSource (the local mic object no longer does), and
+            // ?. on GetComponent bypasses Unity's missing-component null semantics in the editor.
+            if (obj.TryGetComponent<AudioSource>(out var audioSource))
+                audioSource.Stop();
             Destroy(obj);
         }
         _audioObjects.Clear();
diff --git a/Tests/EditMode/MicClipReaderTests.cs b/Tests/EditMode/MicClipReaderTests.cs
index 54b3a534..36a563d5 100644
--- a/Tests/EditMode/MicClipReaderTests.cs
+++ b/Tests/EditMode/MicClipReaderTests.cs
@@ -168,6 +168,24 @@ public void FragmentedDevice_DropsStaleBacklogStrideAligned()
             Assert.LessOrEqual(emittedStrides * stride, (long)(reader.CounterRate * maxBacklogSec));
         }
 
+        [Test]
+        public void SlightlyInflatedCounter_StaysContiguous()
+        {
+            // Regression: a healthy MacBook mic measured k=1.07 right after a device transition
+            // (startup-burst noise), and the old 1.05 threshold engaged fragmented mode, silently
+            // discarding ~6% of real audio. Borderline rates must stay contiguous under the defaults.
+            const int clipFrames = 96000; // 2 s of clip at the 48 kHz data rate below
+            const int rate = 48000;
+            const int perTick = 514; // 514 vs. 480 nominal frames per 10ms tick => k ~ 1.07
+            const double dt = 0.01; // simulated tick interval in seconds (10ms)
+
+            var reader = new MicClipReader(clipFrames, rate, PreRoll); // threshold/settle left at their defaults
+            RunPreRoll(reader, clipFrames, perTick, dt);
+
+            Assert.IsFalse(reader.Fragmented, "k slightly above 1 must not trigger fragmented mode");
+            Assert.AreEqual(1.07, reader.K, 0.02); // the inflated rate is still measured, just not treated as fragmented
+        }
+
         [Test]
         public void NoRangesAreEmittedDuringPreRoll()
         {

From 457ce3fbe4f2a5f6c284241100bd8701ea20ddb9 Mon Sep 17 00:00:00 2001
From: Max Heimbrock <43608204+MaxHeimbrock@users.noreply.github.com>
Date: Fri, 12 Jun 2026 17:22:03 +0200
Subject: [PATCH 5/5] Add Unity meta files for the new scripts

Generated by the editor; required for stable GUIDs when the package is
imported.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 Runtime/Scripts/Internal/MicClipReader.cs.meta      | 11 +++++++++++
 Runtime/Scripts/Internal/StreamingResampler.cs.meta | 11 +++++++++++
 Tests/EditMode/MicClipReaderTests.cs.meta           | 11 +++++++++++
 3 files changed, 33 insertions(+)
 create mode 100644 Runtime/Scripts/Internal/MicClipReader.cs.meta
 create mode 100644 Runtime/Scripts/Internal/StreamingResampler.cs.meta
 create mode 100644 Tests/EditMode/MicClipReaderTests.cs.meta

diff --git a/Runtime/Scripts/Internal/MicClipReader.cs.meta b/Runtime/Scripts/Internal/MicClipReader.cs.meta
new file mode 100644
index 00000000..88aa56bd
--- /dev/null
+++ b/Runtime/Scripts/Internal/MicClipReader.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: d0ae29390ef914aa6b62ae81c9b4f212
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/Runtime/Scripts/Internal/StreamingResampler.cs.meta b/Runtime/Scripts/Internal/StreamingResampler.cs.meta
new file mode 100644
index 00000000..26d7c37c
--- /dev/null
+++ b/Runtime/Scripts/Internal/StreamingResampler.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 967338b84cfb74bdebca9132f3b9abd0
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
diff --git a/Tests/EditMode/MicClipReaderTests.cs.meta b/Tests/EditMode/MicClipReaderTests.cs.meta
new file mode 100644
index 00000000..70af710c
--- /dev/null
+++ b/Tests/EditMode/MicClipReaderTests.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: b8df68a85510e4aa58359a4dd8b170c6
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: