diff --git a/Runtime/Scripts/BasicAudioSource.cs b/Runtime/Scripts/BasicAudioSource.cs
index 3b63680b..8193090d 100644
--- a/Runtime/Scripts/BasicAudioSource.cs
+++ b/Runtime/Scripts/BasicAudioSource.cs
@@ -19,9 +19,11 @@ sealed public class BasicAudioSource : RtcAudioSource
/// Creates a new basic audio source for the given <see cref="AudioSource"/> in the scene.
/// </summary>
/// <param name="source">The <see cref="AudioSource"/> to capture from.</param>
- /// The number of channels to capture.
/// <param name="sourceType">The type of audio source.</param>
- public BasicAudioSource(AudioSource source, int channels = 2, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(channels, sourceType)
+ /// <remarks>
+ /// The sample rate and channel count are taken from Unity's audio configuration.
+ /// </remarks>
+ public BasicAudioSource(AudioSource source, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(sourceType)
{
_source = source;
}
diff --git a/Runtime/Scripts/Internal/MicClipReader.cs b/Runtime/Scripts/Internal/MicClipReader.cs
new file mode 100644
index 00000000..99a49bbb
--- /dev/null
+++ b/Runtime/Scripts/Internal/MicClipReader.cs
@@ -0,0 +1,177 @@
+using System;
+using System.Collections.Generic;
+
+namespace LiveKit.Internal
+{
+    /// <summary>
+    /// Pure logic for reading a looping microphone clip ring buffer whose position counter may be
+    /// unreliable. Free of UnityEngine dependencies so it can be unit tested.
+    /// </summary>
+    /// <remarks>
+    /// On most devices the position counter advances at the clip's data rate and capture is a
+    /// plain contiguous read. On macOS with a Bluetooth HFP headset, however, FMOD writes each
+    /// real packet of clip.frequency audio and then advances the counter k (~3.2) times too far,
+    /// zero-filling the skipped range: the buffer holds valid fragments of N samples at a stride
+    /// J (measured: 320 of every 1024), the fragments join continuously, and the counter rate is
+    /// k = J/N times the data rate. The reader measures the counter rate and its smallest
+    /// discrete jump during a pre-roll; when the rate is inflated it emits only the first J/k
+    /// samples of each stride, reconstructing the contiguous stream.
+    /// </remarks>
+    internal sealed class MicClipReader
+    {
+        /// <summary>A span of clip frames to read; emitted ranges never cross the ring wrap.</summary>
+        public struct ReadRange
+        {
+            public int Start;
+            public int Count;
+        }
+
+        private readonly int _clipFrames;
+        private readonly int _dataRate;
+        private readonly double _preRollSeconds;
+        private readonly double _fragmentedKThreshold;
+        private readonly double _maxBacklogSeconds;
+        private readonly double _settleSeconds;
+
+        // Pre-roll measurement state.
+        private bool _hasFirstSample;
+        private int _prevCounter;              // last counter value, normalized to [0, _clipFrames)
+        private double _firstSampleTime;
+        private double _measureStart = double.NaN; // NaN until the settle window has elapsed
+        private long _preRollAdvance;
+        private long _minJump = long.MaxValue; // long.MaxValue means "no non-zero jump seen yet"
+
+        // Streaming state (valid once Ready).
+        private long _maxBacklog;
+        private int _readPos;
+        private long _pending;
+
+        /// <summary>False during the pre-roll measurement window; no ranges are emitted until ready.</summary>
+        public bool Ready { get; private set; }
+
+        /// <summary>True when the counter rate is inflated and only part of each stride holds data.</summary>
+        public bool Fragmented { get; private set; }
+
+        /// <summary>Counter samples per fragment cycle (0 when not fragmented).</summary>
+        public int Stride { get; private set; }
+
+        /// <summary>Valid data samples at the start of each stride (0 when not fragmented).</summary>
+        public int ValidPerStride { get; private set; }
+
+        /// <summary>Measured counter advance per second.</summary>
+        public double CounterRate { get; private set; }
+
+        /// <summary>Counter inflation factor: CounterRate / dataRate (~1 on healthy devices).</summary>
+        public double K { get; private set; }
+
+        /// <summary>Total counter samples discarded by backlog drops (e.g. after a stall).</summary>
+        public long TotalDropped { get; private set; }
+
+        /// <summary>Creates a reader for a ring buffer of <paramref name="clipFrames"/> frames.</summary>
+        /// <param name="clipFrames">Length of the clip ring buffer in frames; must be positive.</param>
+        /// <param name="dataRate">Real audio data rate in Hz (the clip frequency); must be positive.</param>
+        /// <param name="preRollSeconds">Length of the measurement window that follows the settle window.</param>
+        /// <param name="fragmentedKThreshold">Fragmented mode engages when the measured K exceeds this.</param>
+        /// <param name="maxBacklogSeconds">Backlog (in seconds of counter advance) kept after a stall; must not be negative or NaN.</param>
+        /// <param name="settleSeconds">Initial period whose counter movement is ignored.</param>
+        /// <exception cref="ArgumentOutOfRangeException">An argument is outside its valid range.</exception>
+        public MicClipReader(int clipFrames, int dataRate,
+            double preRollSeconds = 0.3, double fragmentedKThreshold = 1.5, double maxBacklogSeconds = 0.2,
+            double settleSeconds = 0.1)
+        {
+            if (clipFrames <= 0) throw new ArgumentOutOfRangeException(nameof(clipFrames));
+            if (dataRate <= 0) throw new ArgumentOutOfRangeException(nameof(dataRate));
+            // A negative or NaN limit would make the drop exceed the pending amount and drive
+            // _pending negative. The negated comparison also rejects NaN.
+            if (!(maxBacklogSeconds >= 0)) throw new ArgumentOutOfRangeException(nameof(maxBacklogSeconds));
+            _clipFrames = clipFrames;
+            _dataRate = dataRate;
+            _preRollSeconds = preRollSeconds;
+            _fragmentedKThreshold = fragmentedKThreshold;
+            _maxBacklogSeconds = maxBacklogSeconds;
+            _settleSeconds = settleSeconds;
+        }
+
+        /// <summary>
+        /// Feeds the current counter position at a monotonic time and appends the ranges that
+        /// should be read from the clip (already split at the ring wrap) to <paramref name="ranges"/>.
+        /// </summary>
+        /// <param name="counterPosition">Current position counter; wrapped into the clip length if out of range.</param>
+        /// <param name="elapsedSeconds">Monotonic timestamp of this sample, in seconds.</param>
+        /// <param name="ranges">List that receives the ranges to read; it is appended to, not cleared.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="ranges"/> is null.</exception>
+        public void Update(int counterPosition, double elapsedSeconds, List<ReadRange> ranges)
+        {
+            if (ranges == null) throw new ArgumentNullException(nameof(ranges));
+
+            // Wrap the counter into [0, _clipFrames): it seeds _readPos after the pre-roll, and
+            // an out-of-range value there would produce negative-count ranges in EmitSplit.
+            int counter = (int)(((long)counterPosition % _clipFrames + _clipFrames) % _clipFrames);
+
+            if (!_hasFirstSample)
+            {
+                _hasFirstSample = true;
+                _prevCounter = counter;
+                _firstSampleTime = elapsedSeconds;
+                return;
+            }
+
+            // Forward distance around the ring since the previous sample.
+            long d = ((long)counter - _prevCounter + _clipFrames) % _clipFrames;
+            _prevCounter = counter;
+
+            if (!Ready)
+            {
+                // Discard the settle window entirely: right after a device starts, the counter can
+                // burst ahead while driver buffers flush, which would inflate the measured rate
+                // (observed: a healthy device measuring k=1.07 right after a device transition).
+                if (elapsedSeconds - _firstSampleTime < _settleSeconds)
+                    return;
+                if (double.IsNaN(_measureStart))
+                {
+                    // Anchor the measurement window here; the delta spanning the settle boundary
+                    // is discarded with the settle period.
+                    _measureStart = elapsedSeconds;
+                    return;
+                }
+
+                _preRollAdvance += d;
+                if (d > 0 && d < _minJump) _minJump = d;
+                double window = elapsedSeconds - _measureStart;
+                if (window >= _preRollSeconds)
+                    FinishPreRoll(window);
+                return;
+            }
+
+            _pending += d;
+
+            // After a long stall, drop the oldest backlog instead of pushing a burst that would
+            // overrun the consumer. (A stall longer than one counter lap aliases the unwrapped
+            // advance and silently loses whole laps; this bound covers everything observable.)
+            if (_pending > _maxBacklog)
+            {
+                long drop = _pending - _maxBacklog;
+                if (Fragmented) drop -= drop % Stride; // preserve stride alignment
+                if (drop > 0)
+                {
+                    _readPos = (int)((_readPos + drop) % _clipFrames);
+                    _pending -= drop;
+                    TotalDropped += drop;
+                }
+            }
+
+            if (Fragmented)
+            {
+                // Emit the valid head of every complete stride; a partial stride stays pending.
+                while (_pending >= Stride)
+                {
+                    EmitSplit(_readPos, ValidPerStride, ranges);
+                    _readPos = (_readPos + Stride) % _clipFrames;
+                    _pending -= Stride;
+                }
+            }
+            else if (_pending > 0)
+            {
+                EmitSplit(_readPos, (int)_pending, ranges);
+                _readPos = (int)((_readPos + _pending) % _clipFrames);
+                _pending = 0;
+            }
+        }
+
+        // Converts the pre-roll measurements into the streaming configuration and marks the
+        // reader Ready.
+        private void FinishPreRoll(double windowSeconds)
+        {
+            // With no measured advance, or a zero-length window, assume a healthy counter rather
+            // than dividing by zero.
+            CounterRate = (_preRollAdvance > 0 && windowSeconds > 0) ? _preRollAdvance / windowSeconds : _dataRate;
+            K = CounterRate / _dataRate;
+            Fragmented = K > _fragmentedKThreshold && _minJump != long.MaxValue && _minJump > 1;
+            Stride = Fragmented ? (int)_minJump : 0;
+            ValidPerStride = Fragmented ? Math.Max(1, (int)Math.Round(Stride / K)) : 0;
+            // Clamp before casting so a huge or infinite limit means "never drop" instead of an
+            // out-of-range double-to-long conversion.
+            double backlogLimit = CounterRate * _maxBacklogSeconds;
+            _maxBacklog = backlogLimit >= long.MaxValue ? long.MaxValue : (long)backlogLimit;
+            _readPos = _prevCounter; // counter values land on jump boundaries
+            _pending = 0;
+            Ready = true;
+        }
+
+        // Appends [start, start + count) to ranges, splitting it in two at the end of the ring.
+        private void EmitSplit(int start, int count, List<ReadRange> ranges)
+        {
+            if (count <= 0) return;
+            int first = Math.Min(count, _clipFrames - start);
+            ranges.Add(new ReadRange { Start = start, Count = first });
+            if (count > first)
+                ranges.Add(new ReadRange { Start = 0, Count = count - first });
+        }
+    }
+}
diff --git a/Runtime/Scripts/Internal/MicClipReader.cs.meta b/Runtime/Scripts/Internal/MicClipReader.cs.meta
new file mode 100644
index 00000000..88aa56bd
--- /dev/null
+++ b/Runtime/Scripts/Internal/MicClipReader.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: d0ae29390ef914aa6b62ae81c9b4f212
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/Scripts/Internal/StreamingResampler.cs b/Runtime/Scripts/Internal/StreamingResampler.cs
new file mode 100644
index 00000000..dd9be2d6
--- /dev/null
+++ b/Runtime/Scripts/Internal/StreamingResampler.cs
@@ -0,0 +1,54 @@
+using System;
+using System.Collections.Generic;
+
+namespace LiveKit.Internal
+{
+    /// <summary>
+    /// Streaming linear resampler for mono audio. Interpolation state carries across chunks, so a
+    /// stream processed in arbitrary slices produces the same output as processing it whole.
+    /// Free of UnityEngine dependencies so it can be unit tested.
+    /// </summary>
+    internal sealed class StreamingResampler
+    {
+        private readonly double _step; // input samples advanced per output sample
+        private double _pos;           // fractional read position; >= -1, where -1 maps to _prev
+        private float _prev;           // last sample of the previous chunk
+
+        /// <summary>Creates a resampler converting <paramref name="inputRate"/> to <paramref name="outputRate"/>.</summary>
+        /// <exception cref="ArgumentOutOfRangeException">Either rate is zero or negative.</exception>
+        public StreamingResampler(int inputRate, int outputRate)
+        {
+            if (inputRate <= 0) throw new ArgumentOutOfRangeException(nameof(inputRate));
+            if (outputRate <= 0) throw new ArgumentOutOfRangeException(nameof(outputRate));
+            _step = (double)inputRate / outputRate;
+        }
+
+        /// <summary>Clears the carried interpolation state so the next chunk starts a new stream.</summary>
+        public void Reset()
+        {
+            _pos = 0.0;
+            _prev = 0f;
+        }
+
+        /// <summary>
+        /// Resamples the first <paramref name="count"/> samples of <paramref name="input"/> and
+        /// returns the produced output samples (possibly empty for very small chunks).
+        /// </summary>
+        /// <param name="input">Mono input samples; only the first <paramref name="count"/> are read.</param>
+        /// <param name="count">Number of samples to consume; zero or less returns an empty array.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null while <paramref name="count"/> is positive.</exception>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> exceeds the length of <paramref name="input"/>.</exception>
+        public float[] Process(float[] input, int count)
+        {
+            if (count <= 0) return Array.Empty<float>();
+            // Validate before touching any state so a bad call leaves the stream position intact.
+            if (input == null) throw new ArgumentNullException(nameof(input));
+            if (count > input.Length) throw new ArgumentOutOfRangeException(nameof(count));
+
+            var output = new List<float>((int)(count / _step) + 2);
+            double pos = _pos;
+            // The last input sample is only interpolated once the next chunk supplies its
+            // right-hand neighbour, hence the strict bound of count - 1.
+            while (pos < count - 1)
+            {
+                int i0 = (int)Math.Floor(pos);
+                float a = i0 < 0 ? _prev : input[i0]; // index -1 is the previous chunk's tail
+                float b = input[i0 + 1];
+                float frac = (float)(pos - i0);
+                output.Add(a * (1f - frac) + b * frac);
+                pos += _step;
+            }
+            _prev = input[count - 1];
+            _pos = pos - count; // re-express the position relative to the next chunk
+            return output.ToArray();
+        }
+    }
+}
diff --git a/Runtime/Scripts/Internal/StreamingResampler.cs.meta b/Runtime/Scripts/Internal/StreamingResampler.cs.meta
new file mode 100644
index 00000000..26d7c37c
--- /dev/null
+++ b/Runtime/Scripts/Internal/StreamingResampler.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 967338b84cfb74bdebca9132f3b9abd0
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/Scripts/MicrophoneSource.cs b/Runtime/Scripts/MicrophoneSource.cs
index 904b8da7..b424e2a5 100644
--- a/Runtime/Scripts/MicrophoneSource.cs
+++ b/Runtime/Scripts/MicrophoneSource.cs
@@ -1,5 +1,6 @@
using System;
using System.Collections;
+using System.Collections.Generic;
using UnityEngine;
using LiveKit.Internal;
@@ -13,25 +14,57 @@ namespace LiveKit
///
sealed public class MicrophoneSource : RtcAudioSource
{
- private readonly GameObject _sourceObject;
+        // --- Capture design ---
+        // The microphone clip's ring buffer is read directly (no AudioSource playback, no
+        // OnAudioFilterRead), so capture is decoupled from the output device's clock.
+        //
+        // Microphone.GetPosition cannot be trusted as a sample position on every platform; see
+        // MicClipReader for the fragmented-buffer model (macOS + Bluetooth HFP) and how the
+        // contiguous stream is reconstructed from it.
+        //
+        // The clip's data rate is clip.frequency (verified: fragments play at correct pitch), so
+        // captured samples are resampled from clip.frequency to the fixed native-source rate.
+        private const uint TargetSampleRate = 48000;
+        // The timing constants are doubles because MicClipReader takes doubles; a float literal
+        // such as 0.3f would widen to 0.30000001192... instead of the intended value.
+        private const double PreRollSeconds = 0.3;
+        private const double SettleSeconds = 0.1; // discard the counter's startup burst before measuring
+        // Engaging fragmented mode discards (stride - valid) samples per stride, so a false
+        // positive guarantees audio loss while a false negative only risks mild artifacts. The
+        // observed pathological device measures k=3.2; healthy devices measure ~1.0 with up to a
+        // few percent of startup noise. Keep a wide margin between the two.
+        private const double FragmentedKThreshold = 1.5;
+        private const double MaxBacklogSeconds = 0.2; // drop backlog beyond this after a stall
+
private readonly string _deviceName;
public override event Action AudioRead;
private bool _disposed = false;
private bool _started = false;
+ private volatile bool _capturing = false;
+
+ private StreamingResampler _resampler;
///
/// Creates a new microphone source for the given device.
///
/// The name of the device to capture from. Use to
/// get the list of available devices.
- /// The GameObject to attach the AudioSource to. The object must be kept in the scene
- /// for the duration of the source's lifetime.
- public MicrophoneSource(string deviceName, GameObject sourceObject) : base(2, RtcAudioSourceType.AudioSourceMicrophone)
+ /// <param name="sourceObject">Unused; retained for compatibility. The microphone clip is read
+ /// directly, so no scene GameObject/AudioSource is required.</param>
+ public MicrophoneSource(string deviceName, GameObject sourceObject)
+ : base(RtcAudioSourceType.AudioSourceMicrophone, TargetSampleRate, 1)
{
+ // Only the device name is stored; sourceObject is intentionally ignored.
_deviceName = deviceName;
- _sourceObject = sourceObject;
+ }
+
+        // Chooses the frequency handed to Microphone.Start. It is only a request the platform
+        // may not honor, so clip.frequency remains the authoritative data rate afterwards.
+        private static int ResolveRequestedSampleRate(string deviceName)
+        {
+            int preferredRate = (int)TargetSampleRate;
+            Microphone.GetDeviceCaps(deviceName, out int lowestRate, out int highestRate);
+
+            // Both bounds reported as zero: there is no range to clamp to, so ask for the
+            // target rate unchanged.
+            bool noReportedRange = lowestRate == 0 && highestRate == 0;
+            return noReportedRange
+                ? preferredRate
+                : Mathf.Clamp(preferredRate, lowestRate, highestRate);
         }
///
@@ -49,7 +82,6 @@ public override void Start()
base.Start();
if (_started) return;
-
if (!Application.HasUserAuthorization(mode: UserAuthorization.Microphone))
throw new InvalidOperationException("Microphone access not authorized");
@@ -61,13 +93,6 @@ public override void Start()
private IEnumerator StartMicrophone()
{
- // Validate that the GameObject is still valid before starting
- if (_sourceObject == null)
- {
- Utils.Error("MicrophoneSource: GameObject is null, cannot start microphone");
- yield break;
- }
-
// Verify microphone is still authorized (could change during background)
if (!Application.HasUserAuthorization(UserAuthorization.Microphone))
{
@@ -76,13 +101,14 @@ private IEnumerator StartMicrophone()
}
AudioClip clip = null;
+ int requestedRate = ResolveRequestedSampleRate(_deviceName);
try
{
clip = Microphone.Start(
_deviceName,
loop: true,
- lengthSec: 1,
- frequency: (int)DefaultMicrophoneSampleRate
+ lengthSec: 2,
+ frequency: requestedRate
);
}
catch (Exception e)
@@ -97,29 +123,6 @@ private IEnumerator StartMicrophone()
yield break;
}
- // Ensure no duplicate components exist before adding new ones.
- // This is important during app resume on iOS where components might not be
- // fully destroyed yet due to Unity's deferred Destroy().
- var existingSource = _sourceObject.GetComponent();
- if (existingSource != null)
- UnityEngine.Object.DestroyImmediate(existingSource);
-
- var existingProbe = _sourceObject.GetComponent();
- if (existingProbe != null)
- {
- existingProbe.AudioRead -= OnAudioRead;
- UnityEngine.Object.DestroyImmediate(existingProbe);
- }
-
- var source = _sourceObject.AddComponent();
- source.clip = clip;
- source.loop = true;
-
- var probe = _sourceObject.AddComponent();
- // Clear the audio data after it is read as to not play it through the speaker locally.
- probe.ClearAfterInvocation();
- probe.AudioRead += OnAudioRead;
-
// Wait for microphone to actually start producing data with a timeout
const float timeout = 2f;
float elapsed = 0f;
@@ -135,8 +138,86 @@ private IEnumerator StartMicrophone()
yield break;
}
- source.Play();
- Utils.Debug($"MicrophoneSource device='{_deviceName}' started successfully");
+ Utils.Info($"MicrophoneSource device='{_deviceName}' clip={clip.frequency}Hz/{clip.channels}ch samples={clip.samples} requested={requestedRate}Hz target={TargetSampleRate}Hz");
+
+ _capturing = true;
+ MonoBehaviourContext.RunCoroutine(CaptureLoop(clip));
+ }
+
+        // Reads new samples from the clip's ring buffer each frame and pushes them to the native
+        // source via AudioRead. MicClipReader decides what to read (including reconstructing
+        // fragmented buffers); this loop is the thin Unity shell around it. Runs on the main
+        // thread; the native source's queue absorbs the per-frame pacing jitter.
+        private IEnumerator CaptureLoop(AudioClip clip)
+        {
+            int clipFrames = clip.samples;
+            int channels = clip.channels;
+            int dataRate = clip.frequency > 0 ? clip.frequency : (int)DefaultMicrophoneSampleRate;
+
+            var reader = new MicClipReader(clipFrames, dataRate, PreRollSeconds, FragmentedKThreshold, MaxBacklogSeconds, SettleSeconds);
+            _resampler = new StreamingResampler(dataRate, (int)TargetSampleRate);
+            var ranges = new List<MicClipReader.ReadRange>();
+            var clock = System.Diagnostics.Stopwatch.StartNew();
+            bool announced = false;
+            long reportedDrops = 0;
+
+            while (_capturing && !_disposed)
+            {
+                yield return null;
+
+                // Stop() or Dispose() may have run while this coroutine was suspended. By then the
+                // microphone has been ended, so its position no longer describes this clip and
+                // feeding it to the reader would push stale samples. Re-check before reading.
+                if (!_capturing || _disposed)
+                    yield break;
+
+                ranges.Clear();
+                reader.Update(Microphone.GetPosition(_deviceName), clock.Elapsed.TotalSeconds, ranges);
+
+                // Log the detected capture mode once, as soon as the pre-roll has finished.
+                if (!announced && reader.Ready)
+                {
+                    announced = true;
+                    if (reader.Fragmented)
+                        Utils.Info($"MicrophoneSource: fragmented clip detected (k={reader.K:F2}); reading {reader.ValidPerStride} of every {reader.Stride} samples at {dataRate}Hz");
+                    else
+                        Utils.Info($"MicrophoneSource: contiguous capture (k={reader.K:F2}) at {dataRate}Hz");
+                }
+
+                // TotalDropped is cumulative; report only what was dropped since the last warning.
+                if (reader.TotalDropped > reportedDrops)
+                {
+                    Utils.Warning($"MicrophoneSource: dropped {reader.TotalDropped - reportedDrops} buffered samples after a stall");
+                    reportedDrops = reader.TotalDropped;
+                }
+
+                for (int i = 0; i < ranges.Count; i++)
+                    ReadAndPush(clip, channels, ranges[i].Start, ranges[i].Count);
+            }
+        }
+
+        // Reads a contiguous range, downmixes to mono, resamples clip.frequency ->
+        // TargetSampleRate (the resampler carries state across calls, so fragment junctions stay
+        // continuous), and fires AudioRead. A range whose read fails is skipped.
+        private void ReadAndPush(AudioClip clip, int channels, int start, int count)
+        {
+            if (count <= 0) return;
+
+            var interleaved = new float[count * channels];
+            // GetData reports failure through its return value; without this check a failed
+            // read would push the still-zeroed buffer as if it were captured audio.
+            if (!clip.GetData(interleaved, start))
+            {
+                Utils.Debug($"MicrophoneSource device='{_deviceName}' could not read {count} samples at offset {start}; skipping");
+                return;
+            }
+
+            float[] mono;
+            if (channels == 1)
+            {
+                mono = interleaved;
+            }
+            else
+            {
+                // Average the channels of each frame into a single mono sample.
+                mono = new float[count];
+                for (int f = 0; f < count; f++)
+                {
+                    float sum = 0f;
+                    for (int ch = 0; ch < channels; ch++)
+                        sum += interleaved[f * channels + ch];
+                    mono[f] = sum / channels;
+                }
+            }
+
+            var output = _resampler.Process(mono, count);
+            if (output.Length > 0)
+                AudioRead?.Invoke(output, 1, (int)TargetSampleRate);
         }
///
@@ -152,33 +233,15 @@ public override void Stop()
private IEnumerator StopMicrophone()
{
+ // Clear the flag that CaptureLoop's while-condition checks, so the capture loop ends.
+ _capturing = false;
+
if (Microphone.IsRecording(_deviceName))
Microphone.End(_deviceName);
- // Check if GameObject is still valid before trying to access components
- if (_sourceObject != null)
- {
- var probe = _sourceObject.GetComponent();
- if (probe != null)
- {
- probe.AudioRead -= OnAudioRead;
- UnityEngine.Object.Destroy(probe);
- }
-
- var source = _sourceObject.GetComponent();
- if (source != null)
- UnityEngine.Object.Destroy(source);
- }
-
Utils.Debug($"MicrophoneSource device='{_deviceName}' stopped");
yield return null;
}
- private void OnAudioRead(float[] data, int channels, int sampleRate)
- {
- AudioRead?.Invoke(data, channels, sampleRate);
- }
-
private void OnApplicationPause(bool pause)
{
if (!_started)
@@ -246,4 +309,4 @@ protected override void Dispose(bool disposing)
Dispose(false);
}
}
-}
\ No newline at end of file
+}
diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs
index a9af8a0a..43f5c102 100644
--- a/Runtime/Scripts/RtcAudioSource.cs
+++ b/Runtime/Scripts/RtcAudioSource.cs
@@ -83,20 +83,33 @@ private sealed class PendingAudioFrame
private volatile bool _disposed = false;
private int _audioReadCount = 0;
- protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = RtcAudioSourceType.AudioSourceCustom)
+ // Sources that tap Unity's audio graph (e.g. BasicAudioSource) don't know their format ahead
+ // of time — it is whatever Unity's audio graph delivers. They use this constructor, which
+ // passes 0/0 so the native source is configured from Unity's current output configuration
+ // (see ResolveDeviceFormat). MicrophoneSource does not use it: it declares a fixed format
+ // through the overload that takes a sample rate and channel count.
+ protected RtcAudioSource(RtcAudioSourceType audioSourceType)
+ : this(audioSourceType, 0, 0) { }
+
+ // Sources that generate a fixed, known format (e.g. test signal generators) declare it
+ // directly. Passing 0 for either value falls back to the device configuration.
+ protected RtcAudioSource(RtcAudioSourceType audioSourceType, uint sampleRate, uint channels)
{
_sourceType = audioSourceType;
- _expectedChannels = (uint)channels;
+
+ if (sampleRate > 0 && channels > 0)
+ {
+ _expectedSampleRate = sampleRate;
+ _expectedChannels = channels;
+ }
+ else
+ {
+ (_expectedSampleRate, _expectedChannels) = ResolveDeviceFormat();
+ }
using var request = FFIBridge.Instance.NewRequest();
var newAudioSource = request.request;
newAudioSource.Type = AudioSourceType.AudioSourceNative;
- newAudioSource.NumChannels = (uint)channels;
- newAudioSource.SampleRate = _sourceType == RtcAudioSourceType.AudioSourceMicrophone ?
- DefaultMicrophoneSampleRate : DefaultSampleRate;
- _expectedSampleRate = newAudioSource.SampleRate;
-
- Utils.Debug($"NewAudioSource: {newAudioSource.NumChannels} {newAudioSource.SampleRate}");
+ newAudioSource.NumChannels = _expectedChannels;
+ newAudioSource.SampleRate = _expectedSampleRate;
newAudioSource.Options = request.TempResource();
newAudioSource.Options.EchoCancellation = true;
@@ -109,6 +122,49 @@ protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType =
Utils.Debug($"{DebugTag} created handle={Handle.DangerousGetHandle()} expectedRate={_expectedSampleRate} expectedChannels={_expectedChannels} sourceType={_sourceType}");
}
+        // Determines the format to configure the native source with when the subclass did not
+        // declare one. Starts from the platform defaults for this source type and overrides each
+        // value that Unity's audio configuration reports as positive; if the configuration
+        // cannot be read, a warning is logged and whatever was resolved so far is returned.
+        private (uint sampleRate, uint channels) ResolveDeviceFormat()
+        {
+            uint resolvedRate;
+            if (_sourceType == RtcAudioSourceType.AudioSourceMicrophone)
+                resolvedRate = DefaultMicrophoneSampleRate;
+            else
+                resolvedRate = DefaultSampleRate;
+            uint resolvedChannels = DefaultChannels;
+
+            try
+            {
+                var unityConfig = UnityEngine.AudioSettings.GetConfiguration();
+                resolvedRate = unityConfig.sampleRate > 0 ? (uint)unityConfig.sampleRate : resolvedRate;
+
+                // An unmapped speaker mode yields 0, in which case the default is kept.
+                uint speakerChannels = SpeakerModeChannels(unityConfig.speakerMode);
+                resolvedChannels = speakerChannels > 0 ? speakerChannels : resolvedChannels;
+            }
+            catch (Exception readFailure)
+            {
+                Utils.Warning($"{DebugTag} could not read Unity audio configuration, using defaults: {readFailure.Message}");
+            }
+
+            return (resolvedRate, resolvedChannels);
+        }
+
+        // Maps a Unity speaker mode to its channel count; 0 means the mode is not recognized.
+        private static uint SpeakerModeChannels(UnityEngine.AudioSpeakerMode mode) => mode switch
+        {
+            UnityEngine.AudioSpeakerMode.Mono => 1u,
+            UnityEngine.AudioSpeakerMode.Stereo => 2u,
+            UnityEngine.AudioSpeakerMode.Quad => 4u,
+            UnityEngine.AudioSpeakerMode.Surround => 5u,
+            UnityEngine.AudioSpeakerMode.Mode5point1 => 6u,
+            UnityEngine.AudioSpeakerMode.Mode7point1 => 8u,
+            UnityEngine.AudioSpeakerMode.Prologic => 2u,
+            _ => 0u,
+        };
+
///
/// Begin capturing audio samples from the underlying source.
///
@@ -153,9 +209,16 @@ private void OnAudioRead(float[] data, int channels, int sampleRate)
return;
}
+ // The native source rejects frames whose rate/channels differ from how it was
+ // configured (it does not resample). This should not happen now that sources declare
+ // or resolve their real format, but if Unity reports an inconsistent format — or the
+ // output configuration changes at runtime — we drop the frame instead of sending a
+ // mismatch the native side would error on.
if ((uint)sampleRate != _expectedSampleRate || (uint)channels != _expectedChannels)
{
- Utils.Warning($"{DebugTag} audio frame #{frameIndex} metadata mismatch actualRate={sampleRate} actualChannels={channels} expectedRate={_expectedSampleRate} expectedChannels={_expectedChannels} sourceType={_sourceType}");
+ if (frameIndex == 1 || frameIndex % 100 == 0)
+ Utils.Warning($"{DebugTag} dropping audio frame #{frameIndex}: format {sampleRate}/{channels} does not match source {_expectedSampleRate}/{_expectedChannels} (sourceType={_sourceType})");
+ return;
}
var pendingBeforeSend = PendingFrameCount();
diff --git a/Samples~/Meet/Assets/Runtime/MeetManager.cs b/Samples~/Meet/Assets/Runtime/MeetManager.cs
index 225c7a0c..532aa319 100644
--- a/Samples~/Meet/Assets/Runtime/MeetManager.cs
+++ b/Samples~/Meet/Assets/Runtime/MeetManager.cs
@@ -453,8 +453,7 @@ private IEnumerator PublishLocalMicrophone()
{
if (_audioObjects.ContainsKey(LocalAudioTrackName)) yield break;
- Microphone.Start(null, true, 10, 44100);
-
+ // MicrophoneSource starts the device itself, so we only need the device name here.
var audioObject = new GameObject($"My Microphone: {Microphone.devices[0]}");
audioObject.transform.SetParent(_audioTrackParent);
@@ -488,7 +487,7 @@ private void UnpublishLocalMicrophone()
if (_audioObjects.TryGetValue(LocalAudioTrackName, out var obj))
{
- obj.GetComponent()?.Stop();
+ // MicrophoneSource reads the mic clip directly; no AudioSource is attached anymore.
Destroy(obj);
_audioObjects.Remove(LocalAudioTrackName);
}
@@ -567,7 +566,10 @@ private void CleanUpAllTracks()
foreach (var obj in _audioObjects.Values)
{
if (obj == null) continue;
- obj.GetComponent()?.Stop();
+ // Not every audio object has an AudioSource (the local mic object no longer does), and
+ // ?. on GetComponent bypasses Unity's missing-component null semantics in the editor.
+ if (obj.TryGetComponent(out var audioSource))
+ audioSource.Stop();
Destroy(obj);
}
_audioObjects.Clear();
diff --git a/Tests/EditMode/MicClipReaderTests.cs b/Tests/EditMode/MicClipReaderTests.cs
new file mode 100644
index 00000000..36a563d5
--- /dev/null
+++ b/Tests/EditMode/MicClipReaderTests.cs
@@ -0,0 +1,261 @@
+using System;
+using System.Collections.Generic;
+using NUnit.Framework;
+using LiveKit.Internal;
+
+namespace LiveKit.EditModeTests
+{
+    /// <summary>
+    /// Tests for the microphone clip reading logic, including reconstruction of the fragmented
+    /// buffers produced by macOS with Bluetooth HFP headsets (valid fragments of 320 samples at a
+    /// 1024-sample stride with zero padding, position counter inflated k=3.2x; structure taken
+    /// from a raw buffer dump of a Sony MDR-1000X).
+    /// </summary>
+    public class MicClipReaderTests
+    {
+        const double PreRoll = 0.3;
+
+        // Feeds one counter sample and returns only the ranges produced by that update.
+        static List<MicClipReader.ReadRange> Drain(MicClipReader reader, int counter, double t)
+        {
+            var ranges = new List<MicClipReader.ReadRange>();
+            reader.Update(counter, t, ranges);
+            return ranges;
+        }
+
+        // Runs the pre-roll with the given advance per tick, returning (counter, time) at the end.
+        static (int counter, double t) RunPreRoll(MicClipReader reader, int clipFrames, int advancePerTick, double dt)
+        {
+            int counter = 0;
+            double t = 0;
+            reader.Update(counter, t, new List<MicClipReader.ReadRange>());
+            while (!reader.Ready)
+            {
+                t += dt;
+                counter = (counter + advancePerTick) % clipFrames;
+                reader.Update(counter, t, new List<MicClipReader.ReadRange>());
+            }
+            return (counter, t);
+        }
+
+        [Test]
+        public void HealthyDevice_UsesContiguousMode_AndEmitsAllSamples()
+        {
+            const int clipFrames = 96000; // 2s @ 48k
+            const int rate = 48000;
+            const int perTick = 480;      // 10ms ticks at the data rate
+            const double dt = 0.01;
+
+            var reader = new MicClipReader(clipFrames, rate, PreRoll);
+            var (counter, t) = RunPreRoll(reader, clipFrames, perTick, dt);
+
+            Assert.IsFalse(reader.Fragmented);
+            Assert.AreEqual(1.0, reader.K, 0.02);
+
+            long emitted = 0;
+            for (int i = 0; i < 100; i++)
+            {
+                t += dt;
+                counter = (counter + perTick) % clipFrames;
+                foreach (var r in Drain(reader, counter, t))
+                {
+                    Assert.LessOrEqual(r.Start + r.Count, clipFrames, "range must not cross the ring wrap");
+                    emitted += r.Count;
+                }
+            }
+            Assert.AreEqual(100L * perTick, emitted, "contiguous mode must emit every written sample");
+        }
+
+        [Test]
+        public void FragmentedDevice_DetectsStrideAndValidCount()
+        {
+            const int clipFrames = 32000; // 2s @ 16k
+            const int rate = 16000;
+            const int stride = 1024;      // one counter jump per real 20ms packet
+            const double dt = 0.02;
+
+            var reader = new MicClipReader(clipFrames, rate, PreRoll);
+            RunPreRoll(reader, clipFrames, stride, dt);
+
+            Assert.IsTrue(reader.Fragmented);
+            Assert.AreEqual(3.2, reader.K, 0.05);
+            Assert.AreEqual(stride, reader.Stride);
+            Assert.AreEqual(320, reader.ValidPerStride);
+        }
+
+        [Test]
+        public void FragmentedDevice_ReconstructsContiguousStream()
+        {
+            const int clipFrames = 32000;
+            const int rate = 16000;
+            const int stride = 1024;
+            const int valid = 320;
+            const double dt = 0.02;
+
+            var reader = new MicClipReader(clipFrames, rate, PreRoll);
+
+            // Simulated clip: each tick the writer stores `valid` sequential marker values at the
+            // counter's previous position and zero-fills the rest of the stride, exactly like the
+            // dumped MDR-1000X buffer.
+            var clip = new float[clipFrames];
+            float marker = 1f;
+            int counter = 0;
+            double t = 0;
+            reader.Update(counter, t, new List<MicClipReader.ReadRange>());
+
+            void WriteFragment()
+            {
+                for (int i = 0; i < stride; i++)
+                    clip[(counter + i) % clipFrames] = i < valid ? marker + i : 0f;
+                marker += valid;
+                counter = (counter + stride) % clipFrames;
+            }
+
+            while (!reader.Ready)
+            {
+                t += dt;
+                WriteFragment();
+                reader.Update(counter, t, new List<MicClipReader.ReadRange>());
+            }
+
+            // Capture for several buffer laps and verify the emitted stream is the unbroken
+            // marker sequence: lossless reconstruction with no gaps, repeats, or padding.
+            var collected = new List<float>();
+            for (int tick = 0; tick < 200; tick++)
+            {
+                t += dt;
+                WriteFragment();
+                foreach (var r in Drain(reader, counter, t))
+                {
+                    Assert.LessOrEqual(r.Start + r.Count, clipFrames, "range must not cross the ring wrap");
+                    for (int i = 0; i < r.Count; i++)
+                        collected.Add(clip[r.Start + i]);
+                }
+            }
+
+            Assert.AreEqual(200 * valid, collected.Count, "every valid fragment must be emitted exactly once");
+            for (int i = 1; i < collected.Count; i++)
+                Assert.AreEqual(collected[i - 1] + 1f, collected[i], $"stream must be contiguous at index {i}");
+        }
+
+        [Test]
+        public void FragmentedDevice_DropsStaleBacklogStrideAligned()
+        {
+            const int clipFrames = 32000;
+            const int rate = 16000;
+            const int stride = 1024;
+            const double dt = 0.02;
+            const double maxBacklogSec = 0.2;
+
+            var reader = new MicClipReader(clipFrames, rate, PreRoll, 1.05, maxBacklogSec);
+            var (counter, t) = RunPreRoll(reader, clipFrames, stride, dt);
+
+            // One giant advance (a main-thread stall): 25 strides at once.
+            const int stalledStrides = 25;
+            counter = (counter + stalledStrides * stride) % clipFrames;
+            t += stalledStrides * dt;
+            var ranges = Drain(reader, counter, t);
+
+            Assert.Greater(reader.TotalDropped, 0, "stall backlog must be dropped");
+            Assert.AreEqual(0, reader.TotalDropped % stride, "drop must preserve stride alignment");
+
+            // Emitted + dropped must account for the whole advance (in counter units).
+            long emittedStrides = 0;
+            foreach (var r in ranges) emittedStrides += r.Count;
+            emittedStrides /= reader.ValidPerStride;
+            Assert.AreEqual(stalledStrides, emittedStrides + reader.TotalDropped / stride);
+
+            // The bounded burst must not exceed the backlog limit.
+            Assert.LessOrEqual(emittedStrides * stride, (long)(reader.CounterRate * maxBacklogSec));
+        }
+
+        [Test]
+        public void SlightlyInflatedCounter_StaysContiguous()
+        {
+            // Regression: a healthy MacBook mic measured k=1.07 right after a device transition
+            // (startup-burst noise), and the old 1.05 threshold engaged fragmented mode, silently
+            // discarding ~6% of real audio. Borderline rates must stay contiguous.
+            const int clipFrames = 96000;
+            const int rate = 48000;
+            const int perTick = 514; // ~k=1.07 at 10ms ticks
+            const double dt = 0.01;
+
+            var reader = new MicClipReader(clipFrames, rate, PreRoll);
+            RunPreRoll(reader, clipFrames, perTick, dt);
+
+            Assert.IsFalse(reader.Fragmented, "k slightly above 1 must not trigger fragmented mode");
+            Assert.AreEqual(1.07, reader.K, 0.02);
+        }
+
+        [Test]
+        public void NoRangesAreEmittedDuringPreRoll()
+        {
+            const int clipFrames = 96000;
+            var reader = new MicClipReader(clipFrames, 48000, PreRoll);
+            var ranges = new List<MicClipReader.ReadRange>();
+            reader.Update(0, 0.0, ranges);
+            reader.Update(480, 0.01, ranges);
+            reader.Update(960, 0.02, ranges);
+            Assert.IsFalse(reader.Ready);
+            Assert.IsEmpty(ranges);
+        }
+    }
+
+    /// <summary>
+    /// Tests for the streaming linear resampler: rate conversion of a whole buffer and
+    /// equivalence between chunked and whole-buffer processing.
+    /// </summary>
+    public class StreamingResamplerTests
+    {
+        // Generates `count` samples of a unit-amplitude sine at `freqHz`, sampled at `rate`.
+        static float[] Sine(int count, double freqHz, int rate)
+        {
+            var s = new float[count];
+            for (int i = 0; i < count; i++)
+                s[i] = (float)Math.Sin(2.0 * Math.PI * freqHz * i / rate);
+            return s;
+        }
+
+        // Counts sign changes between neighbouring samples.
+        static int ZeroCrossings(IReadOnlyList<float> s)
+        {
+            int n = 0;
+            for (int i = 1; i < s.Count; i++)
+                if ((s[i - 1] < 0f) != (s[i] < 0f)) n++;
+            return n;
+        }
+
+        [Test]
+        public void Upsample16kTo48k_PreservesFrequencyAndLength()
+        {
+            const int inRate = 16000, outRate = 48000;
+            var input = Sine(16000, 200.0, inRate); // 1s of 200Hz
+            var resampler = new StreamingResampler(inRate, outRate);
+            var output = resampler.Process(input, input.Length);
+
+            Assert.AreEqual(outRate, output.Length, outRate / 100, "1s in should be ~1s out at the new rate");
+            // 200Hz over ~1s crosses zero ~400 times regardless of sample rate.
+            Assert.AreEqual(ZeroCrossings(input), ZeroCrossings(output), 4);
+        }
+
+        [Test]
+        public void ChunkedProcessing_MatchesWholeProcessing()
+        {
+            const int inRate = 16000, outRate = 48000;
+            var input = Sine(3200, 250.0, inRate);
+
+            var whole = new StreamingResampler(inRate, outRate).Process(input, input.Length);
+
+            // Process the same stream in 320-sample fragments (the MDR-1000X packet size).
+            var chunked = new List<float>();
+            var resampler = new StreamingResampler(inRate, outRate);
+            for (int off = 0; off < input.Length; off += 320)
+            {
+                var chunk = new float[320];
+                Array.Copy(input, off, chunk, 0, 320);
+                chunked.AddRange(resampler.Process(chunk, 320));
+            }
+
+            // Accumulated floating-point rounding differs by an ulp between the two paths (the
+            // chunked position is renormalized per chunk), which can flip the final boundary
+            // sample — allow a 1-sample tail difference, but the overlap must match exactly.
+            Assert.AreEqual(whole.Length, chunked.Count, 1, "chunking must not change the output length (±1 tail sample)");
+            int overlap = Math.Min(whole.Length, chunked.Count);
+            for (int i = 0; i < overlap; i++)
+                Assert.AreEqual(whole[i], chunked[i], 1e-4f, $"chunked output diverges at {i}");
+        }
+    }
+}
diff --git a/Tests/EditMode/MicClipReaderTests.cs.meta b/Tests/EditMode/MicClipReaderTests.cs.meta
new file mode 100644
index 00000000..70af710c
--- /dev/null
+++ b/Tests/EditMode/MicClipReaderTests.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: b8df68a85510e4aa58359a4dd8b170c6
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {instanceID: 0}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Tests/PlayMode/Utils/SineWaveAudioSource.cs b/Tests/PlayMode/Utils/SineWaveAudioSource.cs
index 907e9ccc..2337615b 100644
--- a/Tests/PlayMode/Utils/SineWaveAudioSource.cs
+++ b/Tests/PlayMode/Utils/SineWaveAudioSource.cs
@@ -31,7 +31,7 @@ public SineWaveAudioSource(
int sampleRate = 48000,
double frequencyHz = 440.0,
float amplitude = 0.1f)
- : base(channels, RtcAudioSourceType.AudioSourceCustom)
+ : base(RtcAudioSourceType.AudioSourceCustom, (uint)sampleRate, (uint)channels)
{
_channels = channels;
_sampleRate = sampleRate;