diff --git a/Runtime/Scripts/BasicAudioSource.cs b/Runtime/Scripts/BasicAudioSource.cs index 3b63680b..8193090d 100644 --- a/Runtime/Scripts/BasicAudioSource.cs +++ b/Runtime/Scripts/BasicAudioSource.cs @@ -19,9 +19,11 @@ sealed public class BasicAudioSource : RtcAudioSource /// Creates a new basic audio source for the given in the scene. /// /// The to capture from. - /// The number of channels to capture. /// The type of audio source. - public BasicAudioSource(AudioSource source, int channels = 2, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(channels, sourceType) + /// <remarks> + /// The sample rate and channel count are taken from Unity's audio configuration. + /// </remarks> + public BasicAudioSource(AudioSource source, RtcAudioSourceType sourceType = RtcAudioSourceType.AudioSourceCustom) : base(sourceType) { _source = source; } diff --git a/Runtime/Scripts/Internal/MicClipReader.cs b/Runtime/Scripts/Internal/MicClipReader.cs new file mode 100644 index 00000000..99a49bbb --- /dev/null +++ b/Runtime/Scripts/Internal/MicClipReader.cs @@ -0,0 +1,177 @@ +using System; +using System.Collections.Generic; + +namespace LiveKit.Internal +{ + /// <summary> + /// Pure logic for reading a looping microphone clip ring buffer whose position counter may be + /// unreliable. Free of UnityEngine dependencies so it can be unit tested. + /// </summary> + /// <remarks> + /// On most devices the position counter advances at the clip's data rate and capture is a + /// plain contiguous read. On macOS with a Bluetooth HFP headset, however, FMOD writes each + /// real packet of clip.frequency audio and then advances the counter k (~3.2) times too far, + /// zero-filling the skipped range: the buffer holds valid fragments of N samples at a stride + /// J (measured: 320 of every 1024), the fragments join continuously, and the counter rate is + /// k = J/N times the data rate. 
The reader measures the counter rate and its smallest + /// discrete jump during a pre-roll; when the rate is inflated it emits only the first J/k + /// samples of each stride, reconstructing the contiguous stream. + /// </remarks> + internal sealed class MicClipReader + { + public struct ReadRange + { + public int Start; + public int Count; + } + + private readonly int _clipFrames; + private readonly int _dataRate; + private readonly double _preRollSeconds; + private readonly double _fragmentedKThreshold; + private readonly double _maxBacklogSeconds; + private readonly double _settleSeconds; + + private bool _hasFirstSample; + private int _prevCounter; + private double _firstSampleTime; + private double _measureStart = double.NaN; + private long _preRollAdvance; + private long _minJump = long.MaxValue; + + private long _maxBacklog; + private int _readPos; + private long _pending; + + /// <summary>False during the pre-roll measurement window; no ranges are emitted until ready.</summary> + public bool Ready { get; private set; } + + /// <summary>True when the counter rate is inflated and only part of each stride holds data.</summary> + public bool Fragmented { get; private set; } + + /// <summary>Counter samples per fragment cycle (0 when not fragmented).</summary> + public int Stride { get; private set; } + + /// <summary>Valid data samples at the start of each stride (0 when not fragmented).</summary> + public int ValidPerStride { get; private set; } + + /// <summary>Measured counter advance per second.</summary> + public double CounterRate { get; private set; } + + /// <summary>Counter inflation factor: CounterRate / dataRate (~1 on healthy devices).</summary> + public double K { get; private set; } + + /// <summary>Total counter samples discarded by backlog drops (e.g. after a stall).</summary> 
+ public long TotalDropped { get; private set; } + + public MicClipReader(int clipFrames, int dataRate, + double preRollSeconds = 0.3, double fragmentedKThreshold = 1.5, double maxBacklogSeconds = 0.2, + double settleSeconds = 0.1) + { + if (clipFrames <= 0) throw new ArgumentOutOfRangeException(nameof(clipFrames)); + if (dataRate <= 0) throw new ArgumentOutOfRangeException(nameof(dataRate)); + _clipFrames = clipFrames; + _dataRate = dataRate; + _preRollSeconds = preRollSeconds; + _fragmentedKThreshold = fragmentedKThreshold; + _maxBacklogSeconds = maxBacklogSeconds; + _settleSeconds = settleSeconds; + } + + /// <summary> + /// Feeds the current counter position at a monotonic time and appends the ranges that + /// should be read from the clip (already split at the ring wrap) to <paramref name="ranges"/>. + /// </summary> + public void Update(int counterPosition, double elapsedSeconds, List ranges) + { + if (!_hasFirstSample) + { + _hasFirstSample = true; + _prevCounter = counterPosition; + _firstSampleTime = elapsedSeconds; + return; + } + + long d = ((counterPosition - _prevCounter) % _clipFrames + _clipFrames) % _clipFrames; + _prevCounter = counterPosition; + + if (!Ready) + { + // Discard the settle window entirely: right after a device starts, the counter can + // burst ahead while driver buffers flush, which would inflate the measured rate + // (observed: a healthy device measuring k=1.07 right after a device transition). + if (elapsedSeconds - _firstSampleTime < _settleSeconds) + return; + if (double.IsNaN(_measureStart)) + { + // Anchor the measurement window here; the delta spanning the settle boundary + // is discarded with the settle period. 
+ _measureStart = elapsedSeconds; + return; + } + + _preRollAdvance += d; + if (d > 0 && d < _minJump) _minJump = d; + double window = elapsedSeconds - _measureStart; + if (window >= _preRollSeconds) + FinishPreRoll(window); + return; + } + + _pending += d; + + // After a long stall, drop the oldest backlog instead of pushing a burst that would + // overrun the consumer. (A stall longer than one counter lap aliases the unwrapped + // advance and silently loses whole laps; this bound covers everything observable.) + if (_pending > _maxBacklog) + { + long drop = _pending - _maxBacklog; + if (Fragmented) drop -= drop % Stride; // preserve stride alignment + if (drop > 0) + { + _readPos = (int)((_readPos + drop) % _clipFrames); + _pending -= drop; + TotalDropped += drop; + } + } + + if (Fragmented) + { + while (_pending >= Stride) + { + EmitSplit(_readPos, ValidPerStride, ranges); + _readPos = (_readPos + Stride) % _clipFrames; + _pending -= Stride; + } + } + else if (_pending > 0) + { + EmitSplit(_readPos, (int)_pending, ranges); + _readPos = (int)((_readPos + _pending) % _clipFrames); + _pending = 0; + } + } + + private void FinishPreRoll(double windowSeconds) + { + CounterRate = _preRollAdvance > 0 ? _preRollAdvance / windowSeconds : _dataRate; + K = CounterRate / _dataRate; + Fragmented = K > _fragmentedKThreshold && _minJump != long.MaxValue && _minJump > 1; + Stride = Fragmented ? (int)_minJump : 0; + ValidPerStride = Fragmented ? 
Math.Max(1, (int)Math.Round(Stride / K)) : 0; + _maxBacklog = (long)(CounterRate * _maxBacklogSeconds); + _readPos = _prevCounter; // counter values land on jump boundaries + _pending = 0; + Ready = true; + } + + private void EmitSplit(int start, int count, List ranges) + { + if (count <= 0) return; + int first = Math.Min(count, _clipFrames - start); + ranges.Add(new ReadRange { Start = start, Count = first }); + if (count > first) + ranges.Add(new ReadRange { Start = 0, Count = count - first }); + } + } +} diff --git a/Runtime/Scripts/Internal/MicClipReader.cs.meta b/Runtime/Scripts/Internal/MicClipReader.cs.meta new file mode 100644 index 00000000..88aa56bd --- /dev/null +++ b/Runtime/Scripts/Internal/MicClipReader.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: d0ae29390ef914aa6b62ae81c9b4f212 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/Scripts/Internal/StreamingResampler.cs b/Runtime/Scripts/Internal/StreamingResampler.cs new file mode 100644 index 00000000..dd9be2d6 --- /dev/null +++ b/Runtime/Scripts/Internal/StreamingResampler.cs @@ -0,0 +1,54 @@ +using System; +using System.Collections.Generic; + +namespace LiveKit.Internal +{ + /// <summary> + /// Streaming linear resampler for mono audio. Interpolation state carries across chunks, so a + /// stream processed in arbitrary slices produces the same output as processing it whole. + /// Free of UnityEngine dependencies so it can be unit tested. 
+ /// </summary> + internal sealed class StreamingResampler + { + private readonly double _step; // input samples advanced per output sample + private double _pos; // fractional read position; >= -1, where -1 maps to _prev + private float _prev; // last sample of the previous chunk + + public StreamingResampler(int inputRate, int outputRate) + { + if (inputRate <= 0) throw new ArgumentOutOfRangeException(nameof(inputRate)); + if (outputRate <= 0) throw new ArgumentOutOfRangeException(nameof(outputRate)); + _step = (double)inputRate / outputRate; + } + + public void Reset() + { + _pos = 0.0; + _prev = 0f; + } + + /// <summary> + /// Resamples the first <paramref name="count"/> samples of <paramref name="input"/> and + /// returns the produced output samples (possibly empty for very small chunks). + /// </summary> + public float[] Process(float[] input, int count) + { + if (count <= 0) return Array.Empty(); + + var output = new List((int)(count / _step) + 2); + double pos = _pos; + while (pos < count - 1) + { + int i0 = (int)Math.Floor(pos); + float a = i0 < 0 ? _prev : input[i0]; + float b = input[i0 + 1]; + float frac = (float)(pos - i0); + output.Add(a * (1f - frac) + b * frac); + pos += _step; + } + _prev = input[count - 1]; + _pos = pos - count; + return output.ToArray(); + } + } +} diff --git a/Runtime/Scripts/Internal/StreamingResampler.cs.meta b/Runtime/Scripts/Internal/StreamingResampler.cs.meta new file mode 100644 index 00000000..26d7c37c --- /dev/null +++ b/Runtime/Scripts/Internal/StreamingResampler.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 967338b84cfb74bdebca9132f3b9abd0 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/Scripts/MicrophoneSource.cs b/Runtime/Scripts/MicrophoneSource.cs index 904b8da7..b424e2a5 100644 --- a/Runtime/Scripts/MicrophoneSource.cs +++ 
b/Runtime/Scripts/MicrophoneSource.cs @@ -1,5 +1,6 @@ using System; using System.Collections; +using 
System.Collections.Generic; using UnityEngine; using LiveKit.Internal; @@ -13,25 +14,57 @@ namespace LiveKit /// sealed public class MicrophoneSource : RtcAudioSource { - private readonly GameObject _sourceObject; + // --- Capture design --- + // The microphone clip's ring buffer is read directly (no AudioSource playback, no + // OnAudioFilterRead), so capture is decoupled from the output device's clock. + // + // Microphone.GetPosition cannot be trusted as a sample position on every platform; see + // MicClipReader for the fragmented-buffer model (macOS + Bluetooth HFP) and how the + // contiguous stream is reconstructed from it. + // + // The clip's data rate is clip.frequency (verified: fragments play at correct pitch), so + // captured samples are resampled from clip.frequency to the fixed native-source rate. + private const uint TargetSampleRate = 48000; + private const float PreRollSeconds = 0.3f; + private const float SettleSeconds = 0.1f; // discard the counter's startup burst before measuring + // Engaging fragmented mode discards (stride - valid) samples per stride, so a false + // positive guarantees audio loss while a false negative only risks mild artifacts. The + // observed pathological device measures k=3.2; healthy devices measure ~1.0 with up to a + // few percent of startup noise. Keep a wide margin between the two. + private const double FragmentedKThreshold = 1.5; + private const float MaxBacklogSeconds = 0.2f; // drop backlog beyond this after a stall + private readonly string _deviceName; public override event Action AudioRead; private bool _disposed = false; private bool _started = false; + private volatile bool _capturing = false; + + private StreamingResampler _resampler; /// /// Creates a new microphone source for the given device. /// /// The name of the device to capture from. Use to /// get the list of available devices. - /// The GameObject to attach the AudioSource to. 
The object must be kept in the scene - /// for the duration of the source's lifetime. - public MicrophoneSource(string deviceName, GameObject sourceObject) : base(2, RtcAudioSourceType.AudioSourceMicrophone) + /// <param name="sourceObject">Unused; retained for compatibility. The microphone clip is read + /// directly, so no scene GameObject/AudioSource is required.</param> + public MicrophoneSource(string deviceName, GameObject sourceObject) + : base(RtcAudioSourceType.AudioSourceMicrophone, TargetSampleRate, 1) { _deviceName = deviceName; - _sourceObject = sourceObject; + } + + // The rate requested from Microphone.Start (a hint the platform may not honor), clamped to + // the device's reported range. The authoritative data rate is clip.frequency afterwards. + private static int ResolveRequestedSampleRate(string deviceName) + { + Microphone.GetDeviceCaps(deviceName, out int minFreq, out int maxFreq); + if (minFreq == 0 && maxFreq == 0) + return (int)TargetSampleRate; + return Mathf.Clamp((int)TargetSampleRate, minFreq, maxFreq); } /// @@ -49,7 +82,6 @@ public override void Start() base.Start(); if (_started) return; - if (!Application.HasUserAuthorization(mode: UserAuthorization.Microphone)) throw new InvalidOperationException("Microphone access not authorized"); @@ -61,13 +93,6 @@ public override void Start() private IEnumerator StartMicrophone() { - // Validate that the GameObject is still valid before starting - if (_sourceObject == null) - { - Utils.Error("MicrophoneSource: GameObject is null, cannot start microphone"); - yield break; - } - // Verify microphone is still authorized (could change during background) if (!Application.HasUserAuthorization(UserAuthorization.Microphone)) { @@ -76,13 +101,14 @@ private IEnumerator StartMicrophone() } AudioClip clip = null; + int requestedRate = ResolveRequestedSampleRate(_deviceName); try { clip = Microphone.Start( _deviceName, loop: true, - lengthSec: 1, - frequency: (int)DefaultMicrophoneSampleRate + lengthSec: 2, + 
frequency: requestedRate ); } 
catch (Exception e) @@ -97,29 +123,6 @@ private IEnumerator StartMicrophone() yield break; } - // Ensure no duplicate components exist before adding new ones. - // This is important during app resume on iOS where components might not be - // fully destroyed yet due to Unity's deferred Destroy(). - var existingSource = _sourceObject.GetComponent(); - if (existingSource != null) - UnityEngine.Object.DestroyImmediate(existingSource); - - var existingProbe = _sourceObject.GetComponent(); - if (existingProbe != null) - { - existingProbe.AudioRead -= OnAudioRead; - UnityEngine.Object.DestroyImmediate(existingProbe); - } - - var source = _sourceObject.AddComponent(); - source.clip = clip; - source.loop = true; - - var probe = _sourceObject.AddComponent(); - // Clear the audio data after it is read as to not play it through the speaker locally. - probe.ClearAfterInvocation(); - probe.AudioRead += OnAudioRead; - // Wait for microphone to actually start producing data with a timeout const float timeout = 2f; float elapsed = 0f; @@ -135,8 +138,86 @@ private IEnumerator StartMicrophone() yield break; } - source.Play(); - Utils.Debug($"MicrophoneSource device='{_deviceName}' started successfully"); + Utils.Info($"MicrophoneSource device='{_deviceName}' clip={clip.frequency}Hz/{clip.channels}ch samples={clip.samples} requested={requestedRate}Hz target={TargetSampleRate}Hz"); + + _capturing = true; + MonoBehaviourContext.RunCoroutine(CaptureLoop(clip)); + } + + // Reads new samples from the clip's ring buffer each frame and pushes them to the native + // source via AudioRead. MicClipReader decides what to read (including reconstructing + // fragmented buffers); this loop is the thin Unity shell around it. Runs on the main + // thread; the native source's queue absorbs the per-frame pacing jitter. 
NOTE(review): the loop only exits when it observes _capturing == false; if StopMicrophone and StartMicrophone both run before it next resumes, _capturing is true again and this loop may keep running beside the new one — confirm whether callers can do that. + private IEnumerator CaptureLoop(AudioClip clip) + { + int clipFrames = clip.samples; + int channels = clip.channels; + int dataRate = clip.frequency > 0 ? 
clip.frequency : (int)DefaultMicrophoneSampleRate; + + var reader = new MicClipReader(clipFrames, dataRate, PreRollSeconds, FragmentedKThreshold, MaxBacklogSeconds, SettleSeconds); + _resampler = new StreamingResampler(dataRate, (int)TargetSampleRate); + var ranges = new List(); + var clock = System.Diagnostics.Stopwatch.StartNew(); + bool announced = false; + long reportedDrops = 0; + + while (_capturing && !_disposed) + { + yield return null; + + ranges.Clear(); + reader.Update(Microphone.GetPosition(_deviceName), clock.Elapsed.TotalSeconds, ranges); + + if (!announced && reader.Ready) + { + announced = true; + if (reader.Fragmented) + Utils.Info($"MicrophoneSource: fragmented clip detected (k={reader.K:F2}); reading {reader.ValidPerStride} of every {reader.Stride} samples at {dataRate}Hz"); + else + Utils.Info($"MicrophoneSource: contiguous capture (k={reader.K:F2}) at {dataRate}Hz"); + } + + if (reader.TotalDropped > reportedDrops) + { + Utils.Warning($"MicrophoneSource: dropped {reader.TotalDropped - reportedDrops} buffered samples after a stall"); + reportedDrops = reader.TotalDropped; + } + + for (int i = 0; i < ranges.Count; i++) + ReadAndPush(clip, channels, ranges[i].Start, ranges[i].Count); + } + } + + // Reads a contiguous range, downmixes to mono, resamples clip.frequency -> + // TargetSampleRate (the resampler carries state across calls, so fragment junctions stay + // continuous), and fires AudioRead. 
+ private void ReadAndPush(AudioClip clip, int channels, int start, int count) + { + if (count <= 0) return; + + var interleaved = new float[count * channels]; + clip.GetData(interleaved, start); + + float[] mono; + if (channels == 1) + { + mono = interleaved; + } + else + { + mono = new float[count]; + for (int f = 0; f < count; f++) + { + float sum = 0f; + for (int ch = 0; ch < channels; ch++) + sum += interleaved[f * channels + ch]; + mono[f] = sum / channels; + } + } + + var output = _resampler.Process(mono, count); + if (output.Length > 0) + AudioRead?.Invoke(output, 1, (int)TargetSampleRate); } /// @@ -152,33 +233,15 @@ public override void Stop() private IEnumerator StopMicrophone() { + _capturing = false; + if (Microphone.IsRecording(_deviceName)) Microphone.End(_deviceName); - // Check if GameObject is still valid before trying to access components - if (_sourceObject != null) - { - var probe = _sourceObject.GetComponent(); - if (probe != null) - { - probe.AudioRead -= OnAudioRead; - UnityEngine.Object.Destroy(probe); - } - - var source = _sourceObject.GetComponent(); - if (source != null) - UnityEngine.Object.Destroy(source); - } - Utils.Debug($"MicrophoneSource device='{_deviceName}' stopped"); yield return null; } - private void OnAudioRead(float[] data, int channels, int sampleRate) - { - AudioRead?.Invoke(data, channels, sampleRate); - } - private void OnApplicationPause(bool pause) { if (!_started) @@ -246,4 +309,4 @@ protected override void Dispose(bool disposing) Dispose(false); } } -} \ No newline at end of file +} diff --git a/Runtime/Scripts/RtcAudioSource.cs b/Runtime/Scripts/RtcAudioSource.cs index a9af8a0a..43f5c102 100644 --- a/Runtime/Scripts/RtcAudioSource.cs +++ b/Runtime/Scripts/RtcAudioSource.cs @@ -83,20 +83,33 @@ private sealed class PendingAudioFrame private volatile bool _disposed = false; private int _audioReadCount = 0; - protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = 
RtcAudioSourceType.AudioSourceCustom) + // Sources that tap Unity's audio graph (e.g. BasicAudioSource) don't know their format ahead of + // time — it is whatever Unity's audio graph delivers. They use this constructor, which + // configures the native source from Unity's current output configuration. + protected RtcAudioSource(RtcAudioSourceType audioSourceType) + : this(audioSourceType, 0, 0) { } + + // Sources that produce a fixed, known format (e.g. MicrophoneSource, test signal generators) declare it + // directly. Passing 0 for either value falls back to the device configuration for both values. + protected RtcAudioSource(RtcAudioSourceType audioSourceType, uint sampleRate, uint channels) { _sourceType = audioSourceType; - _expectedChannels = (uint)channels; + + if (sampleRate > 0 && channels > 0) + { + _expectedSampleRate = sampleRate; + _expectedChannels = channels; + } + else + { + (_expectedSampleRate, _expectedChannels) = ResolveDeviceFormat(); + } using var request = FFIBridge.Instance.NewRequest(); var newAudioSource = request.request; newAudioSource.Type = AudioSourceType.AudioSourceNative; - newAudioSource.NumChannels = (uint)channels; - newAudioSource.SampleRate = _sourceType == RtcAudioSourceType.AudioSourceMicrophone ? 
- DefaultMicrophoneSampleRate : DefaultSampleRate; - _expectedSampleRate = newAudioSource.SampleRate; - - Utils.Debug($"NewAudioSource: {newAudioSource.NumChannels} {newAudioSource.SampleRate}"); + newAudioSource.NumChannels = _expectedChannels; + newAudioSource.SampleRate = _expectedSampleRate; newAudioSource.Options = request.TempResource(); newAudioSource.Options.EchoCancellation = true; @@ -109,6 +122,49 @@ protected RtcAudioSource(int channels = 2, RtcAudioSourceType audioSourceType = Utils.Debug($"{DebugTag} created handle={Handle.DangerousGetHandle()} expectedRate={_expectedSampleRate} expectedChannels={_expectedChannels} sourceType={_sourceType}"); } + // Reads Unity's actual output audio configuration. 
The capture path delivers buffers at the + // DSP output rate/channel count (see AudioProbe), so this is the format the native source + // must match. Falls back to the platform defaults when Unity cannot report a configuration + // (e.g. batch mode without an audio device). + private (uint sampleRate, uint channels) ResolveDeviceFormat() + { + uint sampleRate = _sourceType == RtcAudioSourceType.AudioSourceMicrophone + ? DefaultMicrophoneSampleRate + : DefaultSampleRate; + uint channels = DefaultChannels; + + try + { + var config = UnityEngine.AudioSettings.GetConfiguration(); + if (config.sampleRate > 0) + sampleRate = (uint)config.sampleRate; + var configuredChannels = SpeakerModeChannels(config.speakerMode); + if (configuredChannels > 0) + channels = configuredChannels; + } + catch (Exception e) + { + Utils.Warning($"{DebugTag} could not read Unity audio configuration, using defaults: {e.Message}"); + } + + return (sampleRate, channels); + } + + private static uint SpeakerModeChannels(UnityEngine.AudioSpeakerMode mode) + { + switch (mode) + { + case UnityEngine.AudioSpeakerMode.Mono: return 1; + case UnityEngine.AudioSpeakerMode.Stereo: return 2; + case UnityEngine.AudioSpeakerMode.Quad: return 4; + case UnityEngine.AudioSpeakerMode.Surround: return 5; + case UnityEngine.AudioSpeakerMode.Mode5point1: return 6; + case UnityEngine.AudioSpeakerMode.Mode7point1: return 8; + case UnityEngine.AudioSpeakerMode.Prologic: return 2; + default: return 0; + } + } + /// /// Begin capturing audio samples from the underlying source. /// @@ -153,9 +209,16 @@ private void OnAudioRead(float[] data, int channels, int sampleRate) return; } + // The native source rejects frames whose rate/channels differ from how it was + // configured (it does not resample). 
This should not happen now that sources declare + // or resolve their real format, but if Unity reports an inconsistent format — or the + // output configuration changes at runtime — we drop the frame instead of sending a + // mismatch the native side would error on. if ((uint)sampleRate != _expectedSampleRate || (uint)channels != _expectedChannels) { - Utils.Warning($"{DebugTag} audio frame #{frameIndex} metadata mismatch actualRate={sampleRate} actualChannels={channels} expectedRate={_expectedSampleRate} expectedChannels={_expectedChannels} sourceType={_sourceType}"); + if (frameIndex == 1 || frameIndex % 100 == 0) + Utils.Warning($"{DebugTag} dropping audio frame #{frameIndex}: format {sampleRate}/{channels} does not match source {_expectedSampleRate}/{_expectedChannels} (sourceType={_sourceType})"); + return; } var pendingBeforeSend = PendingFrameCount(); diff --git a/Samples~/Meet/Assets/Runtime/MeetManager.cs b/Samples~/Meet/Assets/Runtime/MeetManager.cs index 225c7a0c..532aa319 100644 --- a/Samples~/Meet/Assets/Runtime/MeetManager.cs +++ b/Samples~/Meet/Assets/Runtime/MeetManager.cs @@ -453,8 +453,7 @@ private IEnumerator PublishLocalMicrophone() { if (_audioObjects.ContainsKey(LocalAudioTrackName)) yield break; - Microphone.Start(null, true, 10, 44100); - + // MicrophoneSource starts the device itself, so we only need the device name here. var audioObject = new GameObject($"My Microphone: {Microphone.devices[0]}"); audioObject.transform.SetParent(_audioTrackParent); @@ -488,7 +487,7 @@ private void UnpublishLocalMicrophone() if (_audioObjects.TryGetValue(LocalAudioTrackName, out var obj)) { - obj.GetComponent()?.Stop(); + // MicrophoneSource reads the mic clip directly; no AudioSource is attached anymore. 
Destroy(obj); _audioObjects.Remove(LocalAudioTrackName); } @@ -567,7 +566,10 @@ private void CleanUpAllTracks() foreach (var obj in _audioObjects.Values) { if (obj == null) continue; - obj.GetComponent()?.Stop(); + // Not every audio object has an AudioSource (the local mic object no longer does), and + // ?. on GetComponent bypasses Unity's missing-component null semantics in the editor. + if (obj.TryGetComponent(out var audioSource)) + audioSource.Stop(); Destroy(obj); } _audioObjects.Clear(); diff --git a/Tests/EditMode/MicClipReaderTests.cs b/Tests/EditMode/MicClipReaderTests.cs new file mode 100644 index 00000000..36a563d5 --- /dev/null +++ b/Tests/EditMode/MicClipReaderTests.cs @@ -0,0 +1,261 @@ +using System; +using System.Collections.Generic; +using NUnit.Framework; +using LiveKit.Internal; + +namespace LiveKit.EditModeTests +{ + /// <summary> + /// Tests for the microphone clip reading logic, including reconstruction of the fragmented + /// buffers produced by macOS with Bluetooth HFP headsets (valid fragments of 320 samples at a + /// 1024-sample stride with zero padding, position counter inflated k=3.2x; structure taken + /// from a raw buffer dump of a Sony MDR-1000X). + /// </summary> + public class MicClipReaderTests + { + const double PreRoll = 0.3; + + static List Drain(MicClipReader reader, int counter, double t) + { + var ranges = new List(); + reader.Update(counter, t, ranges); + return ranges; + } + + // Runs the pre-roll with the given advance per tick, returning (counter, time) at the end. 
+ static (int counter, double t) RunPreRoll(MicClipReader reader, int clipFrames, int advancePerTick, double dt) + { + int counter = 0; + double t = 0; + reader.Update(counter, t, new List()); + while (!reader.Ready) + { + t += dt; + counter = (counter + advancePerTick) % clipFrames; + reader.Update(counter, t, new List()); + } + return (counter, t); + } + + [Test] + public void HealthyDevice_UsesContiguousMode_AndEmitsAllSamples() + { + const int clipFrames = 96000; // 2s @ 48k + const int rate = 48000; + const int perTick = 480; // 10ms ticks at the data rate + const double dt = 0.01; + + var reader = new MicClipReader(clipFrames, rate, PreRoll); + var (counter, t) = RunPreRoll(reader, clipFrames, perTick, dt); + + Assert.IsFalse(reader.Fragmented); + Assert.AreEqual(1.0, reader.K, 0.02); + + long emitted = 0; + for (int i = 0; i < 100; i++) + { + t += dt; + counter = (counter + perTick) % clipFrames; + foreach (var r in Drain(reader, counter, t)) + { + Assert.LessOrEqual(r.Start + r.Count, clipFrames, "range must not cross the ring wrap"); + emitted += r.Count; + } + } + Assert.AreEqual(100L * perTick, emitted, "contiguous mode must emit every written sample"); + } + + [Test] + public void FragmentedDevice_DetectsStrideAndValidCount() + { + const int clipFrames = 32000; // 2s @ 16k + const int rate = 16000; + const int stride = 1024; // one counter jump per real 20ms packet + const double dt = 0.02; + + var reader = new MicClipReader(clipFrames, rate, PreRoll); + RunPreRoll(reader, clipFrames, stride, dt); + + Assert.IsTrue(reader.Fragmented); + Assert.AreEqual(3.2, reader.K, 0.05); + Assert.AreEqual(stride, reader.Stride); + Assert.AreEqual(320, reader.ValidPerStride); + } + + [Test] + public void FragmentedDevice_ReconstructsContiguousStream() + { + const int clipFrames = 32000; + const int rate = 16000; + const int stride = 1024; + const int valid = 320; + const double dt = 0.02; + + var reader = new MicClipReader(clipFrames, rate, PreRoll); + + // Simulated 
clip: each tick the writer stores `valid` sequential marker values at the + // counter's previous position and zero-fills the rest of the stride, exactly like the + // dumped MDR-1000X buffer. + var clip = new float[clipFrames]; + float marker = 1f; + int counter = 0; + double t = 0; + reader.Update(counter, t, new List()); + + void WriteFragment() + { + for (int i = 0; i < stride; i++) + clip[(counter + i) % clipFrames] = i < valid ? marker + i : 0f; + marker += valid; + counter = (counter + stride) % clipFrames; + } + + while (!reader.Ready) + { + t += dt; + WriteFragment(); + reader.Update(counter, t, new List()); + } + + // Capture for several buffer laps and verify the emitted stream is the unbroken + // marker sequence: lossless reconstruction with no gaps, repeats, or padding. + var collected = new List(); + for (int tick = 0; tick < 200; tick++) + { + t += dt; + WriteFragment(); + foreach (var r in Drain(reader, counter, t)) + { + Assert.LessOrEqual(r.Start + r.Count, clipFrames, "range must not cross the ring wrap"); + for (int i = 0; i < r.Count; i++) + collected.Add(clip[r.Start + i]); + } + } + + Assert.AreEqual(200 * valid, collected.Count, "every valid fragment must be emitted exactly once"); + for (int i = 1; i < collected.Count; i++) + Assert.AreEqual(collected[i - 1] + 1f, collected[i], $"stream must be contiguous at index {i}"); + } + + [Test] + public void FragmentedDevice_DropsStaleBacklogStrideAligned() + { + const int clipFrames = 32000; + const int rate = 16000; + const int stride = 1024; + const double dt = 0.02; + const double maxBacklogSec = 0.2; + + var reader = new MicClipReader(clipFrames, rate, PreRoll, 1.05, maxBacklogSec); + var (counter, t) = RunPreRoll(reader, clipFrames, stride, dt); + + // One giant advance (a main-thread stall): 25 strides at once. 
+ const int stalledStrides = 25; + counter = (counter + stalledStrides * stride) % clipFrames; + t += stalledStrides * dt; + var ranges = Drain(reader, counter, t); + + Assert.Greater(reader.TotalDropped, 0, "stall backlog must be dropped"); + Assert.AreEqual(0, reader.TotalDropped % stride, "drop must preserve stride alignment"); + + // Emitted + dropped must account for the whole advance (in counter units). + long emittedStrides = 0; + foreach (var r in ranges) emittedStrides += r.Count; + emittedStrides /= reader.ValidPerStride; + Assert.AreEqual(stalledStrides, emittedStrides + reader.TotalDropped / stride); + + // The bounded burst must not exceed the backlog limit. + Assert.LessOrEqual(emittedStrides * stride, (long)(reader.CounterRate * maxBacklogSec)); + } + + [Test] + public void SlightlyInflatedCounter_StaysContiguous() + { + // Regression: a healthy MacBook mic measured k=1.07 right after a device transition + // (startup-burst noise), and the old 1.05 threshold engaged fragmented mode, silently + // discarding ~6% of real audio. Borderline rates must stay contiguous. 
+ const int clipFrames = 96000; + const int rate = 48000; + const int perTick = 514; // ~k=1.07 at 10ms ticks + const double dt = 0.01; + + var reader = new MicClipReader(clipFrames, rate, PreRoll); + RunPreRoll(reader, clipFrames, perTick, dt); + + Assert.IsFalse(reader.Fragmented, "k slightly above 1 must not trigger fragmented mode"); + Assert.AreEqual(1.07, reader.K, 0.02); + } + + [Test] + public void NoRangesAreEmittedDuringPreRoll() + { + const int clipFrames = 96000; + var reader = new MicClipReader(clipFrames, 48000, PreRoll); + var ranges = new List(); + reader.Update(0, 0.0, ranges); + reader.Update(480, 0.01, ranges); + reader.Update(960, 0.02, ranges); + Assert.IsFalse(reader.Ready); + Assert.IsEmpty(ranges); + } + } + + public class StreamingResamplerTests + { + static float[] Sine(int count, double freqHz, int rate) + { + var s = new float[count]; + for (int i = 0; i < count; i++) + s[i] = (float)Math.Sin(2.0 * Math.PI * freqHz * i / rate); + return s; + } + + static int ZeroCrossings(IReadOnlyList s) + { + int n = 0; + for (int i = 1; i < s.Count; i++) + if ((s[i - 1] < 0f) != (s[i] < 0f)) n++; + return n; + } + + [Test] + public void Upsample16kTo48k_PreservesFrequencyAndLength() + { + const int inRate = 16000, outRate = 48000; + var input = Sine(16000, 200.0, inRate); // 1s of 200Hz + var resampler = new StreamingResampler(inRate, outRate); + var output = resampler.Process(input, input.Length); + + Assert.AreEqual(outRate, output.Length, outRate / 100, "1s in should be ~1s out at the new rate"); + // 200Hz over ~1s crosses zero ~400 times regardless of sample rate. 
+ Assert.AreEqual(ZeroCrossings(input), ZeroCrossings(output), 4); + } + + [Test] + public void ChunkedProcessing_MatchesWholeProcessing() + { + const int inRate = 16000, outRate = 48000; + var input = Sine(3200, 250.0, inRate); + + var whole = new StreamingResampler(inRate, outRate).Process(input, input.Length); + + // Process the same stream in 320-sample fragments (the MDR-1000X packet size). + var chunked = new List(); + var resampler = new StreamingResampler(inRate, outRate); + for (int off = 0; off < input.Length; off += 320) + { + var chunk = new float[320]; + Array.Copy(input, off, chunk, 0, 320); + chunked.AddRange(resampler.Process(chunk, 320)); + } + + // Accumulated floating-point rounding differs by an ulp between the two paths (the + // chunked position is renormalized per chunk), which can flip the final boundary + // sample — allow a 1-sample tail difference, but the overlap must match exactly. + Assert.AreEqual(whole.Length, chunked.Count, 1, "chunking must not change the output length (±1 tail sample)"); + int overlap = Math.Min(whole.Length, chunked.Count); + for (int i = 0; i < overlap; i++) + Assert.AreEqual(whole[i], chunked[i], 1e-4f, $"chunked output diverges at {i}"); + } + } +} diff --git a/Tests/EditMode/MicClipReaderTests.cs.meta b/Tests/EditMode/MicClipReaderTests.cs.meta new file mode 100644 index 00000000..70af710c --- /dev/null +++ b/Tests/EditMode/MicClipReaderTests.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: b8df68a85510e4aa58359a4dd8b170c6 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Tests/PlayMode/Utils/SineWaveAudioSource.cs b/Tests/PlayMode/Utils/SineWaveAudioSource.cs index 907e9ccc..2337615b 100644 --- a/Tests/PlayMode/Utils/SineWaveAudioSource.cs +++ b/Tests/PlayMode/Utils/SineWaveAudioSource.cs @@ -31,7 +31,7 @@ public SineWaveAudioSource( int sampleRate = 48000, 
double frequencyHz = 440.0, float amplitude = 0.1f) - : base(channels, RtcAudioSourceType.AudioSourceCustom) + : base(RtcAudioSourceType.AudioSourceCustom, (uint)sampleRate, (uint)channels) { _channels = channels; _sampleRate = sampleRate;