opus 和 NAudio 流不同步

opus and NAudio streaming out of sync

我正在给游戏添加 VoIP,因为 Unity 的 Microphone 类在 WebGL 中不受支持,而且本身速度很慢,并且输出的是浮点数而不是字节。有人建议我使用编解码器,于是我选择了 Opus,并找到了它的一个封装库以及配套的 NAudio 演示。我对它相当满意;演示里有一些多余的循环,删掉之后结果也一样。演示默认使用 48k 采样率和 4000 的比特率,我把采样率降到了 8k,并把最大缓冲区大小设为 350。以下是该脚本的代码:

/// <summary>
/// Unity lifecycle hook: registers an editor-only callback so audio capture is
/// stopped when play mode is paused. Encoding itself starts via StartGame().
/// </summary>
private void Start()
{
    //StartEncoding();
#if UNITY_EDITOR
    // Use += instead of '=' — plain assignment silently replaced every other
    // subscriber of this editor callback. Also guard with UNITY_EDITOR so the
    // UnityEditor reference does not break player (non-editor) builds.
    UnityEditor.EditorApplication.playmodeStateChanged += PlayModeStateChangedHandler;
#endif
}

/// <summary>
/// Editor-only callback: shuts down capture/playback when the editor pauses
/// play mode, so the recording devices are not left running.
/// </summary>
private void PlayModeStateChangedHandler()
{
    // Only react to the paused state; all other transitions are ignored.
    if (!UnityEditor.EditorApplication.isPaused)
    {
        return;
    }

    StopEncoding();
}

/// <summary>
/// Public entry point (wired to game start) that kicks off voice capture,
/// encoding and playback.
/// </summary>
public void StartGame()
{
    StartEncoding();
}

/// <summary>
/// Wires up the Opus codec, NAudio capture/playback devices and the stats
/// timer, then starts recording and playing.
/// </summary>
private void StartEncoding()
{
    _client = FindObjectOfType<Client>();

    // Unsubscribe before subscribing so that calling StartEncoding more than
    // once (e.g. after a pause/resume cycle) does not register the handler
    // twice and play every received packet twice.
    _client.AudioReceivers -= UpdateAudioOutput;
    _client.AudioReceivers += UpdateAudioOutput;

    _startTime = DateTime.Now;
    _bytesSent = 0;

    // 160 samples at 8 kHz = one 20 ms frame, a frame size Opus accepts.
    _segmentFrames = 160;
    _encoder = OpusEncoder.Create(8000, 1, FragLabs.Audio.Codecs.Opus.Application.Voip);
    _encoder.MaxDataBytes = 350;
    _encoder.Bitrate = 4000;
    _decoder = OpusDecoder.Create(8000, 1);
    _decoder.MaxDataBytes = 175;
    _bytesPerSegment = _encoder.FrameByteCount(_segmentFrames);

    _waveIn = new WaveIn(WaveCallbackInfo.FunctionCallback());
    _waveIn.BufferMilliseconds = 50;
    _waveIn.DeviceNumber = 0;
    _waveIn.DataAvailable += _waveIn_DataAvailable;
    // 8 kHz, 16-bit, mono — must match the encoder settings above.
    _waveIn.WaveFormat = new WaveFormat(8000, 16, 1);

    _playBuffer = new BufferedWaveProvider(new WaveFormat(8000, 16, 1));
    // Drop audio rather than throw when the network outpaces playback.
    _playBuffer.DiscardOnBufferOverflow = true;

    _waveOut = new WaveOut(WaveCallbackInfo.FunctionCallback());
    _waveOut.DeviceNumber = 0;
    _waveOut.Init(_playBuffer);

    _waveOut.Play();
    _waveIn.StartRecording();

    // Lazily create the 1 s stats timer once and reuse it across restarts
    // (StopEncoding only stops it, it does not dispose it).
    if (_timer == null)
    {
        _timer = new Timer();
        _timer.Interval = 1000;
        _timer.Elapsed += _timer_Tick;
    }
    _timer.Start();
}

/// <summary>
/// Fires once per second and computes the average outgoing byte rate since
/// encoding started. NOTE(review): the result is never used — presumably a
/// leftover debugging hook; confirm before deleting. Runs on a ThreadPool
/// thread (System.Timers.Timer.Elapsed), not the Unity main thread.
/// </summary>
private void _timer_Tick(object sender, EventArgs e)
{
    var elapsed = DateTime.Now - _startTime;
    var bytesPerSecond = _bytesSent / elapsed.TotalSeconds;
}

// Carry-over PCM bytes from the previous capture callback that did not fill a
// complete Opus frame; prepended to the next callback's data.
byte[] _notEncodedBuffer = new byte[0];

/// <summary>
/// NAudio capture callback: stitches the carry-over onto the newly captured
/// PCM, encodes every complete frame and sends it to the server, and keeps
/// the incomplete remainder for the next callback.
/// </summary>
private void _waveIn_DataAvailable(object sender, WaveInEventArgs e)
{
    // soundBuffer = leftover from the previous call + freshly captured bytes.
    // Buffer.BlockCopy replaces the original hand-rolled byte-copy loops.
    byte[] soundBuffer = new byte[e.BytesRecorded + _notEncodedBuffer.Length];
    Buffer.BlockCopy(_notEncodedBuffer, 0, soundBuffer, 0, _notEncodedBuffer.Length);
    Buffer.BlockCopy(e.Buffer, 0, soundBuffer, _notEncodedBuffer.Length, e.BytesRecorded);

    int byteCap = _bytesPerSegment;
    // Plain integer division — the original Math.Floor over decimal was
    // needless conversion for the same result.
    int segmentCount = soundBuffer.Length / byteCap;
    int segmentsEnd = segmentCount * byteCap;
    int notEncodedCount = soundBuffer.Length - segmentsEnd;

    // Stash the incomplete tail for the next DataAvailable callback.
    _notEncodedBuffer = new byte[notEncodedCount];
    Buffer.BlockCopy(soundBuffer, segmentsEnd, _notEncodedBuffer, 0, notEncodedCount);

    // Encode and ship each complete frame; the segment scratch buffer is
    // reused because Encode returns a freshly allocated output buffer.
    byte[] segment = new byte[byteCap];
    for (int i = 0; i < segmentCount; i++)
    {
        Buffer.BlockCopy(soundBuffer, i * byteCap, segment, 0, byteCap);
        int len;
        byte[] buff = _encoder.Encode(segment, segment.Length, out len);
        SendToServer(buff, len);
    }
}

/// <summary>
/// Network receive callback: decodes one Opus packet and queues the resulting
/// PCM into the playback buffer.
/// </summary>
/// <param name="ba">Encoded Opus packet.</param>
/// <param name="len">Number of valid bytes in <paramref name="ba"/>.</param>
public void UpdateAudioOutput(byte[] ba, int len)
{
    // The original allocated a len-byte array and immediately overwrote the
    // reference with Decode's return value — dead allocation removed. It also
    // reused one variable as both the encoded input length and the decoded
    // output length, which obscured the two distinct quantities.
    int outlen;
    byte[] buff = _decoder.Decode(ba, len, out outlen);
    _playBuffer.AddSamples(buff, 0, outlen);
}

/// <summary>
/// Forwards one encoded Opus packet (the first <paramref name="Length"/> bytes
/// of <paramref name="EncodedAudio"/>) to the server over the client connection.
/// </summary>
private void SendToServer(byte[] EncodedAudio, int Length)
{
    print("SENDING AUDIO");
    //print("audio length : " + EncodedAudio.Length);
    _client.Send(EncodedAudio, Length);
    //UpdateAudioOutput(EncodedAudio, Length); // local loopback, handy for testing without a server
}

/// <summary>
/// Tears down capture, playback and codec state. Null-guarded so it is safe
/// to call more than once, or before StartEncoding ever ran — both happen,
/// because OnApplicationQuit and the editor pause callback each invoke it.
/// The stats timer is only stopped, not disposed, so a later StartEncoding
/// can reuse it.
/// </summary>
private void StopEncoding()
{
    if (_timer != null)
    {
        _timer.Stop();
    }

    if (_waveIn != null)
    {
        _waveIn.StopRecording();
        _waveIn.Dispose();
        _waveIn = null;
    }

    if (_waveOut != null)
    {
        _waveOut.Stop();
        _waveOut.Dispose();
        _waveOut = null;
    }

    _playBuffer = null;

    if (_encoder != null)
    {
        _encoder.Dispose();
        _encoder = null;
    }

    if (_decoder != null)
    {
        _decoder.Dispose();
        _decoder = null;
    }
}

/// <summary>
/// Unity lifecycle hook: releases audio devices and codec resources when the
/// application shuts down.
/// </summary>
private void OnApplicationQuit()
{
    StopEncoding();
}

下面是 TCP 的发送和接收代码,客户端和服务器端的实现几乎相同:

/// <summary>
/// Frames one message onto the TCP stream as:
/// [1 byte messageType][4 bytes payload length][payload][4 bytes customParam].
/// For audio packets, customParamLen carries the encoded byte count the Opus
/// decoder needs on the receiving side.
/// </summary>
public void Send(byte[] data, int customParamLen = 0)
{
    if (!socketReady)
    {
        return;
    }

    byte messageType = (1 << 3); // assume that 0000 1000 would be the Message type
    byte[] length = BitConverter.GetBytes(data.Length);
    byte[] customParam = BitConverter.GetBytes(customParamLen); //length also 4/sizeof(int)

    // 1 type byte + 4 length bytes + payload + 4 custom-param bytes. The
    // original hand-rolled copy loops (and the redundant 'message' alias of
    // 'data') are replaced with Buffer.BlockCopy.
    byte[] buffer = new byte[sizeof(int) + data.Length + 1 + customParam.Length];
    buffer[0] = messageType;
    Buffer.BlockCopy(length, 0, buffer, 1, sizeof(int));
    Buffer.BlockCopy(data, 0, buffer, 1 + sizeof(int), data.Length);
    Buffer.BlockCopy(customParam, 0, buffer, 1 + sizeof(int) + data.Length, sizeof(int));

    // Single Write hands the whole frame to the socket in one call.
    heavyStream.Write(buffer, 0, buffer.Length);

    print("Written bytes"); // typo fixed: was "Writtin bytes"
}

// Receive side of the framing protocol (fragment — the enclosing loop/method
// and its closing braces are not shown in the question).
// NOTE(review): on TCP, NetworkStream.Read may legally return FEWER bytes than
// requested. Each single Read below should loop until the full count has been
// accumulated; as written, a short read makes the recv-equality check fail and
// the packet is silently dropped — a likely cause of the audible gaps.
if (heavyStream.DataAvailable)
        {
            print("Data Receiving YAY!");

            //Get message Type
            byte messageType = (byte)heavyStream.ReadByte();

            //Get length of the Data
            byte[] lengthBuffer = new byte[sizeof(int)];
            int recv = heavyStream.Read(lengthBuffer, 0, lengthBuffer.Length);

            if (recv == sizeof(int))
            {
                int messageLen = BitConverter.ToInt32(lengthBuffer, 0);

                //Get the Data
                byte[] messageBuffer = new byte[messageLen];
                recv = heavyStream.Read(messageBuffer, 0, messageBuffer.Length);

                if (recv == messageLen)
                {
                    // messageBuffer contains the whole message ...

                    //Get length paramater needed for opus to decode
                    byte[] customParamAudioLen = new byte[sizeof(int)];
                    recv = heavyStream.Read(customParamAudioLen, 0, customParamAudioLen.Length);

                    if (recv == sizeof(int))
                    {
                        // NOTE(review): Send() writes customParamLen unmodified, so this
                        // "- 5" does not match the sender's framing — it looks like an
                        // ad-hoc fudge that corrupts the decode length. TODO confirm and remove.
                        AudioReceivers(messageBuffer, BitConverter.ToInt32(customParamAudioLen, 0) - 5);
                        print("Done! Everything went straight as planned");
                    }
                }
            }

现在的问题是音频断断续续的,中间有间隙,时间越久越不同步。

更新

仍未修复。

您似乎只是直接发送音频,接收端没有抖动缓冲区。这意味着如果您的延迟有任何变化,您将开始听到间隙。

你需要做的是在客户端缓冲音频 - 直到你有足够的时间,比如 400 毫秒,然后开始播放。这为您提供了额外的缓冲时间来应对抖动。

这是一种非常幼稚的方法,但为您提供了一些可玩的东西 - 您可能想要查看自适应抖动缓冲区,并且可能切换到 UDP 而不是 TCP 以获得更好的性能。使用 UDP,您将需要处理丢失的数据包、乱序等问题。

可以看看带抖动缓冲器的 Speex(https://github.com/xiph/speex),或者使用 Speex 实现 VoIP 的 Mumble(https://github.com/mumble-voip/mumble)。