opus 和 NAudio 流不同步
opus and NAudio streaming out of sync
我正在游戏中添加 voip,因为 Unity 的麦克风 class 在 Web_GL 中不受支持并且已经很慢并且提供浮点数而不是字节。现在有人建议我使用编解码器,即 Opus,然后我发现它的包装器及其使用 NAudio 的演示,我对它相当满意,它使用了一些额外的循环,在删除后也给出了相同的结果,但无论如何它也给了 4000 字节和 48k 采样率,我将其减少到 8k,最大缓冲区大小为 350。这是该脚本的代码
private void Start()
{
//StartEncoding();
UnityEditor.EditorApplication.playmodeStateChanged = PlayModeStateChangedHandler;
}
private void PlayModeStateChangedHandler()
{
if (UnityEditor.EditorApplication.isPaused)
{
StopEncoding();
}
}
public void StartGame()
{
StartEncoding();
}
private void StartEncoding()
{
_client = FindObjectOfType<Client>();
_client.AudioReceivers += UpdateAudioOutput;
_startTime = DateTime.Now;
_bytesSent = 0;
_segmentFrames = 160;
_encoder = OpusEncoder.Create(8000, 1, FragLabs.Audio.Codecs.Opus.Application.Voip);
_encoder.MaxDataBytes = 350;
_encoder.Bitrate = 4000;
_decoder = OpusDecoder.Create(8000, 1);
_decoder.MaxDataBytes = 175;
_bytesPerSegment = _encoder.FrameByteCount(_segmentFrames);
_waveIn = new WaveIn(WaveCallbackInfo.FunctionCallback());
_waveIn.BufferMilliseconds = 50;
_waveIn.DeviceNumber = 0;
_waveIn.DataAvailable += _waveIn_DataAvailable;
_waveIn.WaveFormat = new WaveFormat(8000, 16, 1);
_playBuffer = new BufferedWaveProvider(new WaveFormat(8000, 16, 1));
_playBuffer.DiscardOnBufferOverflow = true;
_waveOut = new WaveOut(WaveCallbackInfo.FunctionCallback());
_waveOut.DeviceNumber = 0;
_waveOut.Init(_playBuffer);
_waveOut.Play();
_waveIn.StartRecording();
if (_timer == null)
{
_timer = new Timer();
_timer.Interval = 1000;
_timer.Elapsed += _timer_Tick;
}
_timer.Start();
}
private void _timer_Tick(object sender, EventArgs e)
{
var timeDiff = DateTime.Now - _startTime;
var bytesPerSecond = _bytesSent / timeDiff.TotalSeconds;
}
byte[] _notEncodedBuffer = new byte[0];
private void _waveIn_DataAvailable(object sender, WaveInEventArgs e)
{
byte[] soundBuffer = new byte[e.BytesRecorded + _notEncodedBuffer.Length];
for (int i = 0; i < _notEncodedBuffer.Length; i++)
soundBuffer[i] = _notEncodedBuffer[i];
for (int i = 0; i < e.BytesRecorded; i++)
soundBuffer[i + _notEncodedBuffer.Length] = e.Buffer[i];
int byteCap = _bytesPerSegment;
int segmentCount = (int)Math.Floor((decimal)soundBuffer.Length / byteCap);
int segmentsEnd = segmentCount * byteCap;
int notEncodedCount = soundBuffer.Length - segmentsEnd;
_notEncodedBuffer = new byte[notEncodedCount];
for (int i = 0; i < notEncodedCount; i++)
{
_notEncodedBuffer[i] = soundBuffer[segmentsEnd + i];
}
for (int i = 0; i < segmentCount; i++)
{
byte[] segment = new byte[byteCap];
for (int j = 0; j < segment.Length; j++)
segment[j] = soundBuffer[(i * byteCap) + j];
int len;
byte[] buff = _encoder.Encode(segment, segment.Length, out len);
SendToServer(buff, len);
}
}
public void UpdateAudioOutput(byte[] ba, int len)
{
int outlen = len;
byte[] buff = new byte[len];
buff = _decoder.Decode(ba, outlen, out outlen);
_playBuffer.AddSamples(buff, 0, outlen);
}
private void SendToServer(byte[] EncodedAudio, int Length)
{
print("SENDING AUDIO");
//print("audio length : " + EncodedAudio.Length);
_client.Send(EncodedAudio, Length);
//UpdateAudioOutput(EncodedAudio, Length);
}
private void StopEncoding()
{
_timer.Stop();
_waveIn.StopRecording();
_waveIn.Dispose();
_waveIn = null;
_waveOut.Stop();
_waveOut.Dispose();
_waveOut = null;
_playBuffer = null;
_encoder.Dispose();
_encoder = null;
_decoder.Dispose();
_decoder = null;
}
private void OnApplicationQuit()
{
StopEncoding();
}
现在是 tcp 发送和接收,它们对于客户端和服务器来说几乎是一样的
public void Send(byte[] data, int customParamLen = 0)
{
if (!socketReady)
{
return;
}
byte messageType = (1 << 3); // assume that 0000 1000 would be the Message type
byte[] message = data;
byte[] length = BitConverter.GetBytes(message.Length);
byte[] customParam = BitConverter.GetBytes(customParamLen); //length also 4/sizeof(int)
byte[] buffer = new byte[sizeof(int) + message.Length + 1 + customParam.Length];
buffer[0] = messageType;
//Enter length in the buffer
for (int i = 0; i < sizeof(int); i++)
{
buffer[i + 1] = length[i];
}
//Enter data in the buffer
for (int i = 0; i < message.Length; i++)
{
buffer[i + 1 + sizeof(int)] = message[i];
}
//Enter custom Param in the buffer
for (int i = 0; i < sizeof(int); i++)
{
buffer[i + 1 + sizeof(int) + message.Length] = customParam[i];
}
heavyStream.Write(buffer, 0, buffer.Length);
print("Writtin bytes");
}
if (heavyStream.DataAvailable)
{
print("Data Receiving YAY!");
//Get message Type
byte messageType = (byte)heavyStream.ReadByte();
//Get length of the Data
byte[] lengthBuffer = new byte[sizeof(int)];
int recv = heavyStream.Read(lengthBuffer, 0, lengthBuffer.Length);
if (recv == sizeof(int))
{
int messageLen = BitConverter.ToInt32(lengthBuffer, 0);
//Get the Data
byte[] messageBuffer = new byte[messageLen];
recv = heavyStream.Read(messageBuffer, 0, messageBuffer.Length);
if (recv == messageLen)
{
// messageBuffer contains the whole message ...
//Get length paramater needed for opus to decode
byte[] customParamAudioLen = new byte[sizeof(int)];
recv = heavyStream.Read(customParamAudioLen, 0, customParamAudioLen.Length);
if (recv == sizeof(int))
{
AudioReceivers(messageBuffer, BitConverter.ToInt32(customParamAudioLen, 0) - 5);
print("Done! Everything went straight as planned");
}
}
}
现在的问题是音频断断续续的,中间有间隙,时间越久越不同步。
更新
仍未修复。
您似乎只是直接发送音频,接收端没有抖动缓冲区。这意味着如果您的延迟有任何变化,您将开始听到间隙。
你需要做的是在客户端缓冲音频 - 直到你有足够的时间,比如 400 毫秒,然后开始播放。这为您提供了额外的缓冲时间来应对抖动。
这是一种非常幼稚的方法,但为您提供了一些可玩的东西 - 您可能想要查看自适应抖动缓冲区,并且可能切换到 UDP 而不是 TCP 以获得更好的性能。使用 UDP,您将需要处理丢失的数据包、乱序等问题。
看看有抖动缓冲器的 Speex https://github.com/xiph/speex or Mumble which uses Speex for VOIP https://github.com/mumble-voip/mumble
我正在游戏中添加 voip,因为 Unity 的麦克风 class 在 Web_GL 中不受支持并且已经很慢并且提供浮点数而不是字节。现在有人建议我使用编解码器,即 Opus,然后我发现它的包装器及其使用 NAudio 的演示,我对它相当满意,它使用了一些额外的循环,在删除后也给出了相同的结果,但无论如何它也给了 4000 字节和 48k 采样率,我将其减少到 8k,最大缓冲区大小为 350。这是该脚本的代码
private void Start()
{
//StartEncoding();
UnityEditor.EditorApplication.playmodeStateChanged = PlayModeStateChangedHandler;
}
private void PlayModeStateChangedHandler()
{
if (UnityEditor.EditorApplication.isPaused)
{
StopEncoding();
}
}
public void StartGame()
{
StartEncoding();
}
private void StartEncoding()
{
_client = FindObjectOfType<Client>();
_client.AudioReceivers += UpdateAudioOutput;
_startTime = DateTime.Now;
_bytesSent = 0;
_segmentFrames = 160;
_encoder = OpusEncoder.Create(8000, 1, FragLabs.Audio.Codecs.Opus.Application.Voip);
_encoder.MaxDataBytes = 350;
_encoder.Bitrate = 4000;
_decoder = OpusDecoder.Create(8000, 1);
_decoder.MaxDataBytes = 175;
_bytesPerSegment = _encoder.FrameByteCount(_segmentFrames);
_waveIn = new WaveIn(WaveCallbackInfo.FunctionCallback());
_waveIn.BufferMilliseconds = 50;
_waveIn.DeviceNumber = 0;
_waveIn.DataAvailable += _waveIn_DataAvailable;
_waveIn.WaveFormat = new WaveFormat(8000, 16, 1);
_playBuffer = new BufferedWaveProvider(new WaveFormat(8000, 16, 1));
_playBuffer.DiscardOnBufferOverflow = true;
_waveOut = new WaveOut(WaveCallbackInfo.FunctionCallback());
_waveOut.DeviceNumber = 0;
_waveOut.Init(_playBuffer);
_waveOut.Play();
_waveIn.StartRecording();
if (_timer == null)
{
_timer = new Timer();
_timer.Interval = 1000;
_timer.Elapsed += _timer_Tick;
}
_timer.Start();
}
private void _timer_Tick(object sender, EventArgs e)
{
var timeDiff = DateTime.Now - _startTime;
var bytesPerSecond = _bytesSent / timeDiff.TotalSeconds;
}
byte[] _notEncodedBuffer = new byte[0];
private void _waveIn_DataAvailable(object sender, WaveInEventArgs e)
{
byte[] soundBuffer = new byte[e.BytesRecorded + _notEncodedBuffer.Length];
for (int i = 0; i < _notEncodedBuffer.Length; i++)
soundBuffer[i] = _notEncodedBuffer[i];
for (int i = 0; i < e.BytesRecorded; i++)
soundBuffer[i + _notEncodedBuffer.Length] = e.Buffer[i];
int byteCap = _bytesPerSegment;
int segmentCount = (int)Math.Floor((decimal)soundBuffer.Length / byteCap);
int segmentsEnd = segmentCount * byteCap;
int notEncodedCount = soundBuffer.Length - segmentsEnd;
_notEncodedBuffer = new byte[notEncodedCount];
for (int i = 0; i < notEncodedCount; i++)
{
_notEncodedBuffer[i] = soundBuffer[segmentsEnd + i];
}
for (int i = 0; i < segmentCount; i++)
{
byte[] segment = new byte[byteCap];
for (int j = 0; j < segment.Length; j++)
segment[j] = soundBuffer[(i * byteCap) + j];
int len;
byte[] buff = _encoder.Encode(segment, segment.Length, out len);
SendToServer(buff, len);
}
}
public void UpdateAudioOutput(byte[] ba, int len)
{
int outlen = len;
byte[] buff = new byte[len];
buff = _decoder.Decode(ba, outlen, out outlen);
_playBuffer.AddSamples(buff, 0, outlen);
}
private void SendToServer(byte[] EncodedAudio, int Length)
{
print("SENDING AUDIO");
//print("audio length : " + EncodedAudio.Length);
_client.Send(EncodedAudio, Length);
//UpdateAudioOutput(EncodedAudio, Length);
}
private void StopEncoding()
{
_timer.Stop();
_waveIn.StopRecording();
_waveIn.Dispose();
_waveIn = null;
_waveOut.Stop();
_waveOut.Dispose();
_waveOut = null;
_playBuffer = null;
_encoder.Dispose();
_encoder = null;
_decoder.Dispose();
_decoder = null;
}
private void OnApplicationQuit()
{
StopEncoding();
}
现在是 tcp 发送和接收,它们对于客户端和服务器来说几乎是一样的
public void Send(byte[] data, int customParamLen = 0)
{
if (!socketReady)
{
return;
}
byte messageType = (1 << 3); // assume that 0000 1000 would be the Message type
byte[] message = data;
byte[] length = BitConverter.GetBytes(message.Length);
byte[] customParam = BitConverter.GetBytes(customParamLen); //length also 4/sizeof(int)
byte[] buffer = new byte[sizeof(int) + message.Length + 1 + customParam.Length];
buffer[0] = messageType;
//Enter length in the buffer
for (int i = 0; i < sizeof(int); i++)
{
buffer[i + 1] = length[i];
}
//Enter data in the buffer
for (int i = 0; i < message.Length; i++)
{
buffer[i + 1 + sizeof(int)] = message[i];
}
//Enter custom Param in the buffer
for (int i = 0; i < sizeof(int); i++)
{
buffer[i + 1 + sizeof(int) + message.Length] = customParam[i];
}
heavyStream.Write(buffer, 0, buffer.Length);
print("Writtin bytes");
}
if (heavyStream.DataAvailable)
{
print("Data Receiving YAY!");
//Get message Type
byte messageType = (byte)heavyStream.ReadByte();
//Get length of the Data
byte[] lengthBuffer = new byte[sizeof(int)];
int recv = heavyStream.Read(lengthBuffer, 0, lengthBuffer.Length);
if (recv == sizeof(int))
{
int messageLen = BitConverter.ToInt32(lengthBuffer, 0);
//Get the Data
byte[] messageBuffer = new byte[messageLen];
recv = heavyStream.Read(messageBuffer, 0, messageBuffer.Length);
if (recv == messageLen)
{
// messageBuffer contains the whole message ...
//Get length paramater needed for opus to decode
byte[] customParamAudioLen = new byte[sizeof(int)];
recv = heavyStream.Read(customParamAudioLen, 0, customParamAudioLen.Length);
if (recv == sizeof(int))
{
AudioReceivers(messageBuffer, BitConverter.ToInt32(customParamAudioLen, 0) - 5);
print("Done! Everything went straight as planned");
}
}
}
现在的问题是音频断断续续的,中间有间隙,时间越久越不同步。
更新
仍未修复。
您似乎只是直接发送音频,接收端没有抖动缓冲区。这意味着如果您的延迟有任何变化,您将开始听到间隙。
你需要做的是在客户端缓冲音频 - 直到你有足够的时间,比如 400 毫秒,然后开始播放。这为您提供了额外的缓冲时间来应对抖动。
这是一种非常幼稚的方法,但为您提供了一些可玩的东西 - 您可能想要查看自适应抖动缓冲区,并且可能切换到 UDP 而不是 TCP 以获得更好的性能。使用 UDP,您将需要处理丢失的数据包、乱序等问题。
看看有抖动缓冲器的 Speex https://github.com/xiph/speex or Mumble which uses Speex for VOIP https://github.com/mumble-voip/mumble