Distorted microphone audio when the loudspeaker is enabled (Xamarin.iOS)
I am maintaining a push-to-talk (PTT) VoIP application.
When a PTT call is running, the app creates an audio session:
m_AudioSession = AVAudioSession.SharedInstance();
NSError error;
if (!m_AudioSession.SetCategory(AVAudioSession.CategoryPlayAndRecord, AVAudioSessionCategoryOptions.DefaultToSpeaker | AVAudioSessionCategoryOptions.AllowBluetooth, out error))
{
IOSErrorLogger.Log(DammLoggerLevel.Error, TAG, error, "Error setting the category");
}
if (!m_AudioSession.SetMode(AVAudioSession.ModeVoiceChat, out error))
{
IOSErrorLogger.Log(DammLoggerLevel.Error, TAG, error, "Error setting the mode");
}
if (!m_AudioSession.OverrideOutputAudioPort(AVAudioSessionPortOverride.Speaker, out error))
{
IOSErrorLogger.Log(DammLoggerLevel.Error, TAG, error, "Error redirecting the audio to the loudspeaker");
}
if (!m_AudioSession.SetPreferredIOBufferDuration(0.06, out error)) // 60 milliseconds
{
IOSErrorLogger.Log(DammLoggerLevel.Error, TAG, error, "Error setting the preferred buffer duration");
}
if (!m_AudioSession.SetPreferredSampleRate(8000, out error)) // Hz
{
IOSErrorLogger.Log(DammLoggerLevel.Error, TAG, error, "Error setting the preferred sample rate");
}
if (!m_AudioSession.SetActive(true, out error))
{
IOSErrorLogger.Log(DammLoggerLevel.Error, TAG, error, "Error activating the audio session");
}
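Note that SetPreferredIOBufferDuration and SetPreferredSampleRate are only requests; iOS is free to grant different values, particularly after the output is overridden to the built-in speaker. As a minimal diagnostic sketch, reusing the question's m_AudioSession and logger, the actually granted values can be read back after activation:
// The "preferred" values are hints only; read back what iOS actually granted.
double grantedSampleRate = m_AudioSession.SampleRate; // often 44100 or 48000 on real hardware
double grantedBufferDuration = m_AudioSession.IOBufferDuration; // may differ from the requested 0.06
DammLogger.Log(DammLoggerLevel.Warn, TAG, "Granted sample rate: {0} Hz, IO buffer duration: {1} s", grantedSampleRate, grantedBufferDuration);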
The received audio is played using an OutputAudioQueue, and the microphone audio is captured using a Voice-Processing I/O unit (as described in the Apple documentation: https://developer.apple.com/documentation/avfaudio/avaudiosession/mode/1616455-voicechat).
The initialization code for the Voice-Processing I/O unit is:
AudioStreamBasicDescription audioFormat = new AudioStreamBasicDescription()
{
SampleRate = SAMPLERATE_8000,
Format = AudioFormatType.LinearPCM,
FormatFlags = AudioFormatFlags.LinearPCMIsSignedInteger | AudioFormatFlags.LinearPCMIsPacked,
FramesPerPacket = 1,
ChannelsPerFrame = CHANNELS,
BitsPerChannel = BITS_X_SAMPLE,
BytesPerPacket = BYTES_X_SAMPLE,
BytesPerFrame = BYTES_X_FRAME,
Reserved = 0
};
AudioComponent audioComp = AudioComponent.FindComponent(AudioTypeOutput.VoiceProcessingIO);
AudioUnit.AudioUnit voiceProcessing = new AudioUnit.AudioUnit(audioComp);
AudioUnitStatus unitStatus = AudioUnitStatus.NoError;
unitStatus = voiceProcessing.SetEnableIO(true, AudioUnitScopeType.Input, ELEM_Mic);
if (unitStatus != AudioUnitStatus.NoError)
{
DammLogger.Log(DammLoggerLevel.Warn, TAG, "Audio Unit SetEnableIO(true, AudioUnitScopeType.Input, ELEM_Mic) returned: {0}", unitStatus);
}
unitStatus = voiceProcessing.SetEnableIO(true, AudioUnitScopeType.Output, ELEM_Speaker);
if (unitStatus != AudioUnitStatus.NoError)
{
DammLogger.Log(DammLoggerLevel.Warn, TAG, "Audio Unit SetEnableIO(false, AudioUnitScopeType.Output, ELEM_Speaker) returned: {0}", unitStatus);
}
unitStatus = voiceProcessing.SetFormat(audioFormat, AudioUnitScopeType.Output, ELEM_Mic);
if (unitStatus != AudioUnitStatus.NoError)
{
DammLogger.Log(DammLoggerLevel.Warn, TAG, "Audio Unit SetFormat (MIC-OUTPUT) returned: {0}", unitStatus);
}
unitStatus = voiceProcessing.SetFormat(audioFormat, AudioUnitScopeType.Input, ELEM_Speaker);
if (unitStatus != AudioUnitStatus.NoError)
{
DammLogger.Log(DammLoggerLevel.Warn, TAG, "Audio Unit SetFormat (ELEM 0-INPUT) returned: {0}", unitStatus);
}
unitStatus = voiceProcessing.SetRenderCallback(AudioUnit_RenderCallback, AudioUnitScopeType.Input, ELEM_Speaker);
if (unitStatus != AudioUnitStatus.NoError)
{
DammLogger.Log(DammLoggerLevel.Warn, TAG, "Audio Unit SetRenderCallback returned: {0}", unitStatus);
}
...
voiceProcessing.Initialize();
voiceProcessing.Start();
The RenderCallback function is:
private AudioUnitStatus AudioUnit_RenderCallback(AudioUnitRenderActionFlags actionFlags, AudioTimeStamp timeStamp, uint busNumber, uint numberFrames, AudioBuffers data)
{
AudioUnit.AudioUnit voiceProcessing = m_VoiceProcessing;
if (voiceProcessing != null)
{
// getting microphone input signal
var status = voiceProcessing.Render(ref actionFlags, timeStamp, ELEM_Mic, numberFrames, data);
if (status != AudioUnitStatus.OK)
{
return status;
}
if (data.Count > 0)
{
unsafe
{
short* samples = (short*)data[0].Data.ToPointer();
for (uint idxSrcFrame = 0; idxSrcFrame < numberFrames; idxSrcFrame++)
{
... send the collected microphone audio (samples[idxSrcFrame])
}
}
}
}
return AudioUnitStatus.NoError;
}
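Given the updates below, numberFrames turned out to be the key symptom, so a cheap diagnostic is to log whenever the callback receives an unexpected frame count. This is only a sketch reusing the helpers already shown; the 256 figure assumes the observed 32 ms callback interval at 8000 Hz:
// Hypothetical check inside AudioUnit_RenderCallback: at 8000 Hz with a
// 32 ms callback interval, each render should deliver 8000 * 0.032 = 256 frames.
const uint EXPECTED_FRAMES = 256;
if (numberFrames != EXPECTED_FRAMES)
{
    DammLogger.Log(DammLoggerLevel.Warn, TAG, "Unexpected frame count: {0} (expected {1})", numberFrames, EXPECTED_FRAMES);
}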
The problem I have is that if the loudspeaker is enabled via m_AudioSession.OverrideOutputAudioPort(AVAudioSessionPortOverride.Speaker, out error), the microphone audio is corrupted (the speech is sometimes unintelligible).
If the loudspeaker is not enabled (AVAudioSessionPortOverride.Speaker is not set), the audio is perfectly fine.
I have verified that NumberChannels in the AudioBuffers returned by the Render function is 1 (mono).
Any hint that helps solve this problem is greatly appreciated. Thanks.
Update:
The AudioUnit_RenderCallback method is invoked every 32 ms. When the loudspeaker is disabled, the number of frames received per callback is 256, which is exact for a sample rate of 8000 Hz. When the loudspeaker is enabled, the number of frames received is 85.
In both cases GetAudioFormat returns the expected values: BitsPerChannel=16, BytesPerFrame=2, FramesPerPacket=1, ChannelsPerFrame=1, SampleRate=8000.
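For reference, the disabled-speaker figure matches the configuration exactly: 8000 Hz × 0.032 s = 256 frames per callback. With the loudspeaker enabled, 85 frames every 32 ms works out to only about 2656 samples per second, roughly a third of the requested 8000 Hz, which suggests samples are being dropped somewhere and would by itself explain the unintelligible audio.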
Update:
I ended up using the sample rate coming from the hardware and doing the downsampling myself. To be clear, the audio unit should be able to perform the downsampling itself (https://developer.apple.com/library/archive/documentation/MusicAudio/Conceptual/AudioUnitHostingGuide_iOS/AudioUnitHostingFundamentals/AudioUnitHostingFundamentals.html#//apple_ref/doc/uid/TP40009492-CH3-SW11), but I could not get that to work when the loudspeaker is enabled.
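The downsampling code is not shown in the post; as a rough sketch of the approach, assuming the hardware delivers 48000 Hz mono 16-bit PCM and using plain block averaging (a production resampler would apply a proper anti-aliasing low-pass filter first):
// Naive 48000 Hz -> 8000 Hz decimation (factor 6) by averaging each group
// of 6 source samples. Mono, 16-bit signed PCM assumed. Illustrative only.
private const int DECIMATION = 6; // 48000 / 8000

private static int Downsample(short[] src, int srcCount, short[] dst)
{
    int dstCount = srcCount / DECIMATION;
    for (int i = 0; i < dstCount; i++)
    {
        int acc = 0;
        for (int j = 0; j < DECIMATION; j++)
        {
            acc += src[i * DECIMATION + j];
        }
        dst[i] = (short)(acc / DECIMATION); // average of shorts stays within short range
    }
    return dstCount;
}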
I hope you are testing on an actual device and not on the simulator.
In the code, have you tried using:
sampleRate = AudioSession.CurrentHardwareSampleRate;
It is better not to force a sample rate but to check the sample rate coming from the hardware. It may be that while the loudspeaker is in use the hardware changes its sample rate, and that creates the problem.
I would suggest recording with the above change to see whether the audio improves, and then trying other flags.
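Applied to the question's initialization code, that suggestion would look roughly as follows. This is only a sketch: it uses AVAudioSession.SharedInstance().SampleRate (the modern counterpart of the deprecated AudioToolbox AudioSession.CurrentHardwareSampleRate) and reuses the question's constants:
// Query the rate the hardware is actually running at instead of hard-coding 8000.
double hwSampleRate = AVAudioSession.SharedInstance().SampleRate;
AudioStreamBasicDescription audioFormat = new AudioStreamBasicDescription()
{
    SampleRate = hwSampleRate, // follow the hardware; downsample afterwards if 8000 Hz is needed
    Format = AudioFormatType.LinearPCM,
    FormatFlags = AudioFormatFlags.LinearPCMIsSignedInteger | AudioFormatFlags.LinearPCMIsPacked,
    FramesPerPacket = 1,
    ChannelsPerFrame = CHANNELS,
    BitsPerChannel = BITS_X_SAMPLE,
    BytesPerPacket = BYTES_X_SAMPLE,
    BytesPerFrame = BYTES_X_FRAME,
    Reserved = 0
};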