为什么我使用 WinAPI C++ 录制的声音无法在 audacity 中正确播放?
Why is my sound recording with WinAPI C++ not played back properly in audacity?
我正在尝试通过麦克风录制声音,但越来越难了。我已经尝试了几种方法,但它不起作用。我创建了一个仅用于测试的项目,稍后将在更大的项目中实施。这是相关项目的代码:
#include <iostream>
#include <fstream>
#include <Windows.h>
#include <dshow.h>
#include <mfapi.h>
#include <mfidl.h>
#include <mfreadwrite.h>
#include <ks.h>
#include <ksmedia.h>
#pragma comment(lib, "mfplat")
#pragma comment(lib, "mf")
#pragma comment(lib, "mfreadwrite")
#pragma comment(lib, "mfuuid")
#pragma comment(lib, "strmbase")
// Test program: activates the first enumerated audio capture device through
// Media Foundation, prints the device's native wave format to the console,
// and then appends every captured buffer to a header-less raw file in an
// endless loop.
//
// NOTE(review): every failure below is only logged; execution continues with
// whatever pointers are left, so a failed step is followed by a dereference
// of a possibly NULL/uninitialized pointer.
int main() {
// The result of MFStartup is overwritten by the next call without being checked.
HRESULT hr = MFStartup(MF_VERSION);
IMFMediaSource* pSoundSource = NULL;
IMFAttributes* pSoundConfig = NULL;
IMFActivate** ppSoundDevices = NULL;
// Attribute store used to request audio-capture device sources.
hr = MFCreateAttributes(&pSoundConfig, 1);
if (FAILED(hr)) {
std::cout << "Failed to create attribute store";
}
// The result of SetGUID is not checked before hr is reassigned.
hr = pSoundConfig->SetGUID(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_AUDCAP_GUID);
UINT32 count;
hr = MFEnumDeviceSources(pSoundConfig, &ppSoundDevices, &count);
if (FAILED(hr)) {
std::cout << "Failed to enumerate capture devices";
}
// NOTE(review): index 0 is used without reading `count`; if no device was
// enumerated this indexes an empty array.
hr = ppSoundDevices[0]->ActivateObject(IID_PPV_ARGS(&pSoundSource));
if (FAILED(hr)) {
std::cout << "Failed to connect microphone to source";
}
IMFSourceReader* pSoundReader;
// The same attribute store that was used for enumeration is passed as the
// reader's configuration.
hr = MFCreateSourceReaderFromMediaSource(pSoundSource, pSoundConfig, &pSoundReader);
if (FAILED(hr)) {
std::cout << "Failed to create source reader";
}
/*This part is for getting the audio format that the microphone outputs*/
/*______________________*/
IMFMediaType* pSoundType = NULL;
DWORD dwMediaTypeIndex = 0;
DWORD dwStreamIndex = 0;
// Queries media type 0 of stream 0; hr is not checked afterwards.
hr = pSoundReader->GetNativeMediaType(dwStreamIndex, dwMediaTypeIndex, &pSoundType);
LPVOID soundRepresentation;
// Return value ignored; soundRepresentation is then read as an AM_MEDIA_TYPE.
pSoundType->GetRepresentation(AM_MEDIA_TYPE_REPRESENTATION, &soundRepresentation);
// subSoundType is stored but not used anywhere below.
GUID subSoundType = ((AM_MEDIA_TYPE*)soundRepresentation)->subtype;
BYTE* pbSoundFormat = ((AM_MEDIA_TYPE*)soundRepresentation)->pbFormat;
GUID soundFormatType = ((AM_MEDIA_TYPE*)soundRepresentation)->formattype;
// Prints the marker "8" when the format block is tagged FORMAT_WaveFormatEx.
if (soundFormatType == FORMAT_WaveFormatEx) { std::cout << 8; }
// NOTE(review): the cast is unconditional; the SubFormat read below is only
// meaningful if the format block really is a WAVEFORMATEXTENSIBLE — confirm
// via wFormatTag / cbSize before trusting it.
WAVEFORMATEXTENSIBLE* soundFormat = (WAVEFORMATEXTENSIBLE*)pbSoundFormat;
std::cout << std::endl;
// Dump the basic wave-format fields, one per line.
std::cout << soundFormat->Format.wFormatTag << std::endl;
std::cout << soundFormat->Format.nChannels << std::endl;
std::cout << soundFormat->Format.nBlockAlign << std::endl;
std::cout << soundFormat->Format.nSamplesPerSec << std::endl;
std::cout << soundFormat->Format.wBitsPerSample << std::endl;
std::cout << soundFormat->Format.cbSize << std::endl;
if (soundFormat->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
std::cout << "IEEE-FLOAT!" << std::endl;
/*_____________________*/
DWORD streamIndex, flags;
LONGLONG llTimeStamp;
IMFSample* pSoundSample;
// Capture loop: there is no break or return, so it runs until the process is
// terminated.
while (true) {
// Read the next sample from the first audio stream.
hr = pSoundReader->ReadSample(MF_SOURCE_READER_FIRST_AUDIO_STREAM, 0, &streamIndex, &flags, &llTimeStamp, &pSoundSample);
if (FAILED(hr)) {
std::cout << "Failed to get sound from microphone";
}
// Only iterations that produced a sample write anything.
if (pSoundSample != NULL) {
IMFMediaBuffer* pSoundBuffer;
// Return values of ConvertToContiguousBuffer and GetCurrentLength are ignored.
pSoundSample->ConvertToContiguousBuffer(&pSoundBuffer);
DWORD soundlength;
pSoundBuffer->GetCurrentLength(&soundlength);
unsigned char* sounddata;
// soundlength is passed again here as Lock's current-length output.
hr = pSoundBuffer->Lock(&sounddata, NULL, &soundlength);
if (FAILED(hr)) {
std::cout << "Failed to get sounddata from buffer";
}
// The output file is reopened in append mode for every sample.
// NOTE(review): the stream is opened with std::ios::app only (no
// std::ios::binary) and the bytes are written one at a time via operator<<.
// On Windows a text-mode stream translates newline bytes, which would corrupt
// raw audio — consider binary mode with write().
// NOTE(review): the backslashes in the path literal are not escaped.
std::ofstream file;
file.open("C:\Users\user\Documents\test.raw", std::ios::app);
for (unsigned int i = 0; i < soundlength; i++)
file << sounddata[i];
file.close();
// NOTE(review): no Unlock() on the buffer and no Release() on the buffer or
// sample is visible in this loop.
}
}
}
用于确定数据格式的那部分代码在控制台上打印出:
8
65534
1
4
48000
32
22
IEEE-FLOAT!
据此,我确定声音是以 1 声道 32 位 48000Hz IEEE-FLOAT 格式录制的。现在我需要播放这个声音。问题是大多数 API 将采用 16 位 PCM 进行声音播放。
我尝试将声音转换为 16 位 PCM,但效果不佳。如果你知道如何做到这一点,你能展示一些代码吗?此外,在此处提供的代码中,我将声音附加到没有 header 的原始音频文件中。我听说 float 表示在 1 和 -1 之间,所以我尝试了以下代码来进行转换:
// Converts 32-bit IEEE float samples (native byte order, nominal range
// [-1.0, 1.0]) into signed 16-bit PCM and appends the result to `newdata`.
//
// sounddata     raw bytes holding consecutive 4-byte float samples; a null
//               pointer is treated as "no input".
// newdata       receives two bytes per converted sample (appended; existing
//               contents are kept).
// soundlength   number of valid bytes in `sounddata`; trailing bytes that do
//               not form a complete 4-byte sample are ignored, and a zero or
//               negative length converts nothing.
// little_endian byte order of the emitted 16-bit samples. The default (false)
//               emits the high byte first, as this function has always done;
//               pass true for little-endian consumers such as WAV data.
//
// Samples outside [-1.0, 1.0] are clamped instead of overflowing `short`,
// NaN is written as silence, and values are rounded to nearest (half away
// from zero) so positive and negative samples are treated symmetrically.
void iefloat_to_pcm16(unsigned char* sounddata, std::vector<unsigned char>& newdata, int soundlength, bool little_endian = false) {
    if (!sounddata) {
        return;
    }

    const float kScale = 32767.0f;

    for (int i = 0; i + 3 < soundlength; i += 4) {
        // memcpy avoids alignment and aliasing problems when reinterpreting
        // the raw bytes as a float.
        float f;
        memcpy(&f, sounddata + i, sizeof(f));

        float scaled = f * kScale;
        if (scaled != scaled) {
            scaled = 0.0f;          // NaN compares unequal to itself
        } else if (scaled > kScale) {
            scaled = kScale;        // clamp: converting an out-of-range float is undefined
        } else if (scaled < -kScale) {
            scaled = -kScale;
        }

        // Round half away from zero; a plain "+ 0.5" followed by truncation
        // rounds negative samples toward zero instead.
        const short pcm16 = static_cast<short>(scaled < 0.0f ? scaled - 0.5f : scaled + 0.5f);

        // Go through an unsigned value so the byte split does not depend on
        // how a negative number is right-shifted.
        const unsigned int bits = static_cast<unsigned short>(pcm16);
        const unsigned char high = static_cast<unsigned char>((bits >> 8) & 0xFFu);
        const unsigned char low = static_cast<unsigned char>(bits & 0xFFu);

        if (little_endian) {
            newdata.push_back(low);
            newdata.push_back(high);
        } else {
            newdata.push_back(high);
            newdata.push_back(low);
        }
    }
}
此代码似乎无效。
在此之后,我一直在使用 Audacity 的"文件 > 导入 > 原始数据"功能,它允许导入原始数据并指定数据的格式。我选择了单声道、32 位浮点、48 kHz,并尝试了所有字节顺序,但都无济于事。对"转换"为 16 位 PCM 的数据,我也做了同样的尝试。结果只是一片随机噪音:在我发出声音的位置可以看到尖峰,其余位置没有声音,但这些尖峰本身也只是噪音。我是不是哪里做错了?
音频文件是二进制格式,但您在文件中放置了文本。
file << sounddata[i];
这是一个格式化的插入运算符,它将数据转换为文本表示形式。请改用 file.write()。
您可能还需要调整打开流时使用的标志(例如加上 std::ios::binary)。C++ 标准 I/O 流并不是为二进制数据而设计的。由于您已经大量使用 Windows API 对象,不妨直接改用 CreateFile / WriteFile,它们在底层不会进行任何字符转换。
我正在尝试通过麦克风录制声音,但越来越难了。我已经尝试了几种方法,但它不起作用。我创建了一个仅用于测试的项目,稍后将在更大的项目中实施。这是相关项目的代码:
#include <iostream>
#include <fstream>
#include <Windows.h>
#include <dshow.h>
#include <mfapi.h>
#include <mfidl.h>
#include <mfreadwrite.h>
#include <ks.h>
#include <ksmedia.h>
#pragma comment(lib, "mfplat")
#pragma comment(lib, "mf")
#pragma comment(lib, "mfreadwrite")
#pragma comment(lib, "mfuuid")
#pragma comment(lib, "strmbase")
// Test program: activates the first enumerated audio capture device through
// Media Foundation, prints the device's native wave format to the console,
// and then appends every captured buffer to a header-less raw file in an
// endless loop.
//
// NOTE(review): every failure below is only logged; execution continues with
// whatever pointers are left, so a failed step is followed by a dereference
// of a possibly NULL/uninitialized pointer.
int main() {
// The result of MFStartup is overwritten by the next call without being checked.
HRESULT hr = MFStartup(MF_VERSION);
IMFMediaSource* pSoundSource = NULL;
IMFAttributes* pSoundConfig = NULL;
IMFActivate** ppSoundDevices = NULL;
// Attribute store used to request audio-capture device sources.
hr = MFCreateAttributes(&pSoundConfig, 1);
if (FAILED(hr)) {
std::cout << "Failed to create attribute store";
}
// The result of SetGUID is not checked before hr is reassigned.
hr = pSoundConfig->SetGUID(MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_AUDCAP_GUID);
UINT32 count;
hr = MFEnumDeviceSources(pSoundConfig, &ppSoundDevices, &count);
if (FAILED(hr)) {
std::cout << "Failed to enumerate capture devices";
}
// NOTE(review): index 0 is used without reading `count`; if no device was
// enumerated this indexes an empty array.
hr = ppSoundDevices[0]->ActivateObject(IID_PPV_ARGS(&pSoundSource));
if (FAILED(hr)) {
std::cout << "Failed to connect microphone to source";
}
IMFSourceReader* pSoundReader;
// The same attribute store that was used for enumeration is passed as the
// reader's configuration.
hr = MFCreateSourceReaderFromMediaSource(pSoundSource, pSoundConfig, &pSoundReader);
if (FAILED(hr)) {
std::cout << "Failed to create source reader";
}
/*This part is for getting the audio format that the microphone outputs*/
/*______________________*/
IMFMediaType* pSoundType = NULL;
DWORD dwMediaTypeIndex = 0;
DWORD dwStreamIndex = 0;
// Queries media type 0 of stream 0; hr is not checked afterwards.
hr = pSoundReader->GetNativeMediaType(dwStreamIndex, dwMediaTypeIndex, &pSoundType);
LPVOID soundRepresentation;
// Return value ignored; soundRepresentation is then read as an AM_MEDIA_TYPE.
pSoundType->GetRepresentation(AM_MEDIA_TYPE_REPRESENTATION, &soundRepresentation);
// subSoundType is stored but not used anywhere below.
GUID subSoundType = ((AM_MEDIA_TYPE*)soundRepresentation)->subtype;
BYTE* pbSoundFormat = ((AM_MEDIA_TYPE*)soundRepresentation)->pbFormat;
GUID soundFormatType = ((AM_MEDIA_TYPE*)soundRepresentation)->formattype;
// Prints the marker "8" when the format block is tagged FORMAT_WaveFormatEx.
if (soundFormatType == FORMAT_WaveFormatEx) { std::cout << 8; }
// NOTE(review): the cast is unconditional; the SubFormat read below is only
// meaningful if the format block really is a WAVEFORMATEXTENSIBLE — confirm
// via wFormatTag / cbSize before trusting it.
WAVEFORMATEXTENSIBLE* soundFormat = (WAVEFORMATEXTENSIBLE*)pbSoundFormat;
std::cout << std::endl;
// Dump the basic wave-format fields, one per line.
std::cout << soundFormat->Format.wFormatTag << std::endl;
std::cout << soundFormat->Format.nChannels << std::endl;
std::cout << soundFormat->Format.nBlockAlign << std::endl;
std::cout << soundFormat->Format.nSamplesPerSec << std::endl;
std::cout << soundFormat->Format.wBitsPerSample << std::endl;
std::cout << soundFormat->Format.cbSize << std::endl;
if (soundFormat->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
std::cout << "IEEE-FLOAT!" << std::endl;
/*_____________________*/
DWORD streamIndex, flags;
LONGLONG llTimeStamp;
IMFSample* pSoundSample;
// Capture loop: there is no break or return, so it runs until the process is
// terminated.
while (true) {
// Read the next sample from the first audio stream.
hr = pSoundReader->ReadSample(MF_SOURCE_READER_FIRST_AUDIO_STREAM, 0, &streamIndex, &flags, &llTimeStamp, &pSoundSample);
if (FAILED(hr)) {
std::cout << "Failed to get sound from microphone";
}
// Only iterations that produced a sample write anything.
if (pSoundSample != NULL) {
IMFMediaBuffer* pSoundBuffer;
// Return values of ConvertToContiguousBuffer and GetCurrentLength are ignored.
pSoundSample->ConvertToContiguousBuffer(&pSoundBuffer);
DWORD soundlength;
pSoundBuffer->GetCurrentLength(&soundlength);
unsigned char* sounddata;
// soundlength is passed again here as Lock's current-length output.
hr = pSoundBuffer->Lock(&sounddata, NULL, &soundlength);
if (FAILED(hr)) {
std::cout << "Failed to get sounddata from buffer";
}
// The output file is reopened in append mode for every sample.
// NOTE(review): the stream is opened with std::ios::app only (no
// std::ios::binary) and the bytes are written one at a time via operator<<.
// On Windows a text-mode stream translates newline bytes, which would corrupt
// raw audio — consider binary mode with write().
// NOTE(review): the backslashes in the path literal are not escaped.
std::ofstream file;
file.open("C:\Users\user\Documents\test.raw", std::ios::app);
for (unsigned int i = 0; i < soundlength; i++)
file << sounddata[i];
file.close();
// NOTE(review): no Unlock() on the buffer and no Release() on the buffer or
// sample is visible in this loop.
}
}
}
用于确定数据格式的那部分代码在控制台上打印出:
8
65534
1
4
48000
32
22
IEEE-FLOAT!
据此,我确定声音是以 1 声道 32 位 48000Hz IEEE-FLOAT 格式录制的。现在我需要播放这个声音。问题是大多数 API 将采用 16 位 PCM 进行声音播放。
我尝试将声音转换为 16 位 PCM,但效果不佳。如果你知道如何做到这一点,你能展示一些代码吗?此外,在此处提供的代码中,我将声音附加到没有 header 的原始音频文件中。我听说 float 表示在 1 和 -1 之间,所以我尝试了以下代码来进行转换:
// Converts 32-bit IEEE float samples (native byte order, nominal range
// [-1.0, 1.0]) into signed 16-bit PCM and appends the result to `newdata`.
//
// sounddata     raw bytes holding consecutive 4-byte float samples; a null
//               pointer is treated as "no input".
// newdata       receives two bytes per converted sample (appended; existing
//               contents are kept).
// soundlength   number of valid bytes in `sounddata`; trailing bytes that do
//               not form a complete 4-byte sample are ignored, and a zero or
//               negative length converts nothing.
// little_endian byte order of the emitted 16-bit samples. The default (false)
//               emits the high byte first, as this function has always done;
//               pass true for little-endian consumers such as WAV data.
//
// Samples outside [-1.0, 1.0] are clamped instead of overflowing `short`,
// NaN is written as silence, and values are rounded to nearest (half away
// from zero) so positive and negative samples are treated symmetrically.
void iefloat_to_pcm16(unsigned char* sounddata, std::vector<unsigned char>& newdata, int soundlength, bool little_endian = false) {
    if (!sounddata) {
        return;
    }

    const float kScale = 32767.0f;

    for (int i = 0; i + 3 < soundlength; i += 4) {
        // memcpy avoids alignment and aliasing problems when reinterpreting
        // the raw bytes as a float.
        float f;
        memcpy(&f, sounddata + i, sizeof(f));

        float scaled = f * kScale;
        if (scaled != scaled) {
            scaled = 0.0f;          // NaN compares unequal to itself
        } else if (scaled > kScale) {
            scaled = kScale;        // clamp: converting an out-of-range float is undefined
        } else if (scaled < -kScale) {
            scaled = -kScale;
        }

        // Round half away from zero; a plain "+ 0.5" followed by truncation
        // rounds negative samples toward zero instead.
        const short pcm16 = static_cast<short>(scaled < 0.0f ? scaled - 0.5f : scaled + 0.5f);

        // Go through an unsigned value so the byte split does not depend on
        // how a negative number is right-shifted.
        const unsigned int bits = static_cast<unsigned short>(pcm16);
        const unsigned char high = static_cast<unsigned char>((bits >> 8) & 0xFFu);
        const unsigned char low = static_cast<unsigned char>(bits & 0xFFu);

        if (little_endian) {
            newdata.push_back(low);
            newdata.push_back(high);
        } else {
            newdata.push_back(high);
            newdata.push_back(low);
        }
    }
}
此代码似乎无效。
在此之后,我一直在使用 Audacity 的"文件 > 导入 > 原始数据"功能,它允许导入原始数据并指定数据的格式。我选择了单声道、32 位浮点、48 kHz,并尝试了所有字节顺序,但都无济于事。对"转换"为 16 位 PCM 的数据,我也做了同样的尝试。结果只是一片随机噪音:在我发出声音的位置可以看到尖峰,其余位置没有声音,但这些尖峰本身也只是噪音。我是不是哪里做错了?
音频文件是二进制格式,但您在文件中放置了文本。
file << sounddata[i];
这是一个格式化的插入运算符,它将数据转换为文本表示形式。请改用 file.write()。
您可能还需要调整打开流时使用的标志(例如加上 std::ios::binary)。C++ 标准 I/O 流并不是为二进制数据而设计的。由于您已经大量使用 Windows API 对象,不妨直接改用 CreateFile / WriteFile,它们在底层不会进行任何字符转换。