How do you use WASAPI in exclusive mode?
I am writing a piano simulator in which I continuously send buffers to WASAPI. I am trying to do this in AUDCLNT_SHAREMODE_EXCLUSIVE mode, but I still don't understand how to handle it.
With the code below, I instantiate a separate thread for each call to PlayBuf().
The problem is that after the first thread is instantiated, trying to instantiate a second one fails with AUDCLNT_E_DEVICE_IN_USE.
This is of course my fault, as I have not yet understood how to use wasapi in EXCLUSIVE mode.
Thanks
void PlayBuf(short *fileBytes, int fileSize)
{
    HRESULT hr;
    IMMDeviceEnumerator *deviceEnumerator = NULL;
    IMMDevice* audioDevice;
    IAudioClient2* audioClient;
    WAVEFORMATEX wfx = {};
    IAudioRenderClient* audioRenderClient;
    UINT32 bufferSizeInFrames;
    UINT32 bufferPadding;
    int16_t* buffer;

    CoInitialize(NULL);
    hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL,
                          __uuidof(IMMDeviceEnumerator), (LPVOID *)(&deviceEnumerator));
    assert(hr == S_OK);
    hr = deviceEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &audioDevice);
    assert(hr == S_OK);
    deviceEnumerator->Release();
    hr = audioDevice->Activate(__uuidof(IAudioClient2), CLSCTX_ALL, NULL, (LPVOID*)(&audioClient));
    assert(hr == S_OK);
    audioDevice->Release();

    wfx.wFormatTag = WAVE_FORMAT_PCM;
    wfx.nChannels = 2;
    wfx.nSamplesPerSec = 44100;
    wfx.wBitsPerSample = 16;
    wfx.nBlockAlign = (wfx.nChannels * wfx.wBitsPerSample) / 8;
    wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign;

    const int64_t REFTIMES_PER_SEC = 10000000;
    REFERENCE_TIME requestedSoundBufferDuration = REFTIMES_PER_SEC * DurataSuono;
    DWORD initStreamFlags = (AUDCLNT_STREAMFLAGS_RATEADJUST);
    // each call initializes its own exclusive-mode client on the default device;
    // a second concurrent call fails here with AUDCLNT_E_DEVICE_IN_USE
    hr = audioClient->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE, initStreamFlags,
                                 requestedSoundBufferDuration, 0, &wfx, NULL);
    assert(hr == S_OK);
    hr = audioClient->GetService(__uuidof(IAudioRenderClient),
                                 (LPVOID*)(&audioRenderClient));
    assert(hr == S_OK);
    hr = audioClient->GetBufferSize(&bufferSizeInFrames);
    assert(hr == S_OK);
    audioClient->Reset();
    hr = audioClient->Start();
    assert(hr == S_OK);
    hr = audioRenderClient->GetBuffer(fileSize, (BYTE**)(&buffer));
    assert(hr == S_OK);
    hr = audioRenderClient->ReleaseBuffer(fileSize, 0);
    assert(hr == S_OK);
    Sleep(2000);
    audioClient->Stop();
    audioClient->Release();
    audioRenderClient->Release();
}
I spent an hour putting together a basic example for you. It's in C# using my own audio I/O library XT-Audio (so, plug intended), but translating it to raw wasapi in C++ would probably take me half a day. In any case, I believe this comes very close to what you're looking for. As you can see below, this application has the world's greatest GUI:
As soon as you press Start, the app begins translating keyboard input into audio. You can press and hold the c, d, e, f and g keyboard keys to generate notes, and it handles multiple overlapping notes (chords), too. I selected wasapi shared mode as the backend because it supports floating-point audio, but this works equally well with exclusive mode if you translate the audio to 16-bit integer format.
The difference between using this library and raw wasapi is that the audio thread is managed by the library, and the application has its audio callback function invoked periodically to synthesize audio data. However, this translates easily back to native wasapi in C++: just call IAudioRenderClient::GetBuffer/ReleaseBuffer in a loop on a background thread, and do your processing in between those calls.
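In native wasapi terms, that loop would look roughly like the minimal sketch below. This is only an outline, not the library's code: it assumes an already-initialized shared-mode IAudioClient/IAudioRenderClient pair, a running flag, and a RenderFrame() synthesis function of your own; error handling is omitted.

// minimal shared-mode render loop sketch; audioClient, audioRenderClient,
// running and RenderFrame() are assumed to exist already
UINT32 bufferFrames;
audioClient->GetBufferSize(&bufferFrames);
audioClient->Start();
while (running)
{
    // ask how much of the buffer is still queued, then fill only the free part
    UINT32 padding = 0;
    audioClient->GetCurrentPadding(&padding);
    UINT32 framesToWrite = bufferFrames - padding;
    BYTE* data = NULL;
    audioRenderClient->GetBuffer(framesToWrite, &data);
    float* out = reinterpret_cast<float*>(data); // shared mode mix format is float32
    for (UINT32 f = 0; f < framesToWrite; ++f)
        for (UINT32 c = 0; c < 2; ++c)
            out[f * 2 + c] = RenderFrame(c);     // your synthesis goes here
    audioRenderClient->ReleaseBuffer(framesToWrite, 0);
    Sleep(5); // crude pacing; or use event-driven waiting (AUDCLNT_STREAMFLAGS_EVENTCALLBACK) as shown later
}
audioClient->Stop();

For exclusive mode you would write int16_t samples instead, e.g. (int16_t)(sample * 32767), matching the 16-bit format mentioned above.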
Anyway, here is the key part: this application uses only 2 threads, one for the UI (managed by winforms) and one for audio (managed by the audio library), and yet it is able to play multiple notes simultaneously, which I believe is the core of your question.
I uploaded the full visual studio solution and binaries here: WasapiSynthSample. But for completeness, I'll post the interesting parts of the code below.
using System;
using System.Threading;
using System.Windows.Forms;
using Xt;

namespace WasapiSynthSample
{
    public partial class Program : Form
    {
        // sampling rate
        const int Rate = 48000;
        // stereo
        const int Channels = 2;
        // default format for wasapi shared mode
        const XtSample Sample = XtSample.Float32;
        // C, D, E, F, G
        static readonly float[] NoteFrequencies = { 523.25f, 587.33f, 659.25f, 698.46f, 783.99f };

        [STAThread]
        static void Main()
        {
            // initialize audio library
            using (var platform = XtAudio.Init(null, IntPtr.Zero, null))
            {
                Application.EnableVisualStyles();
                Application.SetCompatibleTextRenderingDefault(false);
                Application.ThreadException += OnApplicationThreadException;
                AppDomain.CurrentDomain.UnhandledException += OnCurrentDomainUnhandledException;
                Application.Run(new Program(platform));
            }
        }

        // pop a messagebox on any error
        static void OnApplicationThreadException(object sender, ThreadExceptionEventArgs e)
            => OnError(e.Exception);
        static void OnCurrentDomainUnhandledException(object sender, UnhandledExceptionEventArgs e)
            => OnError((Exception)e.ExceptionObject);
        static void OnError(Exception e)
        {
            var text = e.ToString();
            if (e is XtException xte) text = XtAudio.GetErrorInfo(xte.GetError()).ToString();
            MessageBox.Show(text);
        }

        XtStream _stream;
        readonly XtPlatform _platform;
        // note phases
        readonly float[] _phases = new float[5];
        // tracks key down/up
        readonly bool[] _notesActive = new bool[5];

        public Program(XtPlatform platform)
        {
            _platform = platform;
            InitializeComponent();
        }

        // activate note
        protected override void OnKeyDown(KeyEventArgs e)
        {
            base.OnKeyDown(e);
            if (e.KeyCode == Keys.C) _notesActive[0] = true;
            if (e.KeyCode == Keys.D) _notesActive[1] = true;
            if (e.KeyCode == Keys.E) _notesActive[2] = true;
            if (e.KeyCode == Keys.F) _notesActive[3] = true;
            if (e.KeyCode == Keys.G) _notesActive[4] = true;
        }

        // deactivate note
        protected override void OnKeyUp(KeyEventArgs e)
        {
            base.OnKeyUp(e);
            if (e.KeyCode == Keys.C) _notesActive[0] = false;
            if (e.KeyCode == Keys.D) _notesActive[1] = false;
            if (e.KeyCode == Keys.E) _notesActive[2] = false;
            if (e.KeyCode == Keys.F) _notesActive[3] = false;
            if (e.KeyCode == Keys.G) _notesActive[4] = false;
        }

        // stop stream
        void OnStop(object sender, EventArgs e)
        {
            _stream?.Stop();
            _stream?.Dispose();
            _stream = null;
            _start.Enabled = true;
            _stop.Enabled = false;
        }

        // start stream
        void OnStart(object sender, EventArgs e)
        {
            var service = _platform.GetService(XtSystem.WASAPI);
            var id = service.GetDefaultDeviceId(true);
            using (var device = service.OpenDevice(id))
            {
                var mix = new XtMix(Rate, Sample);
                var channels = new XtChannels(0, 0, Channels, 0);
                var format = new XtFormat(in mix, in channels);
                var buffer = device.GetBufferSize(in format).current;
                var streamParams = new XtStreamParams(true, OnBuffer, null, null);
                var deviceParams = new XtDeviceStreamParams(in streamParams, in format, buffer);
                _stream = device.OpenStream(in deviceParams, null);
                _stream.Start();
                _start.Enabled = false;
                _stop.Enabled = true;
            }
        }

        // this gets called on the audio thread by the audio library
        // but could just as well be your c++ code managing its own threads
        unsafe int OnBuffer(XtStream stream, in XtBuffer buffer, object user)
        {
            // process audio buffer of N frames
            for (int f = 0; f < buffer.frames; f++)
            {
                // compose current sample of all currently active notes
                float sample = 0.0f;
                for (int n = 0; n < NoteFrequencies.Length; n++)
                {
                    if (_notesActive[n])
                    {
                        _phases[n] += NoteFrequencies[n] / Rate;
                        if (_phases[n] >= 1.0f) _phases[n] = -1.0f;
                        float noteSample = (float)Math.Sin(2.0 * _phases[n] * Math.PI);
                        sample += noteSample / NoteFrequencies.Length;
                    }
                }
                // write current sample to output buffer
                for (int c = 0; c < Channels; c++)
                    ((float*)buffer.output)[f * Channels + c] = sample;
            }
            return 0;
        }
    }
}
I added FillBufferWasapi() to the thread code to make my code clearer. I don't have much experience with real-time applications, but I can't see the error.
int wavPlaybackSample = 0;
int k = 0;
while (flags != AUDCLNT_BUFFERFLAGS_SILENT)
{
    DWORD retval = WaitForSingleObject(hEvent, 2000);
    for (int ii = 0; ii < 255; ii++)
    {
        if (MyKeyDown[ii] == 1)
        {
            hr = audioRenderClient->GetBuffer(bufferSizeInFrames, (BYTE**)(&buffer));
            assert(hr == S_OK);
            for (UINT32 frameIndex = 0 + k; frameIndex < bufferSizeInFrames + k; ++frameIndex)
            {
                *buffer++ = bfn[MyKeyCode[ii]][wavPlaybackSample++]; // left
                *buffer++ = bfn[MyKeyCode[ii]][wavPlaybackSample++]; // right
            }
            k += bufferSizeInFrames;
            hr = audioRenderClient->ReleaseBuffer(bufferSizeInFrames, flags);
            assert(hr == S_OK);
            if (k >= MyBufferLength / 4)
            {
                k = 0;
                wavPlaybackSample = 0;
            }
        }
    }
}
Each time a key is pressed I set the corresponding flag to 1, so that the buffers containing the samples get summed.
The difference between my version and your synth version is that mine uses 88 preloaded buffers containing the sounds (wav) of a real piano.
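Note that summing several 16-bit samples directly can overflow int16_t. A minimal sketch of one way to avoid that, accumulating in int32 and clamping before writing back (the names kNoteCount, note_buffers, note_lengths, key_down and positions are placeholders of mine, not the actual variables):

#include <cstdint>
#include <cstddef>

const int kNoteCount = 88;                       // one buffer per piano key
extern const int16_t* note_buffers[kNoteCount];  // preloaded .wav sample data
extern size_t note_lengths[kNoteCount];          // length of each buffer, in samples
extern bool key_down[kNoteCount];                // set on WM_KEYDOWN, cleared on WM_KEYUP
extern size_t positions[kNoteCount];             // playback position per active note

void MixActiveNotes(int16_t* out, size_t sampleCount)
{
    for (size_t s = 0; s < sampleCount; ++s)
    {
        int32_t acc = 0;                         // wide accumulator, cannot overflow here
        for (int n = 0; n < kNoteCount; ++n)
            if (key_down[n] && positions[n] < note_lengths[n])
                acc += note_buffers[n][positions[n]++];
        // clamp to the int16 range before writing to the output buffer
        if (acc > 32767) acc = 32767;
        if (acc < -32768) acc = -32768;
        out[s] = (int16_t)acc;
    }
}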
int16_t* buffer;
int MyKeyDown[255];
int MyKeyCode[255];
short *fileBytes = new short[MyBufferLength];

void __fastcall TMyThread::Execute()
{
    HRESULT hr;
    int16_t* buffer;
    HANDLE hEvent = NULL;
    REFERENCE_TIME hnsRequestedDuration = 0;
    DWORD flags = 0;
    CoInitialize(NULL);
    //CoInitializeEx( NULL, COINIT_MULTITHREADED );
    IMMDeviceEnumerator *deviceEnumerator;
    hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL,
                          __uuidof(IMMDeviceEnumerator), (LPVOID *)(&deviceEnumerator));
    assert(hr == S_OK);
    IMMDevice* audioDevice;
    hr = deviceEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &audioDevice);
    assert(hr == S_OK);
    deviceEnumerator->Release();
    IAudioClient2* audioClient;
    hr = audioDevice->Activate(__uuidof(IAudioClient2), CLSCTX_ALL, NULL, (LPVOID*)(&audioClient));
    assert(hr == S_OK);
    audioDevice->Release();

    WAVEFORMATEX wfx = {};
    wfx.wFormatTag = WAVE_FORMAT_PCM;
    wfx.nChannels = 2;
    wfx.nSamplesPerSec = 44100;
    wfx.wBitsPerSample = 16;
    wfx.nBlockAlign = (wfx.nChannels * wfx.wBitsPerSample) / 8;
    wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign;

    hr = audioClient->GetDevicePeriod(NULL, &hnsRequestedDuration);
    assert(hr == S_OK);
    hr = audioClient->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE,
                                 AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
                                 hnsRequestedDuration,
                                 hnsRequestedDuration,
                                 &wfx,
                                 NULL);
    // If the requested buffer size is not aligned...
    UINT32 nFrames = 0;
    if(hr == AUDCLNT_E_BUFFER_SIZE_NOT_ALIGNED)
    {
        // Get the next aligned frame.
        hr = audioClient->GetBufferSize(&nFrames);
        assert(hr == S_OK);
        hnsRequestedDuration = (REFERENCE_TIME)
            ((10000.0 * 1000 / wfx.nSamplesPerSec * nFrames) + 0.5);
        // Create a new audio client.
        // (note: audioDevice was already Release()d above, so this Activate uses a stale pointer)
        hr = audioDevice->Activate(__uuidof(IAudioClient2), CLSCTX_ALL, NULL, (LPVOID*)(&audioClient));
        assert(hr == S_OK);
        // Open the stream and associate it with an audio session.
        hr = audioClient->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE,
                                     AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
                                     hnsRequestedDuration,
                                     hnsRequestedDuration,
                                     &wfx,
                                     NULL);
        assert(hr == S_OK);
    }
    hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
    if (hEvent == NULL)
    {
        hr = E_FAIL;
        ShowMessage("CreateEvent fail!!!");
    }
    hr = audioClient->SetEventHandle(hEvent);
    assert(hr == S_OK);
    IAudioRenderClient *audioRenderClient;
    hr = audioClient->GetService(__uuidof(IAudioRenderClient),
                                 (LPVOID*)(&audioRenderClient));
    assert(hr == S_OK);
    UINT32 bufferSizeInFrames;
    hr = audioClient->GetBufferSize(&bufferSizeInFrames);
    assert(hr == S_OK);

    // from here play buffer
    hr = audioClient->Start();
    assert(hr == S_OK);
    int wavPlaybackSample = 0;
    while(flags != AUDCLNT_BUFFERFLAGS_SILENT)
    {
        DWORD retval = WaitForSingleObject(hEvent, 2000);
        UINT32 bufferPadding;
        hr = audioClient->GetCurrentPadding(&bufferPadding);
        assert(hr == S_OK);
        UINT32 soundBufferLatency = bufferSizeInFrames / 1;
        UINT32 numFramesToWrite = soundBufferLatency - bufferPadding;
        FillBufferWasapi();
        hr = audioRenderClient->GetBuffer(numFramesToWrite, (BYTE**)(&buffer));
        assert(hr == S_OK);
        for (UINT32 frameIndex = 0; frameIndex < numFramesToWrite; ++frameIndex)
        {
            *buffer++ = fileBytes[wavPlaybackSample]; // left
            *buffer++ = fileBytes[wavPlaybackSample]; // right
            ++wavPlaybackSample;
            //wavPlaybackSample %= fileSize;
        }
        hr = audioRenderClient->ReleaseBuffer(numFramesToWrite, flags);
        assert(hr == S_OK);
        //Sleep((DWORD)(hnsRequestedDuration/10000000));
    }
    audioClient->Stop();
    audioClient->Release();
    audioRenderClient->Release();
    CoUninitialize();
}
//---------------------------------------------------------------------------
void FillBufferWasapi()
{
    for(int ii = 0; ii < 255; ii++)
    {
        if(MyKeyDown[ii] == 1)
        {
            for(int i = 0; i < MyBufferLength; i++)
                fileBytes[i] += bfn[MyKeyCode[ii]][i]; // was bfn[KeyCode[ii]][i]; MyKeyCode is the array declared above
        }
    }
}
//---------------------------------------------------------------------------
void __fastcall TForm1::AppMessage(MSG &Msg, bool &Handled)
{
    MyKeyCode['Z'] = 3; // C1
    MyKeyCode['X'] = 5; // D1
    MyKeyCode['C'] = 7; // E1
    switch (Msg.message)
    {
        case WM_KEYDOWN:
            if(MyKeyDown[Msg.wParam] == 0)
            {
                MyKeyDown[Msg.wParam] = 1;
            }
            break;
        case WM_KEYUP:
            if(MyKeyDown[Msg.wParam] == 1)
            {
                MyKeyDown[Msg.wParam] = 0;
            }
            break;
    }
}
This should match what you need for 99%: it's a pure C++ sample player using wasapi.
To compile and link:
- needs a C++17(+) compliant compiler
- install the boost libraries, used for the lock-free queue
- probably needs the MS C++ compiler (uses conio.h)
- link against avrt.lib for the real-time audio thread (uses AvSetMmThreadPriority)
- full vs2019 project available if needed
To run:
- you need 5 .wav files in 44100 16-bit stereo format, named c4.wav through g4.wav
- see SamplePack
What it does:
- the console app runs a getchar() loop; c, d, e, f and g trigger note-on, q quits
- because it's a console app there are no note-off messages; each keypress triggers playback of the full sample
- note-downs are timestamped and posted to a shared queue (that's the boost lockfree thing, size capped at 64)
- so you can crash it by pressing more than 64 keys within a 3 millisecond interval (the minimum wasapi exclusive-mode latency)
- the audio thread picks these messages up and moves them into an "active notes" list local to the audio thread; active notes are bounded by max polyphony (64)
- so you can also crash it by pressing more than 64 keys within [length of the shortest sample] seconds
- each active note is mixed into the current wasapi buffer until it reaches the end of its .wav sample
The code:
#include <atomic>
#include <vector>
#include <cstdio>
#include <cstdint>
#include <cassert>
#include <fstream>
#include <cstring>
#include <iostream>
#include <filesystem>
#include <boost/lockfree/queue.hpp>
#include <conio.h>
#include <atlbase.h>
#include <Windows.h>
#include <avrt.h>
#include <mmdeviceapi.h>
#include <Audioclient.h>
// for wasapi event callback
static HANDLE event_handle;
// sample data
static const size_t sample_count = 5;
static int16_t* note_samples[sample_count];
static size_t note_frame_counts[sample_count];
static std::vector<char> note_samples_raw[sample_count];
static char const* note_files[sample_count] = {
    "c4.wav", "d4.wav", "e4.wav", "f4.wav", "g4.wav"
};
// user input / audio thread communication
static std::atomic_bool stop_finished;
static std::atomic_bool stop_initiated;
// scale mix volume
static const double mix_scale_amp = 0.4;
// debug stuff
static int32_t prev_note_active_count = 0;
static int32_t prev_note_audible_count = 0;
// timing stuff
static const int64_t millis_per_second = 1000;
static const int64_t reftimes_per_milli = 10000;
// audio format = 44.1khz 16bit stereo
static const int32_t sample_size = 2;
static const int32_t channel_count = 2;
static const int32_t sample_rate = 44100;
static const int32_t frame_size = sample_size * channel_count;
// exclusive mode event driven must use 128-byte aligned buffers
static const int32_t alignment_requirement_bytes = 128;
// note down notification + timestamp
static const size_t note_queue_size = 64;
struct note_down_msg
{
    int32_t note; // 0..4 = c..g
    uint64_t time_stamp_qpc;
};
static boost::lockfree::queue<note_down_msg>
note_msg_queue(note_queue_size);
// current playing notes
static const size_t max_polyphony = 64;
struct active_note
{
    // slot in use?
    bool in_use;
    // note + timestamp
    note_down_msg msg;
    // position relative to stream pos when it should start
    uint64_t trigger_pos_frames;
    // how many of it has played already
    size_t frames_rendered;
    active_note() = default;
};
static active_note
active_notes[max_polyphony];
// shared by user input / audio thread
struct audio_thread_data
{
    IAudioClock* clock;
    IAudioClient* client;
    IAudioRenderClient* render;
};
// bail out on any error
#define CHECK_COM(expr) do { \
    HRESULT hr = expr; \
    if(SUCCEEDED(hr)) break; \
    std::cout << #expr << ": " << hr << "\n"; \
    std::terminate(); \
} while(0)

static WAVEFORMATEXTENSIBLE
make_audio_format()
{
    // translate format specification to WAVEFORMATEXTENSIBLE
    WAVEFORMATEXTENSIBLE result = { 0 };
    result.dwChannelMask = 0;
    result.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
    result.Samples.wValidBitsPerSample = sample_size * 8;
    result.Format.nChannels = channel_count;
    result.Format.nSamplesPerSec = sample_rate;
    result.Format.wBitsPerSample = sample_size * 8;
    result.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
    result.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE);
    result.Format.nBlockAlign = channel_count * sample_size;
    result.Format.nAvgBytesPerSec = channel_count * sample_size * sample_rate;
    return result;
}
static void
load_note_samples()
{
    for(size_t i = 0; i < sample_count; i++)
    {
        // load piano samples to bytes
        auto path = std::filesystem::current_path() / note_files[i];
        std::ifstream input(path, std::ios::binary);
        assert(input);
        input.seekg(0, input.end);
        size_t length = input.tellg();
        input.seekg(0, input.beg);
        note_samples_raw[i].resize(length); // resize (not reserve), so the vector actually owns readable storage
        input.read(note_samples_raw[i].data(), length);
        assert(input);
        input.close();
        // compute frame count and set actual audio data
        // 44 bytes skipped for .WAV file header
        note_frame_counts[i] = (length - 44) / (sample_size * channel_count);
        note_samples[i] = reinterpret_cast<int16_t*>(note_samples_raw[i].data() + 44);
    }
}
// this runs audio processing
static DWORD WINAPI
run_audio_thread(void* param)
{
    int16_t* audio;
    BYTE* audio_mem;
    bool slot_found;
    UINT32 buffer_frames;
    HANDLE task;
    BOOL success;
    DWORD wait_result;
    DWORD task_index = 0;
    UINT64 clock_pos;
    UINT64 clock_freq;
    UINT64 clock_qpc_pos;
    LARGE_INTEGER qpc_freq;
    audio_thread_data* data = static_cast<audio_thread_data*>(param);

    // init thread
    CHECK_COM(CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED));
    task = AvSetMmThreadCharacteristicsW(TEXT("Pro Audio"), &task_index);
    assert(task != nullptr);

    // wasapi buffer frame count & clock info
    CHECK_COM(data->client->GetBufferSize(&buffer_frames));
    CHECK_COM(data->clock->GetFrequency(&clock_freq));
    success = QueryPerformanceFrequency(&qpc_freq);
    assert(success);

    // audio loop
    data->client->Start();
    while(!stop_initiated.load())
    {
        wait_result = WaitForSingleObject(event_handle, INFINITE);
        assert(wait_result == WAIT_OBJECT_0);
        // retrieve and clear buffer for this round
        CHECK_COM(data->render->GetBuffer(buffer_frames, &audio_mem));
        audio = reinterpret_cast<int16_t*>(audio_mem);
        memset(audio, 0, buffer_frames * static_cast<uint64_t>(frame_size));
        // get timing stuff
        CHECK_COM(data->clock->GetPosition(&clock_pos, &clock_qpc_pos));
        uint64_t stream_offset_hns = clock_pos * reftimes_per_milli * millis_per_second / clock_freq;
        uint64_t stream_offset_frames = stream_offset_hns * sample_rate / (reftimes_per_milli * millis_per_second);
        // process each frame
        for(size_t f = 0; f < buffer_frames; f++)
        {
            // pop user input, find empty slot in active notes buffer
            // for better performance this can also be outside the frame
            // loop, at start of each buffer round, in that case add 1 additional buffer latency
            note_down_msg msg;
            while(note_msg_queue.pop(msg))
            {
                slot_found = false;
                for(size_t i = 0; i < max_polyphony; i++)
                    if(!active_notes[i].in_use)
                    {
                        slot_found = true;
                        active_notes[i].msg = msg;
                        active_notes[i].in_use = true;
                        active_notes[i].frames_rendered = 0;
                        int64_t clock_note_diff_qpc = clock_qpc_pos - static_cast<int64_t>(active_notes[i].msg.time_stamp_qpc);
                        int64_t clock_note_diff_hns = clock_note_diff_qpc * reftimes_per_milli * millis_per_second / qpc_freq.QuadPart;
                        int64_t clock_note_diff_frames = clock_note_diff_hns * sample_rate / (reftimes_per_milli * millis_per_second);
                        int64_t note_clock_diff_frames = -static_cast<int64_t>(clock_note_diff_frames);
                        // allow 1 buffer latency otherwise notes would have to start in the past
                        active_notes[i].trigger_pos_frames = stream_offset_frames + note_clock_diff_frames + buffer_frames;
                        assert(active_notes[i].trigger_pos_frames <= stream_offset_frames + buffer_frames * 3);
                        assert(active_notes[i].trigger_pos_frames >= stream_offset_frames + f);
                        break;
                    }
                if(!slot_found)
                    assert(!"Max polyphony reached.");
            }
            // debugging stuff
            int32_t note_active_count = 0;
            int32_t note_audible_count = 0;
            // compose frame from all samples active up to max_polyphony
            double current_samples[channel_count] = { 0 };
            for(size_t i = 0; i < max_polyphony; i++)
            {
                // slot not in use
                if(!active_notes[i].in_use) continue;
                note_active_count++;
                // not my turn yet
                // note this very briefly wastes a slot for a sample which starts halfway in the current buffer
                if(active_notes[i].trigger_pos_frames > stream_offset_frames + f) continue;
                if(active_notes[i].frames_rendered == note_frame_counts[active_notes[i].msg.note])
                {
                    // reached sample end
                    active_notes[i].in_use = false;
                    active_notes[i].frames_rendered = 0;
                    continue;
                }
                // note is active + audible
                note_audible_count++;
                size_t frame_index = active_notes[i].frames_rendered++;
                for(size_t c = 0; c < channel_count; c++)
                {
                    assert(active_notes[i].msg.note < sample_count);
                    assert(frame_index < note_frame_counts[active_notes[i].msg.note]);
                    current_samples[c] += static_cast<double>(note_samples[active_notes[i].msg.note][frame_index * channel_count + c] * mix_scale_amp) / SHRT_MAX;
                }
            }
            // normally never do io on the audio thread, just debugging
            if(prev_note_active_count != note_active_count || prev_note_audible_count != note_audible_count)
                ;//std::cout << "\nactive: " << note_active_count << " audible: " << note_audible_count << "\n";
            prev_note_active_count = note_active_count;
            prev_note_audible_count = note_audible_count;
            // convert to int16 and write to wasapi
            for(size_t c = 0; c < channel_count; c++)
                audio[f * channel_count + c] = static_cast<int16_t>(current_samples[c] * SHRT_MAX);
        }
        CHECK_COM(data->render->ReleaseBuffer(buffer_frames, 0));
    }
    data->client->Stop();

    // cleanup
    success = AvRevertMmThreadCharacteristics(task);
    assert(success);
    CoUninitialize();
    stop_finished.store(true);
    return 0;
}
// this runs user input
static void
run_user_input_thread()
{
    int32_t chr;
    int32_t note;
    BOOL success;
    UINT32 buffer_frames;
    REFERENCE_TIME engine;
    REFERENCE_TIME period;
    LARGE_INTEGER qpc_count;
    CComPtr<IMMDevice> device;
    CComPtr<IAudioClock> clock;
    CComPtr<IAudioClient> client;
    CComPtr<IAudioRenderClient> render;
    CComPtr<IMMDeviceEnumerator> enumerator;
    WAVEFORMATEXTENSIBLE format = make_audio_format();

    // get default render endpoint
    CHECK_COM(CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL,
        __uuidof(IMMDeviceEnumerator), reinterpret_cast<void**>(&enumerator)));
    CHECK_COM(enumerator->GetDefaultAudioEndpoint(eRender, eMultimedia, &device));
    CHECK_COM(device->Activate(__uuidof(IAudioClient), CLSCTX_ALL,
        nullptr, reinterpret_cast<void**>(&client)));

    // open exclusive mode event driven stream
    CHECK_COM(client->GetDevicePeriod(&engine, &period));
    buffer_frames = static_cast<uint32_t>(period / reftimes_per_milli * sample_rate / millis_per_second);
    while((buffer_frames * frame_size) % alignment_requirement_bytes != 0) buffer_frames++;
    period = buffer_frames * millis_per_second * reftimes_per_milli / sample_rate;
    CHECK_COM(client->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE, AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
        period, period, reinterpret_cast<WAVEFORMATEX*>(&format), nullptr));
    event_handle = CreateEvent(nullptr, FALSE, FALSE, nullptr);
    assert(event_handle != nullptr);
    CHECK_COM(client->SetEventHandle(event_handle));
    CHECK_COM(client->GetService(__uuidof(IAudioClock), reinterpret_cast<void**>(&clock)));
    CHECK_COM(client->GetService(__uuidof(IAudioRenderClient), reinterpret_cast<void**>(&render)));

    // start audio thread
    audio_thread_data data = { 0 };
    data.clock = clock;
    data.client = client;
    data.render = render;
    CreateThread(nullptr, 0, run_audio_thread, &data, 0, nullptr);

    // process user input
    // cdefg = notes, q = quit
    while((chr = _getch()) != 'q')
    {
        if(chr == 'c') note = 0;
        else if(chr == 'd') note = 1;
        else if(chr == 'e') note = 2;
        else if(chr == 'f') note = 3;
        else if(chr == 'g') note = 4;
        else continue;
        success = QueryPerformanceCounter(&qpc_count);
        note_down_msg msg;
        msg.note = note;
        msg.time_stamp_qpc = qpc_count.QuadPart;
        assert(success);
        note_msg_queue.push(msg);
        _putch(chr);
    }

    // cleanup
    stop_initiated.store(true);
    while(!stop_finished.load());
    success = CloseHandle(event_handle);
    assert(success);
}

int
main(int argc, char** argv)
{
    // wraps COM init/cleanup
    CHECK_COM(CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED));
    load_note_samples();
    run_user_input_thread();
    CoUninitialize();
    return 0;
}