How do you use WASAPI in exclusive mode?

I'm writing a piano simulator in which I continuously send buffers to WASAPI. I'm trying to do this in AUDCLNT_SHAREMODE_EXCLUSIVE mode, but I still don't understand how to handle it.

With the code below, I spawn a separate thread for each call to PlayBuf().

The problem is that after the first thread has started, if I try to start a second one I get AUDCLNT_E_DEVICE_IN_USE.

This is certainly my fault, since I have not yet understood how WASAPI is supposed to be used in EXCLUSIVE mode.

Thanks

void PlayBuf(short *fileBytes, int fileSize)
{
    HRESULT hr;
    IMMDeviceEnumerator *deviceEnumerator = NULL;
    IMMDevice* audioDevice;
    IAudioClient2* audioClient;
    WAVEFORMATEX wfx = {};
    IAudioRenderClient* audioRenderClient;
    UINT32 bufferSizeInFrames;
    UINT32 bufferPadding;
    int16_t* buffer;
    
    CoInitialize(NULL);

    hr = CoCreateInstance(__uuidof(MMDeviceEnumerator),NULL,CLSCTX_ALL, __uuidof(IMMDeviceEnumerator),(LPVOID *)(&deviceEnumerator));
    assert (hr == S_OK);

    hr = deviceEnumerator->GetDefaultAudioEndpoint(eRender,eConsole,&audioDevice);
    assert(hr == S_OK);
    deviceEnumerator->Release();

    hr = audioDevice->Activate(__uuidof(IAudioClient2),CLSCTX_ALL,NULL,(LPVOID*)(&audioClient));
    assert(hr == S_OK);
    audioDevice->Release();

    wfx.wFormatTag = WAVE_FORMAT_PCM;
    wfx.nChannels = 2;
    wfx.nSamplesPerSec = 44100;
    wfx.wBitsPerSample = 16;
    wfx.nBlockAlign = (wfx.nChannels * wfx.wBitsPerSample) / 8;
    wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign;

    const int64_t REFTIMES_PER_SEC = 10000000;
    REFERENCE_TIME requestedSoundBufferDuration = REFTIMES_PER_SEC * DurataSuono; // DurataSuono = sound duration in seconds, defined elsewhere
    DWORD initStreamFlags = ( AUDCLNT_STREAMFLAGS_RATEADJUST);

    hr = audioClient->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE,initStreamFlags,requestedSoundBufferDuration,0, &wfx, NULL);
    assert(hr == S_OK);

    hr = audioClient->GetService(__uuidof(IAudioRenderClient),
    (LPVOID*)(&audioRenderClient));
    assert(hr == S_OK);

    hr = audioClient->GetBufferSize(&bufferSizeInFrames);
    assert(hr == S_OK);


    audioClient->Reset();
    hr = audioClient->Start();
    assert(hr == S_OK);

    hr = audioRenderClient->GetBuffer(fileSize, (BYTE**)(&buffer));
    assert(hr == S_OK);

    hr = audioRenderClient->ReleaseBuffer(fileSize, 0);
    assert(hr == S_OK);

    Sleep(2000);

    audioClient->Stop();
    audioClient->Release();
    audioRenderClient->Release();
}

I spent an hour putting together a basic example for you. It's in C# using my own audio I/O library XT-Audio (so, shameless plug intended), but doing this in C++ against raw WASAPI would probably have cost me half a day. Anyway, I believe it comes very close to what you're looking for. As you can see below, this app has the world's greatest GUI:

As soon as you press Start, the app begins translating keyboard input into audio. You can press and hold the c, d, e, f and g keyboard keys to generate notes, and it handles multiple overlapping notes (chords), too. I selected the WASAPI shared-mode backend because it supports floating-point audio, but this works just as well in exclusive mode if you translate the audio to a 16-bit integer format.
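For reference, here is a minimal sketch of that float-to-16-bit translation (my addition, not part of the original sample): clamp each sample to [-1, 1], then scale to the int16 range.

#include <cstdint>

// Convert one float sample (nominally in [-1.0, 1.0]) to 16-bit PCM,
// clamping first so out-of-range values saturate instead of wrapping.
static int16_t FloatToInt16(float s)
{
    if (s > 1.0f) s = 1.0f;
    if (s < -1.0f) s = -1.0f;
    return static_cast<int16_t>(s * 32767.0f);
}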

The difference between using this library and raw WASAPI is that the audio thread is managed by the library, and the application's audio callback gets invoked periodically to synthesize audio data. However, this translates easily back to native WASAPI in C++: just call IAudioRenderClient::GetBuffer/ReleaseBuffer in a loop on a background thread, and do your processing between those calls, as sketched below.
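A minimal sketch of such a native render loop, assuming an event-driven exclusive-mode client (initialized with AUDCLNT_STREAMFLAGS_EVENTCALLBACK, event registered via IAudioClient::SetEventHandle); 'running' and RenderNotes() are hypothetical placeholders for your own stop flag and mixing code:

UINT32 bufferFrames = 0;
audioClient->GetBufferSize(&bufferFrames);
audioClient->Start();
while (running)
{
    // block until the device asks for the next buffer
    WaitForSingleObject(hEvent, INFINITE);

    // in exclusive event-driven mode you own the whole buffer every period
    BYTE* data = nullptr;
    if (FAILED(audioRenderClient->GetBuffer(bufferFrames, &data)))
        break;

    // mix ALL currently active notes into this single buffer
    RenderNotes(reinterpret_cast<int16_t*>(data), bufferFrames);

    audioRenderClient->ReleaseBuffer(bufferFrames, 0);
}
audioClient->Stop();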

Anyway, the crucial part is this: the app uses only 2 threads, one for the UI (managed by WinForms) and one for audio (managed by the audio library), yet it is able to play multiple notes simultaneously. I believe that is the core of your problem.

I uploaded the full Visual Studio solution and binaries here: WasapiSynthSample. But for completeness, I'll post the interesting parts of the code below.

using System;
using System.Threading;
using System.Windows.Forms;
using Xt;

namespace WasapiSynthSample
{
    public partial class Program : Form
    {
        // sampling rate
        const int Rate = 48000;        
        // stereo
        const int Channels = 2;
        // default format for wasapi shared mode
        const XtSample Sample = XtSample.Float32;
        // C, D, E, F, G
        static readonly float[] NoteFrequencies = { 523.25f, 587.33f, 659.25f, 698.46f, 783.99f };

        [STAThread]
        static void Main()
        {
            // initialize audio library
            using (var platform = XtAudio.Init(null, IntPtr.Zero, null))
            {
                Application.EnableVisualStyles();
                Application.SetCompatibleTextRenderingDefault(false);
                Application.ThreadException += OnApplicationThreadException;
                AppDomain.CurrentDomain.UnhandledException += OnCurrentDomainUnhandledException;
                Application.Run(new Program(platform));
            }
        }

        // pop a messagebox on any error
        static void OnApplicationThreadException(object sender, ThreadExceptionEventArgs e)
        => OnError(e.Exception);
        static void OnCurrentDomainUnhandledException(object sender, UnhandledExceptionEventArgs e)
        => OnError((Exception)e.ExceptionObject);
        static void OnError(Exception e)
        {
            var text = e.ToString();
            if (e is XtException xte) text = XtAudio.GetErrorInfo(xte.GetError()).ToString();
            MessageBox.Show(text);
        }

        XtStream _stream;
        readonly XtPlatform _platform;

        // note phases
        readonly float[] _phases = new float[5];
        // tracks key down/up
        readonly bool[] _notesActive = new bool[5];

        public Program(XtPlatform platform)
        {
            _platform = platform;
            InitializeComponent();
        }

        // activate note
        protected override void OnKeyDown(KeyEventArgs e)
        {
            base.OnKeyDown(e);
            if (e.KeyCode == Keys.C) _notesActive[0] = true;
            if (e.KeyCode == Keys.D) _notesActive[1] = true;
            if (e.KeyCode == Keys.E) _notesActive[2] = true;
            if (e.KeyCode == Keys.F) _notesActive[3] = true;
            if (e.KeyCode == Keys.G) _notesActive[4] = true;
        }

        // deactivate note
        protected override void OnKeyUp(KeyEventArgs e)
        {
            base.OnKeyUp(e);
            if (e.KeyCode == Keys.C) _notesActive[0] = false;
            if (e.KeyCode == Keys.D) _notesActive[1] = false;
            if (e.KeyCode == Keys.E) _notesActive[2] = false;
            if (e.KeyCode == Keys.F) _notesActive[3] = false;
            if (e.KeyCode == Keys.G) _notesActive[4] = false;
        }

        // stop stream
        void OnStop(object sender, EventArgs e)
        {
            _stream?.Stop();
            _stream?.Dispose();
            _stream = null;
            _start.Enabled = true;
            _stop.Enabled = false;
        }

        // start stream
        void OnStart(object sender, EventArgs e)
        {
            var service = _platform.GetService(XtSystem.WASAPI);
            var id = service.GetDefaultDeviceId(true);
            using (var device = service.OpenDevice(id))
            {
                var mix = new XtMix(Rate, Sample);
                var channels = new XtChannels(0, 0, Channels, 0);
                var format = new XtFormat(in mix, in channels);
                var buffer = device.GetBufferSize(in format).current;
                var streamParams = new XtStreamParams(true, OnBuffer, null, null);
                var deviceParams = new XtDeviceStreamParams(in streamParams, in format, buffer);
                _stream = device.OpenStream(in deviceParams, null);
                _stream.Start();
                _start.Enabled = false;
                _stop.Enabled = true;
            }
        }

        // this gets called on the audio thread by the audio library
        // but could just as well be your c++ code managing its own threads
        unsafe int OnBuffer(XtStream stream, in XtBuffer buffer, object user)
        {
            // process audio buffer of N frames
            for (int f = 0; f < buffer.frames; f++)
            {
                // compose current sample of all currently active notes
                float sample = 0.0f;
                for (int n = 0; n < NoteFrequencies.Length; n++)
                {
                    if (_notesActive[n])
                    {
                        _phases[n] += NoteFrequencies[n] / Rate;
                        if (_phases[n] >= 1.0f) _phases[n] = -1.0f;
                        float noteSample = (float)Math.Sin(2.0 * _phases[n] * Math.PI);
                        sample += noteSample / NoteFrequencies.Length;
                    }
                }

                // write current sample to output buffer
                for (int c = 0; c < Channels; c++)
                    ((float*)buffer.output)[f * Channels + c] = sample;
            }
            return 0;
        }
    }
}

I added FillBufferWasapi() to the thread code to make it clearer. I don't have much experience with real-time applications, but I can't see the error.

int wavPlaybackSample = 0;
int k=0;
while(flags != AUDCLNT_BUFFERFLAGS_SILENT)
{
    DWORD retval = WaitForSingleObject(hEvent, 2000);

    for(int ii=0;ii<255;ii++)
    {
        if(MyKeyDown[ii] == 1)
        {
            hr = audioRenderClient->GetBuffer(bufferSizeInFrames, (BYTE**)(&buffer));
            assert(hr == S_OK);

            for (UINT32 frameIndex = 0+k; frameIndex < bufferSizeInFrames+k; ++frameIndex)
            {
                *buffer++ = bfn[MyKeyCode[ii]][wavPlaybackSample++]; // left
                *buffer++ = bfn[MyKeyCode[ii]][wavPlaybackSample++]; // right
            }

            k+=bufferSizeInFrames;

            hr = audioRenderClient->ReleaseBuffer(bufferSizeInFrames, flags);
            assert(hr == S_OK);

            if(k >= MyBufferLength/4)
            {
                k=0;
                wavPlaybackSample=0;
            }
        }
    }
}

Each time a key is pressed I set the corresponding flag to 1, so that the buffers containing the samples are summed (see the corrected mixing sketch after FillBufferWasapi() below).

The difference between my version and your synth version is that mine uses 88 preloaded buffers containing the sounds of a real piano (wav files).

int16_t* buffer;
int MyKeyDown[255];
int MyKeyCode[255];

short *fileBytes = new short[MyBufferLength];

void __fastcall TMyThread::Execute()
{
    HRESULT hr;
    int16_t* buffer;

    HANDLE hEvent = NULL;
    REFERENCE_TIME hnsRequestedDuration = 0;
    DWORD flags = 0;

    CoInitialize(NULL);
    //CoInitializeEx( NULL, COINIT_MULTITHREADED );

    IMMDeviceEnumerator *deviceEnumerator;
    hr = CoCreateInstance(__uuidof(MMDeviceEnumerator),NULL,CLSCTX_ALL, __uuidof(IMMDeviceEnumerator),(LPVOID *)(&deviceEnumerator));
    assert (hr == S_OK);

    IMMDevice* audioDevice;
    hr = deviceEnumerator->GetDefaultAudioEndpoint(eRender,eConsole,&audioDevice);
    assert(hr == S_OK);
    deviceEnumerator->Release();

    IAudioClient2* audioClient;
    hr = audioDevice->Activate(__uuidof(IAudioClient2),CLSCTX_ALL,NULL,(LPVOID*)(&audioClient));
    assert(hr == S_OK);
    // keep audioDevice alive for now: it is needed again below
    // if the requested buffer size turns out to be unaligned


    WAVEFORMATEX wfx = {};
    wfx.wFormatTag = WAVE_FORMAT_PCM;
    wfx.nChannels = 2;
    wfx.nSamplesPerSec = 44100;
    wfx.wBitsPerSample = 16;
    wfx.nBlockAlign = (wfx.nChannels * wfx.wBitsPerSample) / 8;
    wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign;

    hr = audioClient->GetDevicePeriod(NULL, &hnsRequestedDuration);
    assert(hr == S_OK);

    hr = audioClient->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE,
    AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
    hnsRequestedDuration,
    hnsRequestedDuration,
    &wfx,
    NULL);

    // If the requested buffer size is not aligned...
    UINT32 nFrames = 0;
    if(hr == AUDCLNT_E_BUFFER_SIZE_NOT_ALIGNED)
    {
        // Get the next aligned frame.
        hr = audioClient->GetBufferSize(&nFrames);
        assert (hr == S_OK);

        hnsRequestedDuration = (REFERENCE_TIME)
        ((10000.0 * 1000 / wfx.nSamplesPerSec * nFrames) + 0.5);

        // Release the old audio client and create a new one.
        audioClient->Release();
        hr = audioDevice->Activate(__uuidof(IAudioClient2),CLSCTX_ALL,NULL,(LPVOID*)(&audioClient));
        assert(hr == S_OK);

        // Open the stream and associate it with an audio session.
        hr = audioClient->Initialize(
        AUDCLNT_SHAREMODE_EXCLUSIVE,
        AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
        hnsRequestedDuration,
        hnsRequestedDuration,
        &wfx,
        NULL);
        assert(hr == S_OK);
    }
    audioDevice->Release();

    hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
    if (hEvent == NULL)
    {
        hr = E_FAIL;
        ShowMessage("CreateEvent fail!!!");
    }

    hr = audioClient->SetEventHandle(hEvent);
    assert(hr == S_OK);

    IAudioRenderClient *audioRenderClient;
    hr = audioClient->GetService(__uuidof(IAudioRenderClient),
    (LPVOID*)(&audioRenderClient));
    assert(hr == S_OK);

    UINT32 bufferSizeInFrames;
    hr = audioClient->GetBufferSize(&bufferSizeInFrames);
    assert(hr == S_OK);

    // from here play buffer
    hr = audioClient->Start();
    assert(hr == S_OK);

    int wavPlaybackSample = 0;

    while(flags != AUDCLNT_BUFFERFLAGS_SILENT)
    {
        DWORD retval = WaitForSingleObject(hEvent, 2000);

        UINT32 bufferPadding;
        hr = audioClient->GetCurrentPadding(&bufferPadding);
        assert(hr == S_OK);

        UINT32 soundBufferLatency = bufferSizeInFrames / 1;
        UINT32 numFramesToWrite = soundBufferLatency - bufferPadding;
        
        FillBufferWasapi();

        hr = audioRenderClient->GetBuffer(numFramesToWrite, (BYTE**)(&buffer));
        assert(hr == S_OK);


        for (UINT32 frameIndex = 0; frameIndex < numFramesToWrite; ++frameIndex)
        {
            *buffer++ = fileBytes[wavPlaybackSample]; // left
            *buffer++ = fileBytes[wavPlaybackSample]; // right

            ++wavPlaybackSample;
            //wavPlaybackSample %= fileSize;
        }
        hr = audioRenderClient->ReleaseBuffer(numFramesToWrite, flags);
        assert(hr == S_OK);

        //Sleep((DWORD)(hnsRequestedDuration/10000000));
    }

    audioClient->Stop();
    audioClient->Release();
    audioRenderClient->Release();

    CoUninitialize();
}
//---------------------------------------------------------------------------

void FillBufferWasapi()
{
    for(int ii=0;ii<255;ii++)
    {
        if(MyKeyDown[ii] == 1)
        {
            for(int i=0; i<MyBufferLength; i++)
                fileBytes[i] += bfn[MyKeyCode[ii]][i];
        }
    }
}
//---------------------------------------------------------------------------
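One caveat about FillBufferWasapi() as written (my note, not from the original post): it keeps accumulating into fileBytes without ever clearing it, and the int16 sums can overflow. A minimal corrected sketch, assuming the same globals (fileBytes, bfn, MyKeyDown, MyKeyCode, MyBufferLength) and that <cstring> is included for memset:

void FillBufferWasapi()
{
    // start from silence each round instead of accumulating forever
    memset(fileBytes, 0, MyBufferLength * sizeof(short));
    for(int ii = 0; ii < 255; ii++)
    {
        if(MyKeyDown[ii] != 1) continue;
        for(int i = 0; i < MyBufferLength; i++)
        {
            // mix in 32-bit, then clamp to the int16 range to avoid wrap-around
            int sum = fileBytes[i] + bfn[MyKeyCode[ii]][i];
            if(sum >  32767) sum =  32767;
            if(sum < -32768) sum = -32768;
            fileBytes[i] = (short)sum;
        }
    }
}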

void __fastcall TForm1::AppMessage(MSG &Msg, bool &Handled)
{
    MyKeyCode['Z']=3; // C1
    MyKeyCode['X']=5; // D1
    MyKeyCode['C']=7; // E1
    
    switch (Msg.message)
    {
    case WM_KEYDOWN:
        
        if(MyKeyDown[Msg.wParam] == 0)
        {
            MyKeyDown[Msg.wParam] = 1;
        }
        break;

    case WM_KEYUP:
        if(MyKeyDown[Msg.wParam] == 1)
        {
            MyKeyDown[Msg.wParam] = 0;
        }
        break;
    }
}

This should match what you need 99%: a pure C++ sample player using WASAPI.

To compile and link:

  • Requires a C++17(+) conforming compiler
  • Install the boost library, used for the lock-free queue
  • Probably requires the MS C++ compiler (it uses conio.h)
  • Link the real-time audio thread against avrt.lib (it uses AvSetMmThreadCharacteristics)
  • Full VS2019 project available if needed

To run:

  • You need 5 .wav files in 44100Hz 16-bit stereo format, named c4.wav through g4.wav.
  • SamplePack

What it does:

  • The console application runs a getchar() loop: the c, d, e, f and g keys trigger notes on, q quits.
  • Because it's a console application there are no note-off messages; every keypress triggers playback of the full sample.
  • Note-downs are time-stamped and posted to a shared queue (that's the boost lock-free thing, capped at size 64); see the timing sketch after this list.
  • So you can crash it by pressing more than 64 keys within a 3-millisecond interval (the minimum WASAPI exclusive-mode latency).
  • The audio thread picks up these messages and moves them into an "active notes" list local to the audio thread. Active notes are bounded by the maximum polyphony (64).
  • So you can also crash it by pressing more than 64 keys within [the length of the shortest sample] seconds.
  • Each active note is mixed into the current WASAPI buffer until it reaches the end of its .wav sample.
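One note on the timing math before the listing (my summary): a note-down QPC timestamp becomes a frame offset on the stream clock essentially as frames = ticks * sample_rate / qpc_freq; the code below just routes it through a 100ns (hns) intermediate unit. A hypothetical helper showing the idea:

#include <cstdint>

// Convert an elapsed QPC tick count to audio frames: qpc_freq ticks
// equal one second, so frames = ticks * sample_rate / qpc_freq.
static int64_t qpc_ticks_to_frames(int64_t ticks, int64_t qpc_freq, int64_t sample_rate)
{
    return ticks * sample_rate / qpc_freq;
}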

Here's the code:

#include <atomic>
#include <vector>
#include <cstdio>
#include <cstdint>
#include <cassert>
#include <fstream>
#include <cstring>
#include <iostream>
#include <filesystem>
#include <boost/lockfree/queue.hpp>

#include <conio.h>
#include <atlbase.h>
#include <Windows.h>
#include <avrt.h>
#include <mmdeviceapi.h>
#include <Audioclient.h>

// for wasapi event callback
static HANDLE event_handle;

// sample data
static const size_t sample_count = 5;
static int16_t* note_samples[sample_count];
static size_t note_frame_counts[sample_count];
static std::vector<char> note_samples_raw[sample_count];
static char const* note_files[sample_count] = { 
  "c4.wav", "d4.wav", "e4.wav", "f4.wav", "g4.wav"
};

// user input / audio thread communication
static std::atomic_bool stop_finished;
static std::atomic_bool stop_initiated;

// scale mix volume
static const double mix_scale_amp = 0.4;

// debug stuff
static int32_t prev_note_active_count = 0;
static int32_t prev_note_audible_count = 0;

// timing stuff
static const int64_t millis_per_second = 1000;
static const int64_t reftimes_per_milli = 10000;

// audio format = 44.1khz 16bit stereo
static const int32_t sample_size = 2;
static const int32_t channel_count = 2;
static const int32_t sample_rate = 44100;
static const int32_t frame_size = sample_size * channel_count;

// exclusive mode event driven must use 128-byte aligned buffers
static const int32_t alignment_requirement_bytes = 128;

// note down notification + timestamp
static const size_t note_queue_size = 64;
struct note_down_msg
{
  int32_t note; // 0..4 = c..g
  uint64_t time_stamp_qpc;
};
static boost::lockfree::queue<note_down_msg> 
note_msg_queue(note_queue_size);

// current playing notes
static const size_t max_polyphony = 64;
struct active_note
{
  // slot in use?
  bool in_use;
  // note + timestamp
  note_down_msg msg;
  // position relative to stream pos when it should start
  uint64_t trigger_pos_frames;
  // how many of it has played already
  size_t frames_rendered;
  active_note() = default;
};
static active_note 
active_notes[max_polyphony];

// shared by user input / audio thread
struct audio_thread_data
{
  IAudioClock* clock;
  IAudioClient* client;
  IAudioRenderClient* render;
};

// bail out on any error
#define CHECK_COM(expr) do {                \
  HRESULT hr = expr;                        \
  if(SUCCEEDED(hr)) break;                  \
  std::cout << #expr << ": " << hr << "\n"; \
  std::terminate();                         \
} while(0)

static WAVEFORMATEXTENSIBLE
make_audio_format()
{
  // translate format specification to WAVEFORMATEXTENSIBLE
  WAVEFORMATEXTENSIBLE result = { 0 };
  result.dwChannelMask = 0;
  result.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
  result.Samples.wValidBitsPerSample = sample_size * 8;
  result.Format.nChannels = channel_count;
  result.Format.nSamplesPerSec = sample_rate;
  result.Format.wBitsPerSample = sample_size * 8;
  result.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
  result.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE);
  result.Format.nBlockAlign = channel_count * sample_size;
  result.Format.nAvgBytesPerSec = channel_count * sample_size * sample_rate;
  return result;
}

static void
load_note_samples()
{
  for(size_t i = 0; i < sample_count; i++)
  {
    // load piano samples to bytes
    auto path = std::filesystem::current_path() / note_files[i];
    std::ifstream input(path, std::ios::binary);
    assert(input);
    input.seekg(0, input.end);
    size_t length = input.tellg();
    input.seekg(0, input.beg);
    note_samples_raw[i].resize(length); // resize (not reserve), so reading into data() is valid
    input.read(note_samples_raw[i].data(), length);
    assert(input);
    input.close();

    // compute frame count and set actual audio data
    // 44 bytes skipped for .WAV file header
    note_frame_counts[i] = (length - 44) / (sample_size * channel_count);
    note_samples[i] = reinterpret_cast<int16_t*>(note_samples_raw[i].data() + 44);
  }
}

// this runs audio processing
static DWORD WINAPI
run_audio_thread(void* param)
{
  int16_t* audio;
  BYTE* audio_mem;
  bool slot_found;
  UINT32 buffer_frames;

  HANDLE task;
  BOOL success;
  DWORD wait_result;
  DWORD task_index = 0;

  UINT64 clock_pos;
  UINT64 clock_freq;
  UINT64 clock_qpc_pos;
  LARGE_INTEGER qpc_freq;

  audio_thread_data* data = static_cast<audio_thread_data*>(param);

  // init thread
  CHECK_COM(CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED));
  task = AvSetMmThreadCharacteristicsW(TEXT("Pro Audio"), &task_index);
  assert(task != nullptr);

  // wasapi buffer frame count & clock info
  CHECK_COM(data->client->GetBufferSize(&buffer_frames));
  CHECK_COM(data->clock->GetFrequency(&clock_freq));
  success = QueryPerformanceFrequency(&qpc_freq);
  assert(success);

  // audio loop
  data->client->Start();
  while(!stop_initiated.load())
  {
    wait_result = WaitForSingleObject(event_handle, INFINITE);
    assert(wait_result == WAIT_OBJECT_0);

    // retrieve and clear buffer for this round
    CHECK_COM(data->render->GetBuffer(buffer_frames, &audio_mem));
    audio = reinterpret_cast<int16_t*>(audio_mem);
    memset(audio, 0, buffer_frames * static_cast<uint64_t>(frame_size));
    
    // get timing stuff
    CHECK_COM(data->clock->GetPosition(&clock_pos, &clock_qpc_pos));        
    uint64_t stream_offset_hns = clock_pos * reftimes_per_milli * millis_per_second / clock_freq;
    uint64_t stream_offset_frames = stream_offset_hns * sample_rate / (reftimes_per_milli * millis_per_second);

    // process each frame
    for(size_t f = 0; f < buffer_frames; f++)
    {
      // pop user input, find empty slot in active notes buffer
      // for better performance this can also be outside the frame
      // loop, at start of each buffer round, in that case add 1 additional buffer latency
      note_down_msg msg;
      while(note_msg_queue.pop(msg))
      {
        slot_found = false;
        for(size_t i = 0; i < max_polyphony; i++)
          if(!active_notes[i].in_use) 
          {
            slot_found = true;
            active_notes[i].msg = msg;
            active_notes[i].in_use = true;
            active_notes[i].frames_rendered = 0;
            int64_t clock_note_diff_qpc = clock_qpc_pos - static_cast<int64_t>(active_notes[i].msg.time_stamp_qpc);
            int64_t clock_note_diff_hns = clock_note_diff_qpc * reftimes_per_milli * millis_per_second / qpc_freq.QuadPart;
            int64_t clock_note_diff_frames = clock_note_diff_hns * sample_rate / (reftimes_per_milli * millis_per_second);
            int64_t note_clock_diff_frames = -static_cast<int64_t>(clock_note_diff_frames);
            // allow 1 buffer latency otherwise notes would have to start in the past
            active_notes[i].trigger_pos_frames = stream_offset_frames + note_clock_diff_frames + buffer_frames;
            assert(active_notes[i].trigger_pos_frames <= stream_offset_frames + buffer_frames * 3);
            assert(active_notes[i].trigger_pos_frames >= stream_offset_frames + f);
            break;
          }
        if(!slot_found)       
          assert(!"Max polyphony reached.");
      }
    
      // debugging stuff
      int32_t note_active_count = 0;
      int32_t note_audible_count = 0;      

      // compose frame from all samples active up to max_polyphony
      double current_samples[channel_count] = { 0 };
      for(size_t i = 0; i < max_polyphony; i++)
      {
        // slot not in use
        if(!active_notes[i].in_use) continue;
        note_active_count++;

        // not my turn yet
        // note this very briefly wastes a slot for a sample which starts halfway in the current buffer
        if(active_notes[i].trigger_pos_frames > stream_offset_frames + f) continue;

        if(active_notes[i].frames_rendered == note_frame_counts[active_notes[i].msg.note])
        {
          // reached sample end
          active_notes[i].in_use = false;
          active_notes[i].frames_rendered = 0;
          continue;
        }

        // note is active + audible
        note_audible_count++;
        size_t frame_index = active_notes[i].frames_rendered++;
        for(size_t c = 0; c < channel_count; c++)
        {
          assert(active_notes[i].msg.note < sample_count);
          assert(frame_index < note_frame_counts[active_notes[i].msg.note]);
          current_samples[c] += static_cast<double>(note_samples[active_notes[i].msg.note][frame_index * channel_count + c] * mix_scale_amp) / SHRT_MAX;        
        }
      }

      // normally never do io on the audio thread, just debugging
      if(prev_note_active_count != note_active_count || prev_note_audible_count != note_audible_count)
        ;//std::cout << "\nactive: " << note_active_count << " audible: " << note_audible_count << "\n";
      prev_note_active_count = note_active_count;
      prev_note_audible_count = note_audible_count;

      // convert to int16 and write to wasapi
      for(size_t c = 0; c < channel_count; c++)
        audio[f * channel_count + c] = static_cast<int16_t>(current_samples[c] * SHRT_MAX);
    }

    CHECK_COM(data->render->ReleaseBuffer(buffer_frames, 0));
  }
  data->client->Stop();

  // cleanup
  success = AvRevertMmThreadCharacteristics(task);
  assert(success);
  CoUninitialize();
  stop_finished.store(true);
  return 0;
}

// this runs user input
static void
run_user_input_thread()
{
  int32_t chr;
  int32_t note;
  BOOL success;
  UINT32 buffer_frames;
  REFERENCE_TIME engine;
  REFERENCE_TIME period;
  LARGE_INTEGER qpc_count;
  CComPtr<IMMDevice> device;
  CComPtr<IAudioClock> clock;
  CComPtr<IAudioClient> client;
  CComPtr<IAudioRenderClient> render;
  CComPtr<IMMDeviceEnumerator> enumerator;  
  WAVEFORMATEXTENSIBLE format = make_audio_format();

  // get default render endpoint
  CHECK_COM(CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL, 
    __uuidof(IMMDeviceEnumerator), reinterpret_cast<void**>(&enumerator)));
  CHECK_COM(enumerator->GetDefaultAudioEndpoint(eRender, eMultimedia, &device));
  CHECK_COM(device->Activate(__uuidof(IAudioClient), CLSCTX_ALL, 
    nullptr, reinterpret_cast<void**>(&client)));

  // open exclusive mode event driven stream
  CHECK_COM(client->GetDevicePeriod(&engine, &period));
  buffer_frames = static_cast<uint32_t>(period / reftimes_per_milli * sample_rate / millis_per_second);
  while((buffer_frames * frame_size) % alignment_requirement_bytes != 0) buffer_frames++;
  period = buffer_frames * millis_per_second * reftimes_per_milli / sample_rate;
  CHECK_COM(client->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, 
    period, period, reinterpret_cast<WAVEFORMATEX*>(&format), nullptr));  
  event_handle = CreateEvent(nullptr, FALSE, FALSE, nullptr);
  assert(event_handle != nullptr);
  CHECK_COM(client->SetEventHandle(event_handle));
  CHECK_COM(client->GetService(__uuidof(IAudioClock), reinterpret_cast<void**>(&clock)));
  CHECK_COM(client->GetService(__uuidof(IAudioRenderClient), reinterpret_cast<void**>(&render)));

  // start audio thread
  audio_thread_data data = { 0 };
  data.clock = clock;
  data.client = client;
  data.render = render;
  CreateThread(nullptr, 0, run_audio_thread, &data, 0, nullptr);

  // process user input
  // cdefg = notes, q = quit
  while((chr = _getch()) != 'q')
  {
    if(chr == 'c') note = 0;
    else if(chr == 'd') note = 1;
    else if(chr == 'e') note = 2;
    else if(chr == 'f') note = 3;
    else if(chr == 'g') note = 4;
    else continue;
    success = QueryPerformanceCounter(&qpc_count);
    note_down_msg msg;
    msg.note = note;
    msg.time_stamp_qpc = qpc_count.QuadPart;
    assert(success);
    note_msg_queue.push(msg);
    _putch(chr);
  }

  // cleanup
  stop_initiated.store(true);
  while(!stop_finished.load());
  success = CloseHandle(event_handle);
  assert(success);
}

int 
main(int argc, char** argv)
{
  // wraps COM init/cleanup
  CHECK_COM(CoInitializeEx(nullptr, COINIT_APARTMENTTHREADED));
  load_note_samples();
  run_user_input_thread();
  CoUninitialize();
  return 0;
}