使用 UWP 监控实时音频并检测 gun-fire/clap 声音
Using UWP monitor live audio and detect gun-fire/clap sound
我正在开发一个新的 UWP 应用程序,它应该监视声音并在每次突然的声音响起时触发一个事件(类似于枪声或拍手声)。
- 需要启用默认音频输入和监听实时音频。
- 设置识别环境噪音和识别的音频灵敏度clap/gun-fire
- 当有高频声音像clap/gun-fire声音时(理想情况下它应该像配置的频率+/-40那么它是gun-fire/clap) 那么它应该调用一个事件。
无需保存音频
我试图实现 this
声音监控页面:
public sealed partial class MyPage : Page
{
private async void Page_Loaded(object sender, RoutedEventArgs e)
{
string deviceId = Windows.Media.Devices.MediaDevice.GetDefaultAudioCaptureId(Windows.Media.Devices.AudioDeviceRole.Communications);
gameChatAudioStateMonitor = AudioStateMonitor.CreateForCaptureMonitoringWithCategoryAndDeviceId(MediaCategory.GameChat, deviceId);
gameChatAudioStateMonitor.SoundLevelChanged += GameChatSoundLevelChanged;
//other logic
}
}
音量变化:
private void GameChatSoundLevelChanged(AudioStateMonitor sender, object args)
{
switch (sender.SoundLevel)
{
case SoundLevel.Full:
LevelChangeEvent();
break;
case SoundLevel.Muted:
LevelChangeEvent();
break;
case SoundLevel.Low:
// Audio capture should never be "ducked", only muted or full volume.
Debug.WriteLine("Unexpected audio state change.");
break;
}
}
环境:windows10 (v1809)IDE:VS 2017
不确定这是否是正确的方法。这没有启用音频,也没有触发电平变化事件。
我在 WinForms 和 NAudio 教程中看到了其他选项 here。可能使用采样频率我可以检查事件...没有关于使用 NAudio 和 UWP 绘制图形和识别频率的必须教程。
更新:
遵循@Rob Caplan - MSFT 的建议,这就是我最终得到的结果
IMemoryBufferByteAccess.cs
// We are initializing a COM interface for use within the namespace
// This interface allows access to memory at the byte level which we need to populate audio data that is generated
[ComImport]
[Guid("5B0D3235-4DBA-4D44-865E-8F1D0E4FD04D")]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
unsafe interface IMemoryBufferByteAccess
{
void GetBuffer(out byte* buffer, out uint capacity);
}
GunFireMonitorPage.xaml.cs
public sealed partial class GunFireMonitorPage : Page
{
private MainPage _rootPage;
public static GunFireMonitorPage Current;
private AudioGraph _graph;
private AudioDeviceOutputNode _deviceOutputNode;
private AudioFrameInputNode _frameInputNode;
public double Theta;
public DrivePage()
{
InitializeComponent();
Current = this;
}
protected override async void OnNavigatedTo(NavigationEventArgs e)
{
_rootPage = MainPage.Current;
await CreateAudioGraph();
}
protected override void OnNavigatedFrom(NavigationEventArgs e)
{
_graph?.Dispose();
}
private void Page_Loaded(object sender, RoutedEventArgs e)
{
}
private unsafe AudioFrame GenerateAudioData(uint samples)
{
// Buffer size is (number of samples) * (size of each sample)
// We choose to generate single channel (mono) audio. For multi-channel, multiply by number of channels
uint bufferSize = samples * sizeof(float);
AudioFrame audioFrame = new AudioFrame(bufferSize);
using (AudioBuffer buffer = audioFrame.LockBuffer(AudioBufferAccessMode.Write))
using (IMemoryBufferReference reference = buffer.CreateReference())
{
// Get the buffer from the AudioFrame
// ReSharper disable once SuspiciousTypeConversion.Global
// ReSharper disable once UnusedVariable
((IMemoryBufferByteAccess) reference).GetBuffer(out var dataInBytes, out var capacityInBytes);
// Cast to float since the data we are generating is float
var dataInFloat = (float*)dataInBytes;
float freq = 1000; // choosing to generate frequency of 1kHz
float amplitude = 0.3f;
int sampleRate = (int)_graph.EncodingProperties.SampleRate;
double sampleIncrement = (freq * (Math.PI * 2)) / sampleRate;
// Generate a 1kHz sine wave and populate the values in the memory buffer
for (int i = 0; i < samples; i++)
{
double sinValue = amplitude * Math.Sin(Theta);
dataInFloat[i] = (float)sinValue;
Theta += sampleIncrement;
}
}
return audioFrame;
}
private void node_QuantumStarted(AudioFrameInputNode sender, FrameInputNodeQuantumStartedEventArgs args)
{
// GenerateAudioData can provide PCM audio data by directly synthesizing it or reading from a file.
// Need to know how many samples are required. In this case, the node is running at the same rate as the rest of the graph
// For minimum latency, only provide the required amount of samples. Extra samples will introduce additional latency.
uint numSamplesNeeded = (uint)args.RequiredSamples;
if (numSamplesNeeded != 0)
{
AudioFrame audioData = GenerateAudioData(numSamplesNeeded);
_frameInputNode.AddFrame(audioData);
}
}
private void Button_Click(object sender, RoutedEventArgs e)
{
if (generateButton.Content != null && generateButton.Content.Equals("Generate Audio"))
{
_frameInputNode.Start();
generateButton.Content = "Stop";
audioPipe.Fill = new SolidColorBrush(Colors.Blue);
}
else if (generateButton.Content != null && generateButton.Content.Equals("Stop"))
{
_frameInputNode.Stop();
generateButton.Content = "Generate Audio";
audioPipe.Fill = new SolidColorBrush(Color.FromArgb(255, 49, 49, 49));
}
}
private async Task CreateAudioGraph()
{
// Create an AudioGraph with default settings
AudioGraphSettings settings = new AudioGraphSettings(AudioRenderCategory.Media);
CreateAudioGraphResult result = await AudioGraph.CreateAsync(settings);
if (result.Status != AudioGraphCreationStatus.Success)
{
// Cannot create graph
_rootPage.NotifyUser($"AudioGraph Creation Error because {result.Status.ToString()}", NotifyType.ErrorMessage);
return;
}
_graph = result.Graph;
// Create a device output node
CreateAudioDeviceOutputNodeResult deviceOutputNodeResult = await _graph.CreateDeviceOutputNodeAsync();
if (deviceOutputNodeResult.Status != AudioDeviceNodeCreationStatus.Success)
{
// Cannot create device output node
_rootPage.NotifyUser(
$"Audio Device Output unavailable because {deviceOutputNodeResult.Status.ToString()}", NotifyType.ErrorMessage);
speakerContainer.Background = new SolidColorBrush(Colors.Red);
}
_deviceOutputNode = deviceOutputNodeResult.DeviceOutputNode;
_rootPage.NotifyUser("Device Output Node successfully created", NotifyType.StatusMessage);
speakerContainer.Background = new SolidColorBrush(Colors.Green);
// Create the FrameInputNode at the same format as the graph, except explicitly set mono.
AudioEncodingProperties nodeEncodingProperties = _graph.EncodingProperties;
nodeEncodingProperties.ChannelCount = 1;
_frameInputNode = _graph.CreateFrameInputNode(nodeEncodingProperties);
_frameInputNode.AddOutgoingConnection(_deviceOutputNode);
frameContainer.Background = new SolidColorBrush(Colors.Green);
// Initialize the Frame Input Node in the stopped state
_frameInputNode.Stop();
// Hook up an event handler so we can start generating samples when needed
// This event is triggered when the node is required to provide data
_frameInputNode.QuantumStarted += node_QuantumStarted;
// Start the graph since we will only start/stop the frame input node
_graph.Start();
}
}
GunFireMonitorPage.xaml
<Page
x:Class="SmartPileInspector.xLite.GunFireMonitorPage"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
mc:Ignorable="d" Loaded="Page_Loaded"
HorizontalAlignment="Center"
Background="{ThemeResource ApplicationPageBackgroundThemeBrush}">
<ScrollViewer HorizontalAlignment="Center">
<StackPanel HorizontalAlignment="Center">
<!-- more page content -->
<Grid HorizontalAlignment="Center">
<Grid.ColumnDefinitions>
<ColumnDefinition Width="*"/>
<ColumnDefinition Width="*"/>
</Grid.ColumnDefinitions>
<Grid.RowDefinitions>
<RowDefinition Height="55"></RowDefinition>
</Grid.RowDefinitions>
</Grid>
<AppBarButton x:Name="generateButton" Content="Generate Audio" Click="Button_Click" MinWidth="120" MinHeight="45" Margin="0,50,0,0"/>
<Border x:Name="frameContainer" BorderThickness="0" Background="#4A4A4A" MinWidth="120" MinHeight="45" Margin="0,20,0,0">
<TextBlock x:Name="frame" Text="Frame Input" VerticalAlignment="Center" HorizontalAlignment="Center" />
</Border>
<StackPanel>
<Rectangle x:Name="audioPipe" Margin="0,20,0,0" Height="10" MinWidth="160" Fill="#313131" HorizontalAlignment="Stretch"/>
</StackPanel>
<Border x:Name="speakerContainer" BorderThickness="0" Background="#4A4A4A" MinWidth="120" MinHeight="45" Margin="0,20,0,0">
<TextBlock x:Name="speaker" Text="Output Device" VerticalAlignment="Center" HorizontalAlignment="Center" />
</Border>
<!--</AppBar>-->
</StackPanel>
</ScrollViewer>
</Page>
没有生成图表。并且有蓝线连续的哔哔声。
非常感谢任何帮助
更新:实现了 AudioVisualizer
在AudioVisualizer的帮助下,我能够绘制虱子音频图。
AudioGraph _graph;
AudioDeviceInputNode _inputNode;
PlaybackSource _source;
SourceConverter _converter;
protected override void OnNavigatedTo(NavigationEventArgs e)
{
_rootPage = MainPage.Current;
_rootPage.SetDimensions(700, 600);
base.OnNavigatedTo(e);
CreateAudioGraphAsync();
}
protected override void OnNavigatedFrom(NavigationEventArgs e)
{
base.OnNavigatedFrom(e);
_graph?.Stop();
_graph?.Dispose();
_graph = null;
}
async void CreateAudioGraphAsync()
{
var graphResult = await AudioGraph.CreateAsync(new AudioGraphSettings(Windows.Media.Render.AudioRenderCategory.Media));
if (graphResult.Status != AudioGraphCreationStatus.Success)
throw new InvalidOperationException($"Graph creation failed {graphResult.Status}");
_graph = graphResult.Graph;
var inputNodeResult = await _graph.CreateDeviceInputNodeAsync(MediaCategory.Media);
if (inputNodeResult.Status == AudioDeviceNodeCreationStatus.Success)
{
_inputNode = inputNodeResult.DeviceInputNode;
_source = PlaybackSource.CreateFromAudioNode(_inputNode);
_converter = new SourceConverter
{
Source = _source.Source,
MinFrequency = 110.0f,
MaxFrequency = 3520.0f,
FrequencyCount = 12 * 5 * 5,
FrequencyScale = ScaleType.Linear,
SpectrumRiseTime = TimeSpan.FromMilliseconds(20),
SpectrumFallTime = TimeSpan.FromMilliseconds(200),
RmsRiseTime = TimeSpan.FromMilliseconds(20),
RmsFallTime = TimeSpan.FromMilliseconds(500),
ChannelCount = 1
};
// Note A2
// Note A7
// 5 octaves, 5 bars per note
// Use RMS to gate noise, fast rise slow fall
NotesSpectrum.Source = _converter;
_graph.Start();
}
else
{
_rootPage.NotifyUser("Cannot access microphone", NotifyType.ErrorMessage);
}
}
现在的挑战是当波频率高于阈值时如何连接事件?在那种情况下,我想计算拍摄次数、时间戳及其强度。
示例声音
这是我的 Recording of live sound,就像你在这里一样,当有大锤子敲击时(每秒或更少),我想调用一个事件。
回答 "is this the right approach" 问题:不,AudioStateMonitor 对解决问题没有帮助。
AudioStateMonitor.SoundLevelChanged 告诉您系统是否正在回避您的声音以便不干扰其他东西。例如,它可以将音乐静音以支持电话铃声。 SoundLevelChanged 不会告诉您有关录制声音的音量或频率的任何信息,而这是检测您的掌声所需要的。
正确的方法是使用 AudioGraph(或 WASAPI,但不是来自 C#)将原始音频捕获到 AudioFrameOutputNode to process the signal and then run that through an FFT to detect sounds in your target frequencies and volumes. The AudioCreation sample 演示中,使用 AudioGraph,但不是特定的 AudioFrameOutputNode。
每 https://home.howstuffworks.com/clapper1.htm 拍手的频率范围为 2200Hz 至 2800Hz。
识别枪声看起来要复杂得多,不同的枪有非常不同的特征。快速搜索发现了几篇关于此的研究论文,而不是琐碎的算法。我怀疑您需要某种机器学习来对这些进行分类。这是之前讨论使用 ML 区分枪声和非枪声的话题:
您可以通过从中找到所有 pcm 数据的平均振幅来找到帧的分贝 frame.I 相信您想要创建一个处理输入的图形,看起来像这样
private static event LoudNoise<double>;
private static int quantum = 0;
static AudioGraph ingraph;
private static AudioDeviceInputNode deviceInputNode;
private static AudioFrameOutputNode frameOutputNode;
public static async Task<bool> CreateInputDeviceNode(string deviceId)
{
Console.WriteLine("Creating AudioGraphs");
// Create an AudioGraph with default settings
AudioGraphSettings graphsettings = new AudioGraphSettings(AudioRenderCategory.Media);
graphsettings.EncodingProperties = new AudioEncodingProperties();
graphsettings.EncodingProperties.Subtype = "Float";
graphsettings.EncodingProperties.SampleRate = 48000;
graphsettings.EncodingProperties.ChannelCount = 2;
graphsettings.EncodingProperties.BitsPerSample = 32;
graphsettings.EncodingProperties.Bitrate = 3072000;
//settings.DesiredSamplesPerQuantum = 960;
//settings.QuantumSizeSelectionMode = QuantumSizeSelectionMode.ClosestToDesired;
CreateAudioGraphResult graphresult = await AudioGraph.CreateAsync(graphsettings);
if (graphresult.Status != AudioGraphCreationStatus.Success)
{
// Cannot create graph
return false;
}
ingraph = graphresult.Graph;AudioGraphSettings nodesettings = new AudioGraphSettings(AudioRenderCategory.GameChat);
nodesettings.EncodingProperties = AudioEncodingProperties.CreatePcm(48000, 2, 32);
nodesettings.DesiredSamplesPerQuantum = 960;
nodesettings.QuantumSizeSelectionMode = QuantumSizeSelectionMode.ClosestToDesired;
frameOutputNode = ingraph.CreateFrameOutputNode(ingraph.EncodingProperties);
quantum = 0;
ingraph.QuantumStarted += Graph_QuantumStarted;
DeviceInformation selectedDevice;
string device = Windows.Media.Devices.MediaDevice.GetDefaultAudioCaptureId(Windows.Media.Devices.AudioDeviceRole.Default);
if (!string.IsNullOrEmpty(device))
{
selectedDevice = await DeviceInformation.CreateFromIdAsync(device);
} else
{
return false;
}
CreateAudioDeviceInputNodeResult result =
await ingraph.CreateDeviceInputNodeAsync(MediaCategory.Media, nodesettings.EncodingProperties, selectedDevice);
if (result.Status != AudioDeviceNodeCreationStatus.Success)
{
// Cannot create device output node
return false;
}
deviceInputNode = result.DeviceInputNode;
deviceInputNode.AddOutgoingConnection(frameOutputNode);
frameOutputNode.Start();
ingraph.Start();
return true;
}
private static void Graph_QuantumStarted(AudioGraph sender, object args)
{
if (++quantum % 2 == 0)
{
AudioFrame frame = frameOutputNode.GetFrame();
float[] dataInFloats;
using (AudioBuffer buffer = frame.LockBuffer(AudioBufferAccessMode.Write))
using (IMemoryBufferReference reference = buffer.CreateReference())
{
// Get the buffer from the AudioFrame
((IMemoryBufferByteAccess)reference).GetBuffer(out byte* dataInBytes, out uint capacityInBytes);
float* dataInFloat = (float*)dataInBytes;
dataInFloats = new float[capacityInBytes / sizeof(float)];
for (int i = 0; i < capacityInBytes / sizeof(float); i++)
{
dataInFloats[i] = dataInFloat[i];
}
}
double decibels = 0f;
foreach (var sample in dataInFloats)
{
decibels += Math.Abs(sample);
}
decibels = 20 * Math.Log10(decibels / dataInFloats.Length);
// You can pass the decibel value where ever you'd like from here
if (decibels > 10)
{
LoudNoise?.Invoke(this, decibels);
}
}
}
P.S。我做了所有这些静态的,但如果它们都在同一个实例中,它自然会起作用
我还从我自己的项目中复制了一部分,所以它可能有一些我忘记了的部分 trim。希望对你有帮助
我正在开发一个新的 UWP 应用程序,它应该监视声音并在每次突然的声音响起时触发一个事件(类似于枪声或拍手声)。
- 需要启用默认音频输入和监听实时音频。
- 设置识别环境噪音和识别的音频灵敏度clap/gun-fire
- 当有高频声音像clap/gun-fire声音时(理想情况下它应该像配置的频率+/-40那么它是gun-fire/clap) 那么它应该调用一个事件。
无需保存音频 我试图实现 this
声音监控页面:
public sealed partial class MyPage : Page
{
private async void Page_Loaded(object sender, RoutedEventArgs e)
{
string deviceId = Windows.Media.Devices.MediaDevice.GetDefaultAudioCaptureId(Windows.Media.Devices.AudioDeviceRole.Communications);
gameChatAudioStateMonitor = AudioStateMonitor.CreateForCaptureMonitoringWithCategoryAndDeviceId(MediaCategory.GameChat, deviceId);
gameChatAudioStateMonitor.SoundLevelChanged += GameChatSoundLevelChanged;
//other logic
}
}
音量变化:
private void GameChatSoundLevelChanged(AudioStateMonitor sender, object args)
{
switch (sender.SoundLevel)
{
case SoundLevel.Full:
LevelChangeEvent();
break;
case SoundLevel.Muted:
LevelChangeEvent();
break;
case SoundLevel.Low:
// Audio capture should never be "ducked", only muted or full volume.
Debug.WriteLine("Unexpected audio state change.");
break;
}
}
环境:windows10 (v1809)IDE:VS 2017
不确定这是否是正确的方法。这没有启用音频,也没有触发电平变化事件。
我在 WinForms 和 NAudio 教程中看到了其他选项 here。可能使用采样频率我可以检查事件...没有关于使用 NAudio 和 UWP 绘制图形和识别频率的必须教程。
更新:
遵循@Rob Caplan - MSFT 的建议,这就是我最终得到的结果
IMemoryBufferByteAccess.cs
// We are initializing a COM interface for use within the namespace
// This interface allows access to memory at the byte level which we need to populate audio data that is generated
[ComImport]
[Guid("5B0D3235-4DBA-4D44-865E-8F1D0E4FD04D")]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
unsafe interface IMemoryBufferByteAccess
{
void GetBuffer(out byte* buffer, out uint capacity);
}
GunFireMonitorPage.xaml.cs
public sealed partial class GunFireMonitorPage : Page
{
private MainPage _rootPage;
public static GunFireMonitorPage Current;
private AudioGraph _graph;
private AudioDeviceOutputNode _deviceOutputNode;
private AudioFrameInputNode _frameInputNode;
public double Theta;
public DrivePage()
{
InitializeComponent();
Current = this;
}
protected override async void OnNavigatedTo(NavigationEventArgs e)
{
_rootPage = MainPage.Current;
await CreateAudioGraph();
}
protected override void OnNavigatedFrom(NavigationEventArgs e)
{
_graph?.Dispose();
}
private void Page_Loaded(object sender, RoutedEventArgs e)
{
}
private unsafe AudioFrame GenerateAudioData(uint samples)
{
// Buffer size is (number of samples) * (size of each sample)
// We choose to generate single channel (mono) audio. For multi-channel, multiply by number of channels
uint bufferSize = samples * sizeof(float);
AudioFrame audioFrame = new AudioFrame(bufferSize);
using (AudioBuffer buffer = audioFrame.LockBuffer(AudioBufferAccessMode.Write))
using (IMemoryBufferReference reference = buffer.CreateReference())
{
// Get the buffer from the AudioFrame
// ReSharper disable once SuspiciousTypeConversion.Global
// ReSharper disable once UnusedVariable
((IMemoryBufferByteAccess) reference).GetBuffer(out var dataInBytes, out var capacityInBytes);
// Cast to float since the data we are generating is float
var dataInFloat = (float*)dataInBytes;
float freq = 1000; // choosing to generate frequency of 1kHz
float amplitude = 0.3f;
int sampleRate = (int)_graph.EncodingProperties.SampleRate;
double sampleIncrement = (freq * (Math.PI * 2)) / sampleRate;
// Generate a 1kHz sine wave and populate the values in the memory buffer
for (int i = 0; i < samples; i++)
{
double sinValue = amplitude * Math.Sin(Theta);
dataInFloat[i] = (float)sinValue;
Theta += sampleIncrement;
}
}
return audioFrame;
}
private void node_QuantumStarted(AudioFrameInputNode sender, FrameInputNodeQuantumStartedEventArgs args)
{
// GenerateAudioData can provide PCM audio data by directly synthesizing it or reading from a file.
// Need to know how many samples are required. In this case, the node is running at the same rate as the rest of the graph
// For minimum latency, only provide the required amount of samples. Extra samples will introduce additional latency.
uint numSamplesNeeded = (uint)args.RequiredSamples;
if (numSamplesNeeded != 0)
{
AudioFrame audioData = GenerateAudioData(numSamplesNeeded);
_frameInputNode.AddFrame(audioData);
}
}
private void Button_Click(object sender, RoutedEventArgs e)
{
if (generateButton.Content != null && generateButton.Content.Equals("Generate Audio"))
{
_frameInputNode.Start();
generateButton.Content = "Stop";
audioPipe.Fill = new SolidColorBrush(Colors.Blue);
}
else if (generateButton.Content != null && generateButton.Content.Equals("Stop"))
{
_frameInputNode.Stop();
generateButton.Content = "Generate Audio";
audioPipe.Fill = new SolidColorBrush(Color.FromArgb(255, 49, 49, 49));
}
}
private async Task CreateAudioGraph()
{
// Create an AudioGraph with default settings
AudioGraphSettings settings = new AudioGraphSettings(AudioRenderCategory.Media);
CreateAudioGraphResult result = await AudioGraph.CreateAsync(settings);
if (result.Status != AudioGraphCreationStatus.Success)
{
// Cannot create graph
_rootPage.NotifyUser($"AudioGraph Creation Error because {result.Status.ToString()}", NotifyType.ErrorMessage);
return;
}
_graph = result.Graph;
// Create a device output node
CreateAudioDeviceOutputNodeResult deviceOutputNodeResult = await _graph.CreateDeviceOutputNodeAsync();
if (deviceOutputNodeResult.Status != AudioDeviceNodeCreationStatus.Success)
{
// Cannot create device output node
_rootPage.NotifyUser(
$"Audio Device Output unavailable because {deviceOutputNodeResult.Status.ToString()}", NotifyType.ErrorMessage);
speakerContainer.Background = new SolidColorBrush(Colors.Red);
}
_deviceOutputNode = deviceOutputNodeResult.DeviceOutputNode;
_rootPage.NotifyUser("Device Output Node successfully created", NotifyType.StatusMessage);
speakerContainer.Background = new SolidColorBrush(Colors.Green);
// Create the FrameInputNode at the same format as the graph, except explicitly set mono.
AudioEncodingProperties nodeEncodingProperties = _graph.EncodingProperties;
nodeEncodingProperties.ChannelCount = 1;
_frameInputNode = _graph.CreateFrameInputNode(nodeEncodingProperties);
_frameInputNode.AddOutgoingConnection(_deviceOutputNode);
frameContainer.Background = new SolidColorBrush(Colors.Green);
// Initialize the Frame Input Node in the stopped state
_frameInputNode.Stop();
// Hook up an event handler so we can start generating samples when needed
// This event is triggered when the node is required to provide data
_frameInputNode.QuantumStarted += node_QuantumStarted;
// Start the graph since we will only start/stop the frame input node
_graph.Start();
}
}
GunFireMonitorPage.xaml
<Page
x:Class="SmartPileInspector.xLite.GunFireMonitorPage"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
mc:Ignorable="d" Loaded="Page_Loaded"
HorizontalAlignment="Center"
Background="{ThemeResource ApplicationPageBackgroundThemeBrush}">
<ScrollViewer HorizontalAlignment="Center">
<StackPanel HorizontalAlignment="Center">
<!-- more page content -->
<Grid HorizontalAlignment="Center">
<Grid.ColumnDefinitions>
<ColumnDefinition Width="*"/>
<ColumnDefinition Width="*"/>
</Grid.ColumnDefinitions>
<Grid.RowDefinitions>
<RowDefinition Height="55"></RowDefinition>
</Grid.RowDefinitions>
</Grid>
<AppBarButton x:Name="generateButton" Content="Generate Audio" Click="Button_Click" MinWidth="120" MinHeight="45" Margin="0,50,0,0"/>
<Border x:Name="frameContainer" BorderThickness="0" Background="#4A4A4A" MinWidth="120" MinHeight="45" Margin="0,20,0,0">
<TextBlock x:Name="frame" Text="Frame Input" VerticalAlignment="Center" HorizontalAlignment="Center" />
</Border>
<StackPanel>
<Rectangle x:Name="audioPipe" Margin="0,20,0,0" Height="10" MinWidth="160" Fill="#313131" HorizontalAlignment="Stretch"/>
</StackPanel>
<Border x:Name="speakerContainer" BorderThickness="0" Background="#4A4A4A" MinWidth="120" MinHeight="45" Margin="0,20,0,0">
<TextBlock x:Name="speaker" Text="Output Device" VerticalAlignment="Center" HorizontalAlignment="Center" />
</Border>
<!--</AppBar>-->
</StackPanel>
</ScrollViewer>
</Page>
没有生成图表。并且有蓝线连续的哔哔声。 非常感谢任何帮助
更新:实现了 AudioVisualizer
在AudioVisualizer的帮助下,我能够绘制虱子音频图。
AudioGraph _graph;
AudioDeviceInputNode _inputNode;
PlaybackSource _source;
SourceConverter _converter;
protected override void OnNavigatedTo(NavigationEventArgs e)
{
_rootPage = MainPage.Current;
_rootPage.SetDimensions(700, 600);
base.OnNavigatedTo(e);
CreateAudioGraphAsync();
}
protected override void OnNavigatedFrom(NavigationEventArgs e)
{
base.OnNavigatedFrom(e);
_graph?.Stop();
_graph?.Dispose();
_graph = null;
}
async void CreateAudioGraphAsync()
{
var graphResult = await AudioGraph.CreateAsync(new AudioGraphSettings(Windows.Media.Render.AudioRenderCategory.Media));
if (graphResult.Status != AudioGraphCreationStatus.Success)
throw new InvalidOperationException($"Graph creation failed {graphResult.Status}");
_graph = graphResult.Graph;
var inputNodeResult = await _graph.CreateDeviceInputNodeAsync(MediaCategory.Media);
if (inputNodeResult.Status == AudioDeviceNodeCreationStatus.Success)
{
_inputNode = inputNodeResult.DeviceInputNode;
_source = PlaybackSource.CreateFromAudioNode(_inputNode);
_converter = new SourceConverter
{
Source = _source.Source,
MinFrequency = 110.0f,
MaxFrequency = 3520.0f,
FrequencyCount = 12 * 5 * 5,
FrequencyScale = ScaleType.Linear,
SpectrumRiseTime = TimeSpan.FromMilliseconds(20),
SpectrumFallTime = TimeSpan.FromMilliseconds(200),
RmsRiseTime = TimeSpan.FromMilliseconds(20),
RmsFallTime = TimeSpan.FromMilliseconds(500),
ChannelCount = 1
};
// Note A2
// Note A7
// 5 octaves, 5 bars per note
// Use RMS to gate noise, fast rise slow fall
NotesSpectrum.Source = _converter;
_graph.Start();
}
else
{
_rootPage.NotifyUser("Cannot access microphone", NotifyType.ErrorMessage);
}
}
现在的挑战是当波频率高于阈值时如何连接事件?在那种情况下,我想计算拍摄次数、时间戳及其强度。
示例声音
这是我的 Recording of live sound,就像你在这里一样,当有大锤子敲击时(每秒或更少),我想调用一个事件。
回答 "is this the right approach" 问题:不,AudioStateMonitor 对解决问题没有帮助。
AudioStateMonitor.SoundLevelChanged 告诉您系统是否正在回避您的声音以便不干扰其他东西。例如,它可以将音乐静音以支持电话铃声。 SoundLevelChanged 不会告诉您有关录制声音的音量或频率的任何信息,而这是检测您的掌声所需要的。
正确的方法是使用 AudioGraph(或 WASAPI,但不是来自 C#)将原始音频捕获到 AudioFrameOutputNode to process the signal and then run that through an FFT to detect sounds in your target frequencies and volumes. The AudioCreation sample 演示中,使用 AudioGraph,但不是特定的 AudioFrameOutputNode。
每 https://home.howstuffworks.com/clapper1.htm 拍手的频率范围为 2200Hz 至 2800Hz。
识别枪声看起来要复杂得多,不同的枪有非常不同的特征。快速搜索发现了几篇关于此的研究论文,而不是琐碎的算法。我怀疑您需要某种机器学习来对这些进行分类。这是之前讨论使用 ML 区分枪声和非枪声的话题:
您可以通过从中找到所有 pcm 数据的平均振幅来找到帧的分贝 frame.I 相信您想要创建一个处理输入的图形,看起来像这样
private static event LoudNoise<double>;
private static int quantum = 0;
static AudioGraph ingraph;
private static AudioDeviceInputNode deviceInputNode;
private static AudioFrameOutputNode frameOutputNode;
public static async Task<bool> CreateInputDeviceNode(string deviceId)
{
Console.WriteLine("Creating AudioGraphs");
// Create an AudioGraph with default settings
AudioGraphSettings graphsettings = new AudioGraphSettings(AudioRenderCategory.Media);
graphsettings.EncodingProperties = new AudioEncodingProperties();
graphsettings.EncodingProperties.Subtype = "Float";
graphsettings.EncodingProperties.SampleRate = 48000;
graphsettings.EncodingProperties.ChannelCount = 2;
graphsettings.EncodingProperties.BitsPerSample = 32;
graphsettings.EncodingProperties.Bitrate = 3072000;
//settings.DesiredSamplesPerQuantum = 960;
//settings.QuantumSizeSelectionMode = QuantumSizeSelectionMode.ClosestToDesired;
CreateAudioGraphResult graphresult = await AudioGraph.CreateAsync(graphsettings);
if (graphresult.Status != AudioGraphCreationStatus.Success)
{
// Cannot create graph
return false;
}
ingraph = graphresult.Graph;AudioGraphSettings nodesettings = new AudioGraphSettings(AudioRenderCategory.GameChat);
nodesettings.EncodingProperties = AudioEncodingProperties.CreatePcm(48000, 2, 32);
nodesettings.DesiredSamplesPerQuantum = 960;
nodesettings.QuantumSizeSelectionMode = QuantumSizeSelectionMode.ClosestToDesired;
frameOutputNode = ingraph.CreateFrameOutputNode(ingraph.EncodingProperties);
quantum = 0;
ingraph.QuantumStarted += Graph_QuantumStarted;
DeviceInformation selectedDevice;
string device = Windows.Media.Devices.MediaDevice.GetDefaultAudioCaptureId(Windows.Media.Devices.AudioDeviceRole.Default);
if (!string.IsNullOrEmpty(device))
{
selectedDevice = await DeviceInformation.CreateFromIdAsync(device);
} else
{
return false;
}
CreateAudioDeviceInputNodeResult result =
await ingraph.CreateDeviceInputNodeAsync(MediaCategory.Media, nodesettings.EncodingProperties, selectedDevice);
if (result.Status != AudioDeviceNodeCreationStatus.Success)
{
// Cannot create device output node
return false;
}
deviceInputNode = result.DeviceInputNode;
deviceInputNode.AddOutgoingConnection(frameOutputNode);
frameOutputNode.Start();
ingraph.Start();
return true;
}
private static void Graph_QuantumStarted(AudioGraph sender, object args)
{
if (++quantum % 2 == 0)
{
AudioFrame frame = frameOutputNode.GetFrame();
float[] dataInFloats;
using (AudioBuffer buffer = frame.LockBuffer(AudioBufferAccessMode.Write))
using (IMemoryBufferReference reference = buffer.CreateReference())
{
// Get the buffer from the AudioFrame
((IMemoryBufferByteAccess)reference).GetBuffer(out byte* dataInBytes, out uint capacityInBytes);
float* dataInFloat = (float*)dataInBytes;
dataInFloats = new float[capacityInBytes / sizeof(float)];
for (int i = 0; i < capacityInBytes / sizeof(float); i++)
{
dataInFloats[i] = dataInFloat[i];
}
}
double decibels = 0f;
foreach (var sample in dataInFloats)
{
decibels += Math.Abs(sample);
}
decibels = 20 * Math.Log10(decibels / dataInFloats.Length);
// You can pass the decibel value where ever you'd like from here
if (decibels > 10)
{
LoudNoise?.Invoke(this, decibels);
}
}
}
P.S。我做了所有这些静态的,但如果它们都在同一个实例中,它自然会起作用
我还从我自己的项目中复制了一部分,所以它可能有一些我忘记了的部分 trim。希望对你有帮助