将 WAV 录制到 IBM Watson Speech-To-Text
Recording WAV to IBM Watson Speech-To-Text
我正在尝试录制音频并立即将其发送到 IBM Watson Speech-To-Text 进行转录。我已经使用从磁盘加载的 WAV 文件测试了 Watson,并且成功了。另一方面,我还测试了从麦克风录音并将其存储到磁盘,效果也很好。
但是当我尝试使用 NAudio WaveIn 录制音频时,Watson 的结果是空的,就好像没有音频一样。
谁能对此有所启发,或者有人有一些想法?
// Connects to the Watson Speech-To-Text websocket endpoint, sends the
// opening message, starts consuming results, then begins recording.
// async void is tolerable only because this is a fire-and-forget entry
// point; exceptions after the first await are unobservable to callers.
private async void StartHere()
{
    // BUG FIX: the original declared `var ws = ...` as a LOCAL, so the `ws`
    // field referenced by Stop/Close/WaveIn_DataAvailable was never assigned
    // and they operated on an unconnected (null) socket.
    ws = new ClientWebSocket();
    ws.Options.Credentials = new NetworkCredential("*****", "*****");
    await ws.ConnectAsync(new Uri("wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize?model=en-US_NarrowbandModel"), CancellationToken.None);
    // BUG FIX: Task.WaitAll blocked the thread inside an async method (deadlock
    // risk on a sync context). Await the opening message, then let the result
    // reader run concurrently while we record.
    await ws.SendAsync(openingMessage, WebSocketMessageType.Text, true, CancellationToken.None);
    _ = HandleResults(ws); // reads transcription results in the background
    Record();
}
// Starts capturing microphone audio with NAudio in 50 ms chunks; each chunk
// is forwarded to Watson by WaveIn_DataAvailable.
public void Record()
{
    var waveIn = new WaveInEvent
    {
        BufferMilliseconds = 50, // small buffers keep streaming latency low
        DeviceNumber = 0,        // default capture device
        WaveFormat = format
    };
    // BUG FIX: these events are EventHandler<WaveInEventArgs> and
    // EventHandler<StoppedEventArgs>; wrapping the method group in a plain
    // `new EventHandler(...)` does not compile. Use method-group conversion.
    waveIn.DataAvailable += WaveIn_DataAvailable;
    waveIn.RecordingStopped += WaveIn_RecordingStopped;
    // NOTE(review): waveIn is a local, so nothing can later call
    // StopRecording()/Dispose() on it — consider storing it in a field.
    waveIn.StartRecording();
}
// Tells Watson the audio stream is finished by sending the closing message.
// BUG FIX: the original was `public void Stop()` but used `await`, which does
// not compile — the method must be marked async. Returning Task (not void)
// lets callers await completion and observe failures.
public async Task Stop()
{
    await ws.SendAsync(closingMessage, WebSocketMessageType.Text, true, CancellationToken.None);
}
// Closes the websocket with a normal-closure status.
// IMPROVEMENT: GetAwaiter().GetResult() blocks like .Wait() but rethrows the
// original exception instead of wrapping it in an AggregateException.
// NOTE(review): blocking on async code can deadlock on a UI sync context;
// prefer exposing an awaitable CloseAsync if callers can await.
public void Close()
{
    ws.CloseAsync(WebSocketCloseStatus.NormalClosure, "Close", CancellationToken.None).GetAwaiter().GetResult();
}
// Streams each captured audio chunk to Watson as a binary websocket message.
// async void is the accepted exception for event handlers (the event
// signature demands void).
private async void WaveIn_DataAvailable(object sender, WaveInEventArgs e)
{
    // BUG FIX (root cause of the empty transcriptions): only e.BytesRecorded
    // bytes of e.Buffer contain fresh audio — sending the whole buffer pushes
    // stale/zero bytes into the stream. Also, ArraySegment is generic:
    // ArraySegment<byte> is required for this to compile, and the method must
    // be async to use await.
    await ws.SendAsync(new ArraySegment<byte>(e.Buffer, 0, e.BytesRecorded), WebSocketMessageType.Binary, true, CancellationToken.None);
}
// Reads text messages from the websocket until the service closes the
// connection or sends the {"state":"listening"} delimiter; each complete
// message (possibly fragmented over several frames) is printed.
private async Task HandleResults(ClientWebSocket ws)
{
    var buffer = new byte[1024];
    while (true)
    {
        // BUG FIX: ArraySegment is generic — ArraySegment<byte> is required
        // for this to compile (here and below).
        var segment = new ArraySegment<byte>(buffer);
        var result = await ws.ReceiveAsync(segment, CancellationToken.None);
        if (result.MessageType == WebSocketMessageType.Close)
        {
            return;
        }
        int count = result.Count;
        // Reassemble a fragmented message into the single fixed-size buffer.
        while (!result.EndOfMessage)
        {
            if (count >= buffer.Length)
            {
                // Message exceeds our buffer; abort rather than truncate.
                await ws.CloseAsync(WebSocketCloseStatus.InvalidPayloadData, "That's too long", CancellationToken.None);
                return;
            }
            segment = new ArraySegment<byte>(buffer, count, buffer.Length - count);
            result = await ws.ReceiveAsync(segment, CancellationToken.None);
            count += result.Count;
        }
        var message = Encoding.UTF8.GetString(buffer, 0, count);
        // you'll probably want to parse the JSON into a useful object here,
        // see ServiceState and IsDelimeter for a light-weight example of that.
        Console.WriteLine(message);
        if (IsDelimeter(message))
        {
            return;
        }
    }
}
// Returns true when the JSON message is the service's {"state":"listening"}
// delimiter that separates one transcription exchange from the next.
// (Name kept as-is for compatibility, though "Delimiter" is the usual spelling.)
private bool IsDelimeter(String json)
{
    // BUG FIX: the MemoryStream was never disposed; `using` guarantees it.
    using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(json)))
    {
        DataContractJsonSerializer ser = new DataContractJsonSerializer(typeof(ServiceState));
        ServiceState obj = (ServiceState) ser.ReadObject(stream);
        return obj.state == "listening";
    }
}
/// <summary>
/// Minimal shape of a Watson state message, e.g. {"state":"listening"},
/// used by IsDelimeter to detect the "listening" delimiter.
/// </summary>
[DataContract]
internal class ServiceState
{
    // Auto-property instead of a public field; [DataMember] still maps the
    // same JSON "state" key, so (de)serialization behavior is unchanged.
    [DataMember]
    public string state { get; set; } = "";
}
编辑:我也试过在开始录音之前发送 WAV "header",像这样
// BUG FIX: the events are EventHandler<WaveInEventArgs> /
// EventHandler<StoppedEventArgs>; `new EventHandler(...)` does not compile.
// Use method-group conversion instead.
waveIn.DataAvailable += WaveIn_DataAvailable;
waveIn.RecordingStopped += WaveIn_RecordingStopped;
/* Send WAV "header" first so the service can detect the stream's audio format */
using (var stream = new MemoryStream())
{
    using (var writer = new BinaryWriter(stream, Encoding.UTF8))
    {
        writer.Write(Encoding.UTF8.GetBytes("RIFF"));
        writer.Write(0); // RIFF chunk size placeholder (unknown while streaming)
        writer.Write(Encoding.UTF8.GetBytes("WAVE"));
        writer.Write(Encoding.UTF8.GetBytes("fmt "));
        format.Serialize(writer); // NAudio writes the fmt chunk body and its size
        // Non-PCM formats need a "fact" chunk; sample count 0 while streaming.
        if (format.Encoding != WaveFormatEncoding.Pcm && format.BitsPerSample != 0)
        {
            writer.Write(Encoding.UTF8.GetBytes("fact"));
            writer.Write(4);
            writer.Write(0);
        }
        writer.Write(Encoding.UTF8.GetBytes("data"));
        writer.Write(0); // data chunk size placeholder
        writer.Flush();
    }
    byte[] header = stream.ToArray();
    // BUG FIX: ArraySegment is generic — ArraySegment<byte> is required.
    await ws.SendAsync(new ArraySegment<byte>(header), WebSocketMessageType.Binary, true, CancellationToken.None);
}
/* End WAV header */
waveIn.StartRecording();
经过 ~20 小时的反复试验找到了解决方案,我创建了一个 GitHub Gist,因为它可能对其他人很方便。参见 https://gist.github.com/kboek/20476c2a03b5e9188edebaace74f9a07
我正在尝试录制音频并立即将其发送到 IBM Watson Speech-To-Text 进行转录。我已经使用从磁盘加载的 WAV 文件测试了 Watson,并且成功了。另一方面,我还测试了从麦克风录音并将其存储到磁盘,效果也很好。
但是当我尝试使用 NAudio WaveIn 录制音频时,Watson 的结果是空的,就好像没有音频一样。
谁能对此有所启发,或者有人有一些想法?
// Connects to the Watson Speech-To-Text websocket endpoint, sends the
// opening message, starts consuming results, then begins recording.
// async void is tolerable only because this is a fire-and-forget entry
// point; exceptions after the first await are unobservable to callers.
private async void StartHere()
{
    // BUG FIX: the original declared `var ws = ...` as a LOCAL, so the `ws`
    // field referenced by Stop/Close/WaveIn_DataAvailable was never assigned
    // and they operated on an unconnected (null) socket.
    ws = new ClientWebSocket();
    ws.Options.Credentials = new NetworkCredential("*****", "*****");
    await ws.ConnectAsync(new Uri("wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize?model=en-US_NarrowbandModel"), CancellationToken.None);
    // BUG FIX: Task.WaitAll blocked the thread inside an async method (deadlock
    // risk on a sync context). Await the opening message, then let the result
    // reader run concurrently while we record.
    await ws.SendAsync(openingMessage, WebSocketMessageType.Text, true, CancellationToken.None);
    _ = HandleResults(ws); // reads transcription results in the background
    Record();
}
// Starts capturing microphone audio with NAudio in 50 ms chunks; each chunk
// is forwarded to Watson by WaveIn_DataAvailable.
public void Record()
{
    var waveIn = new WaveInEvent
    {
        BufferMilliseconds = 50, // small buffers keep streaming latency low
        DeviceNumber = 0,        // default capture device
        WaveFormat = format
    };
    // BUG FIX: these events are EventHandler<WaveInEventArgs> and
    // EventHandler<StoppedEventArgs>; wrapping the method group in a plain
    // `new EventHandler(...)` does not compile. Use method-group conversion.
    waveIn.DataAvailable += WaveIn_DataAvailable;
    waveIn.RecordingStopped += WaveIn_RecordingStopped;
    // NOTE(review): waveIn is a local, so nothing can later call
    // StopRecording()/Dispose() on it — consider storing it in a field.
    waveIn.StartRecording();
}
// Tells Watson the audio stream is finished by sending the closing message.
// BUG FIX: the original was `public void Stop()` but used `await`, which does
// not compile — the method must be marked async. Returning Task (not void)
// lets callers await completion and observe failures.
public async Task Stop()
{
    await ws.SendAsync(closingMessage, WebSocketMessageType.Text, true, CancellationToken.None);
}
// Closes the websocket with a normal-closure status.
// IMPROVEMENT: GetAwaiter().GetResult() blocks like .Wait() but rethrows the
// original exception instead of wrapping it in an AggregateException.
// NOTE(review): blocking on async code can deadlock on a UI sync context;
// prefer exposing an awaitable CloseAsync if callers can await.
public void Close()
{
    ws.CloseAsync(WebSocketCloseStatus.NormalClosure, "Close", CancellationToken.None).GetAwaiter().GetResult();
}
// Streams each captured audio chunk to Watson as a binary websocket message.
// async void is the accepted exception for event handlers (the event
// signature demands void).
private async void WaveIn_DataAvailable(object sender, WaveInEventArgs e)
{
    // BUG FIX (root cause of the empty transcriptions): only e.BytesRecorded
    // bytes of e.Buffer contain fresh audio — sending the whole buffer pushes
    // stale/zero bytes into the stream. Also, ArraySegment is generic:
    // ArraySegment<byte> is required for this to compile, and the method must
    // be async to use await.
    await ws.SendAsync(new ArraySegment<byte>(e.Buffer, 0, e.BytesRecorded), WebSocketMessageType.Binary, true, CancellationToken.None);
}
// Reads text messages from the websocket until the service closes the
// connection or sends the {"state":"listening"} delimiter; each complete
// message (possibly fragmented over several frames) is printed.
private async Task HandleResults(ClientWebSocket ws)
{
    var buffer = new byte[1024];
    while (true)
    {
        // BUG FIX: ArraySegment is generic — ArraySegment<byte> is required
        // for this to compile (here and below).
        var segment = new ArraySegment<byte>(buffer);
        var result = await ws.ReceiveAsync(segment, CancellationToken.None);
        if (result.MessageType == WebSocketMessageType.Close)
        {
            return;
        }
        int count = result.Count;
        // Reassemble a fragmented message into the single fixed-size buffer.
        while (!result.EndOfMessage)
        {
            if (count >= buffer.Length)
            {
                // Message exceeds our buffer; abort rather than truncate.
                await ws.CloseAsync(WebSocketCloseStatus.InvalidPayloadData, "That's too long", CancellationToken.None);
                return;
            }
            segment = new ArraySegment<byte>(buffer, count, buffer.Length - count);
            result = await ws.ReceiveAsync(segment, CancellationToken.None);
            count += result.Count;
        }
        var message = Encoding.UTF8.GetString(buffer, 0, count);
        // you'll probably want to parse the JSON into a useful object here,
        // see ServiceState and IsDelimeter for a light-weight example of that.
        Console.WriteLine(message);
        if (IsDelimeter(message))
        {
            return;
        }
    }
}
// Returns true when the JSON message is the service's {"state":"listening"}
// delimiter that separates one transcription exchange from the next.
// (Name kept as-is for compatibility, though "Delimiter" is the usual spelling.)
private bool IsDelimeter(String json)
{
    // BUG FIX: the MemoryStream was never disposed; `using` guarantees it.
    using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(json)))
    {
        DataContractJsonSerializer ser = new DataContractJsonSerializer(typeof(ServiceState));
        ServiceState obj = (ServiceState) ser.ReadObject(stream);
        return obj.state == "listening";
    }
}
/// <summary>
/// Minimal shape of a Watson state message, e.g. {"state":"listening"},
/// used by IsDelimeter to detect the "listening" delimiter.
/// </summary>
[DataContract]
internal class ServiceState
{
    // Auto-property instead of a public field; [DataMember] still maps the
    // same JSON "state" key, so (de)serialization behavior is unchanged.
    [DataMember]
    public string state { get; set; } = "";
}
编辑:我也试过在开始录音之前发送 WAV "header",像这样
// BUG FIX: the events are EventHandler<WaveInEventArgs> /
// EventHandler<StoppedEventArgs>; `new EventHandler(...)` does not compile.
// Use method-group conversion instead.
waveIn.DataAvailable += WaveIn_DataAvailable;
waveIn.RecordingStopped += WaveIn_RecordingStopped;
/* Send WAV "header" first so the service can detect the stream's audio format */
using (var stream = new MemoryStream())
{
    using (var writer = new BinaryWriter(stream, Encoding.UTF8))
    {
        writer.Write(Encoding.UTF8.GetBytes("RIFF"));
        writer.Write(0); // RIFF chunk size placeholder (unknown while streaming)
        writer.Write(Encoding.UTF8.GetBytes("WAVE"));
        writer.Write(Encoding.UTF8.GetBytes("fmt "));
        format.Serialize(writer); // NAudio writes the fmt chunk body and its size
        // Non-PCM formats need a "fact" chunk; sample count 0 while streaming.
        if (format.Encoding != WaveFormatEncoding.Pcm && format.BitsPerSample != 0)
        {
            writer.Write(Encoding.UTF8.GetBytes("fact"));
            writer.Write(4);
            writer.Write(0);
        }
        writer.Write(Encoding.UTF8.GetBytes("data"));
        writer.Write(0); // data chunk size placeholder
        writer.Flush();
    }
    byte[] header = stream.ToArray();
    // BUG FIX: ArraySegment is generic — ArraySegment<byte> is required.
    await ws.SendAsync(new ArraySegment<byte>(header), WebSocketMessageType.Binary, true, CancellationToken.None);
}
/* End WAV header */
waveIn.StartRecording();
经过 ~20 小时的反复试验找到了解决方案,我创建了一个 GitHub Gist,因为它可能对其他人很方便。参见 https://gist.github.com/kboek/20476c2a03b5e9188edebaace74f9a07