使用 IBM Watson 语音转文本(Speech to Text)和 C# 转录超过 100 MB 的长音频
Speech to text Ibm Watson C# with long audio more than 100 MB
我正在编写 C# 代码,尝试转录超过 100 MB 的音频,但服务不允许这样做。我需要开发一个能够在 C# 中发送超过 100 MB 音频的程序。
在此代码中,我使用的是 WebSocket,但我应该如何发送音频流?
/// <summary>
/// Streams a WAV file to the IBM Watson Speech to Text WebSocket endpoint and
/// blocks until the service reports that it has finished processing the audio.
/// </summary>
/// <remarks>
/// The file is read and sent in fixed-size chunks instead of being loaded into
/// memory as a whole. After the last chunk a <c>{"action": "stop"}</c> message is
/// sent and the method waits for the closing <c>{"state": "listening"}</c> message
/// so that no results are lost by closing the socket too early.
/// NOTE(review): the service is reported to enforce a 100 MB limit per request;
/// this method does not split larger files into several requests — confirm
/// against the service documentation.
/// </remarks>
public static void CallWatson()
{
    // Placeholder: supply a valid authentication token before running.
    string watsonToken = "";
    string startActionjson = "{\"action\": \"start\", \"content-type\": \"audio/wav\", \"continuous\" : true, \"interim_results\": true}";
    string stopActionJson = "{\"action\": \"stop\"}";
    // Verbatim string so the backslash is not treated as an escape sequence.
    string filePath = @"E:\Test3.wav";

    using (var nf = new Notifier())
    // Released once for every {"state": "listening"} message received.
    using (var listening = new System.Threading.SemaphoreSlim(0))
    // Set as soon as the connection is closed, so the sender can stop early.
    using (var closed = new System.Threading.ManualResetEventSlim(false))
    using (var ws = new WebSocket("wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize?watson-token=" + watsonToken))
    {
        ws.OnOpen += (sender, e) => ws.Send(startActionjson);

        // Set the WebSocket events.
        ws.OnMessage += Ws_OnMessage;
        ws.OnMessage += (sender, e) =>
        {
            // Lightweight check for the state message; a full JSON parse would be
            // more robust if transcripts could ever contain both quoted tokens.
            string data = e.Data;
            if (data != null && data.Contains("\"state\"") && data.Contains("\"listening\""))
            {
                listening.Release();
            }
        };
        ws.OnError += (sender, e) =>
            nf.Notify(
                new NotificationMessage
                {
                    Summary = "WebSocket Error",
                    Body = e.Message,
                    Icon = "notification-message-im"
                });
        ws.OnClose += (sender, e) =>
        {
            nf.Notify(
                new NotificationMessage
                {
                    Summary = String.Format("WebSocket Close ({0})", e.Code),
                    Body = e.Reason,
                    Icon = "notification-message-im"
                });
            closed.Set();
            // Unblock both waits below so the method cannot hang on a dead connection.
            listening.Release(2);
        };

        ws.Connect();

        // Wait for the initial "listening" state that acknowledges the start action.
        listening.Wait();
        if (closed.IsSet)
        {
            return;
        }

        using (System.IO.FileStream audio = System.IO.File.OpenRead(filePath))
        {
            byte[] buffer = new byte[64 * 1024];
            int read;
            while (!closed.IsSet && (read = audio.Read(buffer, 0, buffer.Length)) > 0)
            {
                byte[] chunk = buffer;
                if (read < buffer.Length)
                {
                    // Only send the bytes that were actually read (last chunk of the file).
                    chunk = new byte[read];
                    Buffer.BlockCopy(buffer, 0, chunk, 0, read);
                }
                ws.Send(chunk);
            }
        }

        if (closed.IsSet)
        {
            return;
        }

        // Tell the service that the audio is complete, then wait for the second
        // "listening" state, which marks the end of the results.
        ws.Send(stopActionJson);
        listening.Wait();
    }
}
/// <summary>
/// Handles a message received from the speech-to-text WebSocket by writing its
/// text payload to the console.
/// </summary>
/// <param name="sender">The object that raised the event.</param>
/// <param name="e">Event data; <c>e.Data</c> is read as the message text.</param>
private static void Ws_OnMessage(object sender, MessageEventArgs e)
{
    string s = e.Data;

    // Previously the payload was assigned to a local and dropped, so no
    // transcription result was ever surfaced to the user.
    if (!string.IsNullOrEmpty(s))
    {
        Console.WriteLine(s);
    }
}
根据文档,无论采用哪种输入方式,都有 100 MB 的限制。因此,您必须将音频文件拆分为小于 100 MB 的块。
要流式传输音频,我认为您需要创建一个 FileStream,而不是调用 System.IO.File.ReadAllBytes(filePath); 再遍历其结果。
此外,您不应在到达文件末尾后立即关闭 WebSocket——这可能导致您无法收到全部结果。相反,请发送字符串 {"action": "stop"},然后等待,直到收到 {"state": "listening"} 的响应;该响应表示服务已处理完您的音频并发回了所有文本。
更新: 我找到了一台 Windows 机器,安装了 Visual Studio,并编写了一个可以运行的示例。我一直没弄清楚您使用的是哪个 WebSocket API/库,但这个示例只使用了在 microsoft.com 上有文档的内置类库,因此它应该对您有用。
我用几个不同的 .ogg 和 .wav 文件对其进行了测试,并确认得到了预期的多个中间结果和最终结果。
using System;
using System.Net.WebSockets;
using System.Net;
using System.Runtime.Serialization.Json;
using System.Threading;
using System.Threading.Tasks;
using System.Text;
using System.IO;
using System.Runtime.Serialization;
// Perform streaming transcription of an audio file using the IBM Watson Speech to Text service over a websocket
// http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/speech-to-text.html
// https://msdn.microsoft.com/en-us/library/system.net.websockets.clientwebsocket%28v=vs.110%29.aspx
namespace WatsonSTTWebsocketExample
{
    /// <summary>
    /// Console sample that streams an audio file to the IBM Watson Speech to Text
    /// service over a WebSocket and prints every result message it receives.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            Transcribe();
            Console.WriteLine("Press any key to exit");
            Console.ReadLine();
        }

        // http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/doc/getting_started/gs-credentials.shtml
        static String username = "<username>";
        static String password = "<password>";

        // Path of the audio file to transcribe.
        static String file = @"c:\audio.wav";

        static Uri url = new Uri("wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize");

        // Text message that starts a recognition request.
        static ArraySegment<byte> openingMessage = new ArraySegment<byte>(Encoding.UTF8.GetBytes(
            "{\"action\": \"start\", \"content-type\": \"audio/wav\", \"continuous\" : true, \"interim_results\": true}"
        ));

        // Text message that tells the service all audio has been sent.
        static ArraySegment<byte> closingMessage = new ArraySegment<byte>(Encoding.UTF8.GetBytes(
            "{\"action\": \"stop\"}"
        ));

        /// <summary>
        /// Connects to the service, sends the opening message, streams the audio
        /// file while printing results, and finally closes the WebSocket.
        /// </summary>
        static void Transcribe()
        {
            // Dispose the socket even when one of the steps below throws.
            using (var ws = new ClientWebSocket())
            {
                ws.Options.Credentials = new NetworkCredential(username, password);
                ws.ConnectAsync(url, CancellationToken.None).Wait();

                // send opening message and wait for initial delimiter
                Task.WaitAll(ws.SendAsync(openingMessage, WebSocketMessageType.Text, true, CancellationToken.None), HandleResults(ws));

                // The server may already have closed the connection (e.g. bad credentials);
                // sending audio in that state would only throw.
                if (ws.State == WebSocketState.Open)
                {
                    // send all audio and then a closing message; simultaneously print all results until delimiter is received
                    Task.WaitAll(SendAudio(ws), HandleResults(ws));
                }

                // close down the websocket, completing the handshake if the server closed first
                if (ws.State == WebSocketState.Open || ws.State == WebSocketState.CloseReceived)
                {
                    ws.CloseAsync(WebSocketCloseStatus.NormalClosure, "Close", CancellationToken.None).Wait();
                }
            }
        }

        /// <summary>
        /// Reads the audio file in 1024-byte chunks, sends each chunk as a binary
        /// message, and then sends the closing message.
        /// </summary>
        /// <param name="ws">An open WebSocket connected to the service.</param>
        static async Task SendAudio(ClientWebSocket ws)
        {
            using (FileStream fs = File.OpenRead(file))
            {
                byte[] b = new byte[1024];
                int read;
                while ((read = fs.Read(b, 0, b.Length)) > 0)
                {
                    // Send only the bytes actually read; the final chunk is usually
                    // shorter than the buffer and the rest of it holds stale data.
                    await ws.SendAsync(new ArraySegment<byte>(b, 0, read), WebSocketMessageType.Binary, true, CancellationToken.None);
                }
                await ws.SendAsync(closingMessage, WebSocketMessageType.Text, true, CancellationToken.None);
            }
        }

        /// <summary>
        /// Prints results until the connection closes or a delimiter message is received.
        /// </summary>
        /// <param name="ws">An open WebSocket connected to the service.</param>
        static async Task HandleResults(ClientWebSocket ws)
        {
            var buffer = new byte[1024];
            var segment = new ArraySegment<byte>(buffer);

            // Accumulates the frames of one message so that messages larger than
            // the receive buffer are handled instead of aborting the connection.
            using (var messageBytes = new MemoryStream())
            {
                while (true)
                {
                    messageBytes.SetLength(0);

                    WebSocketReceiveResult result;
                    do
                    {
                        result = await ws.ReceiveAsync(segment, CancellationToken.None);
                        if (result.MessageType == WebSocketMessageType.Close)
                        {
                            return;
                        }
                        messageBytes.Write(buffer, 0, result.Count);
                    }
                    while (!result.EndOfMessage);

                    // Decode only once the message is complete so multi-byte UTF-8
                    // sequences split across frames are decoded correctly.
                    var message = Encoding.UTF8.GetString(messageBytes.ToArray());

                    // you'll probably want to parse the JSON into a useful object here,
                    // see ServiceState and IsDelimeter for a light-weight example of that.
                    Console.WriteLine(message);
                    if (IsDelimeter(message))
                    {
                        return;
                    }
                }
            }
        }

        // the watson service sends a {"state": "listening"} message at both the beginning and the *end* of the results
        // this checks for that
        [DataContract]
        internal class ServiceState
        {
            [DataMember]
            public string state = "";
        }

        /// <summary>
        /// Returns true when <paramref name="json"/> deserializes to an object whose
        /// <c>state</c> member equals "listening".
        /// </summary>
        /// <param name="json">A JSON message received from the service.</param>
        static bool IsDelimeter(String json)
        {
            using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(json)))
            {
                DataContractJsonSerializer ser = new DataContractJsonSerializer(typeof(ServiceState));
                ServiceState obj = (ServiceState)ser.ReadObject(stream);
                // ReadObject yields null for a JSON "null" payload.
                return obj != null && obj.state == "listening";
            }
        }
    }
}
我正在编写 C# 代码,尝试转录超过 100 MB 的音频,但服务不允许这样做。我需要开发一个能够在 C# 中发送超过 100 MB 音频的程序。
在此代码中,我使用的是 WebSocket,但我应该如何发送音频流?
/// <summary>
/// Streams a WAV file to the IBM Watson Speech to Text WebSocket endpoint and
/// blocks until the service reports that it has finished processing the audio.
/// </summary>
/// <remarks>
/// The file is read and sent in fixed-size chunks instead of being loaded into
/// memory as a whole. After the last chunk a <c>{"action": "stop"}</c> message is
/// sent and the method waits for the closing <c>{"state": "listening"}</c> message
/// so that no results are lost by closing the socket too early.
/// NOTE(review): the service is reported to enforce a 100 MB limit per request;
/// this method does not split larger files into several requests — confirm
/// against the service documentation.
/// </remarks>
public static void CallWatson()
{
    // Placeholder: supply a valid authentication token before running.
    string watsonToken = "";
    string startActionjson = "{\"action\": \"start\", \"content-type\": \"audio/wav\", \"continuous\" : true, \"interim_results\": true}";
    string stopActionJson = "{\"action\": \"stop\"}";
    // Verbatim string so the backslash is not treated as an escape sequence.
    string filePath = @"E:\Test3.wav";

    using (var nf = new Notifier())
    // Released once for every {"state": "listening"} message received.
    using (var listening = new System.Threading.SemaphoreSlim(0))
    // Set as soon as the connection is closed, so the sender can stop early.
    using (var closed = new System.Threading.ManualResetEventSlim(false))
    using (var ws = new WebSocket("wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize?watson-token=" + watsonToken))
    {
        ws.OnOpen += (sender, e) => ws.Send(startActionjson);

        // Set the WebSocket events.
        ws.OnMessage += Ws_OnMessage;
        ws.OnMessage += (sender, e) =>
        {
            // Lightweight check for the state message; a full JSON parse would be
            // more robust if transcripts could ever contain both quoted tokens.
            string data = e.Data;
            if (data != null && data.Contains("\"state\"") && data.Contains("\"listening\""))
            {
                listening.Release();
            }
        };
        ws.OnError += (sender, e) =>
            nf.Notify(
                new NotificationMessage
                {
                    Summary = "WebSocket Error",
                    Body = e.Message,
                    Icon = "notification-message-im"
                });
        ws.OnClose += (sender, e) =>
        {
            nf.Notify(
                new NotificationMessage
                {
                    Summary = String.Format("WebSocket Close ({0})", e.Code),
                    Body = e.Reason,
                    Icon = "notification-message-im"
                });
            closed.Set();
            // Unblock both waits below so the method cannot hang on a dead connection.
            listening.Release(2);
        };

        ws.Connect();

        // Wait for the initial "listening" state that acknowledges the start action.
        listening.Wait();
        if (closed.IsSet)
        {
            return;
        }

        using (System.IO.FileStream audio = System.IO.File.OpenRead(filePath))
        {
            byte[] buffer = new byte[64 * 1024];
            int read;
            while (!closed.IsSet && (read = audio.Read(buffer, 0, buffer.Length)) > 0)
            {
                byte[] chunk = buffer;
                if (read < buffer.Length)
                {
                    // Only send the bytes that were actually read (last chunk of the file).
                    chunk = new byte[read];
                    Buffer.BlockCopy(buffer, 0, chunk, 0, read);
                }
                ws.Send(chunk);
            }
        }

        if (closed.IsSet)
        {
            return;
        }

        // Tell the service that the audio is complete, then wait for the second
        // "listening" state, which marks the end of the results.
        ws.Send(stopActionJson);
        listening.Wait();
    }
}
/// <summary>
/// Handles a message received from the speech-to-text WebSocket by writing its
/// text payload to the console.
/// </summary>
/// <param name="sender">The object that raised the event.</param>
/// <param name="e">Event data; <c>e.Data</c> is read as the message text.</param>
private static void Ws_OnMessage(object sender, MessageEventArgs e)
{
    string s = e.Data;

    // Previously the payload was assigned to a local and dropped, so no
    // transcription result was ever surfaced to the user.
    if (!string.IsNullOrEmpty(s))
    {
        Console.WriteLine(s);
    }
}
根据文档,无论采用哪种输入方式,都有 100 MB 的限制。因此,您必须将音频文件拆分为小于 100 MB 的块。
要流式传输音频,我认为您需要创建一个 FileStream,而不是调用 System.IO.File.ReadAllBytes(filePath); 再遍历其结果。
此外,您不应在到达文件末尾后立即关闭 WebSocket——这可能导致您无法收到全部结果。相反,请发送字符串 {"action": "stop"},然后等待,直到收到 {"state": "listening"} 的响应;该响应表示服务已处理完您的音频并发回了所有文本。
更新: 我找到了一台 Windows 机器,安装了 Visual Studio,并编写了一个可以运行的示例。我一直没弄清楚您使用的是哪个 WebSocket API/库,但这个示例只使用了在 microsoft.com 上有文档的内置类库,因此它应该对您有用。
我用几个不同的 .ogg 和 .wav 文件对其进行了测试,并确认得到了预期的多个中间结果和最终结果。
using System;
using System.Net.WebSockets;
using System.Net;
using System.Runtime.Serialization.Json;
using System.Threading;
using System.Threading.Tasks;
using System.Text;
using System.IO;
using System.Runtime.Serialization;
// Perform streaming transcription of an audio file using the IBM Watson Speech to Text service over a websocket
// http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/speech-to-text.html
// https://msdn.microsoft.com/en-us/library/system.net.websockets.clientwebsocket%28v=vs.110%29.aspx
namespace WatsonSTTWebsocketExample
{
    /// <summary>
    /// Console sample that streams an audio file to the IBM Watson Speech to Text
    /// service over a WebSocket and prints every result message it receives.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            Transcribe();
            Console.WriteLine("Press any key to exit");
            Console.ReadLine();
        }

        // http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/doc/getting_started/gs-credentials.shtml
        static String username = "<username>";
        static String password = "<password>";

        // Path of the audio file to transcribe.
        static String file = @"c:\audio.wav";

        static Uri url = new Uri("wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize");

        // Text message that starts a recognition request.
        static ArraySegment<byte> openingMessage = new ArraySegment<byte>(Encoding.UTF8.GetBytes(
            "{\"action\": \"start\", \"content-type\": \"audio/wav\", \"continuous\" : true, \"interim_results\": true}"
        ));

        // Text message that tells the service all audio has been sent.
        static ArraySegment<byte> closingMessage = new ArraySegment<byte>(Encoding.UTF8.GetBytes(
            "{\"action\": \"stop\"}"
        ));

        /// <summary>
        /// Connects to the service, sends the opening message, streams the audio
        /// file while printing results, and finally closes the WebSocket.
        /// </summary>
        static void Transcribe()
        {
            // Dispose the socket even when one of the steps below throws.
            using (var ws = new ClientWebSocket())
            {
                ws.Options.Credentials = new NetworkCredential(username, password);
                ws.ConnectAsync(url, CancellationToken.None).Wait();

                // send opening message and wait for initial delimiter
                Task.WaitAll(ws.SendAsync(openingMessage, WebSocketMessageType.Text, true, CancellationToken.None), HandleResults(ws));

                // The server may already have closed the connection (e.g. bad credentials);
                // sending audio in that state would only throw.
                if (ws.State == WebSocketState.Open)
                {
                    // send all audio and then a closing message; simultaneously print all results until delimiter is received
                    Task.WaitAll(SendAudio(ws), HandleResults(ws));
                }

                // close down the websocket, completing the handshake if the server closed first
                if (ws.State == WebSocketState.Open || ws.State == WebSocketState.CloseReceived)
                {
                    ws.CloseAsync(WebSocketCloseStatus.NormalClosure, "Close", CancellationToken.None).Wait();
                }
            }
        }

        /// <summary>
        /// Reads the audio file in 1024-byte chunks, sends each chunk as a binary
        /// message, and then sends the closing message.
        /// </summary>
        /// <param name="ws">An open WebSocket connected to the service.</param>
        static async Task SendAudio(ClientWebSocket ws)
        {
            using (FileStream fs = File.OpenRead(file))
            {
                byte[] b = new byte[1024];
                int read;
                while ((read = fs.Read(b, 0, b.Length)) > 0)
                {
                    // Send only the bytes actually read; the final chunk is usually
                    // shorter than the buffer and the rest of it holds stale data.
                    await ws.SendAsync(new ArraySegment<byte>(b, 0, read), WebSocketMessageType.Binary, true, CancellationToken.None);
                }
                await ws.SendAsync(closingMessage, WebSocketMessageType.Text, true, CancellationToken.None);
            }
        }

        /// <summary>
        /// Prints results until the connection closes or a delimiter message is received.
        /// </summary>
        /// <param name="ws">An open WebSocket connected to the service.</param>
        static async Task HandleResults(ClientWebSocket ws)
        {
            var buffer = new byte[1024];
            var segment = new ArraySegment<byte>(buffer);

            // Accumulates the frames of one message so that messages larger than
            // the receive buffer are handled instead of aborting the connection.
            using (var messageBytes = new MemoryStream())
            {
                while (true)
                {
                    messageBytes.SetLength(0);

                    WebSocketReceiveResult result;
                    do
                    {
                        result = await ws.ReceiveAsync(segment, CancellationToken.None);
                        if (result.MessageType == WebSocketMessageType.Close)
                        {
                            return;
                        }
                        messageBytes.Write(buffer, 0, result.Count);
                    }
                    while (!result.EndOfMessage);

                    // Decode only once the message is complete so multi-byte UTF-8
                    // sequences split across frames are decoded correctly.
                    var message = Encoding.UTF8.GetString(messageBytes.ToArray());

                    // you'll probably want to parse the JSON into a useful object here,
                    // see ServiceState and IsDelimeter for a light-weight example of that.
                    Console.WriteLine(message);
                    if (IsDelimeter(message))
                    {
                        return;
                    }
                }
            }
        }

        // the watson service sends a {"state": "listening"} message at both the beginning and the *end* of the results
        // this checks for that
        [DataContract]
        internal class ServiceState
        {
            [DataMember]
            public string state = "";
        }

        /// <summary>
        /// Returns true when <paramref name="json"/> deserializes to an object whose
        /// <c>state</c> member equals "listening".
        /// </summary>
        /// <param name="json">A JSON message received from the service.</param>
        static bool IsDelimeter(String json)
        {
            using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(json)))
            {
                DataContractJsonSerializer ser = new DataContractJsonSerializer(typeof(ServiceState));
                ServiceState obj = (ServiceState)ser.ReadObject(stream);
                // ReadObject yields null for a JSON "null" payload.
                return obj != null && obj.state == "listening";
            }
        }
    }
}