"Expected a JSON header message before receiving binary data" 使用 WebSocket-Sharp 和 IBM Watson TTS 时出错

"Expected a JSON header message before receiving binary data" error using WebSocket-Sharp and IBM Watson TTS

我正在尝试使用 IBM Watson 的 WebSocket 接口来获取音频的单词时间信息(word timings)。这是文档的链接。身份验证部分似乎工作正常,但在 OnOpen 方法中发送 JSON 消息后,我一直收到错误 "Expected a JSON header message before receiving binary data."。我使用的 WebSocket-Sharp 库来自 NuGet(包名为 websocket-sharp-core)。

如何解决这个问题?

using WebSocketSharp;
using Newtonsoft.Json.Linq;
using System.Net.Http;
using System.Net;
using System.Net.Http.Headers;

/// <summary>
/// Requests an IAM access token with the API key, then opens a WebSocket to the
/// Watson text-to-speech synthesize endpoint and wires up the socket event handlers.
/// </summary>
/// <param name="text">Text forwarded to <c>OnOpen</c> once the socket has opened.</param>
/// <param name="dest">Not used by this method.</param>
async void GenVoice(string text, string dest)
{
    Console.WriteLine("obtaining access token...");
    var dict = new Dictionary<string, string>
    {
        { "grant_type", "urn:ibm:params:oauth:grant-type:apikey" },
        { "apikey", "<my_api_key>" },
    };

    string accessToken;
    // The HTTP client is only needed for the token request, so release it right after.
    using (var authClient = new HttpClient())
    {
        authClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
        var response = await authClient.PostAsync("https://iam.cloud.ibm.com/identity/token", new FormUrlEncodedContent(dict));
        var auth = JObject.Parse(await response.Content.ReadAsStringAsync());
        accessToken = auth["access_token"].ToString();
    }
    Console.WriteLine("access token is " + accessToken);

    var client = new WebSocket($"wss://stream.watsonplatform.net/text-to-speech/api/v1/synthesize?access_token={accessToken}&voice=en-US_MichaelV3Voice");

    // Subscribe with a lambda: writing `client.OnOpen += OnOpen(client, text)` would call
    // OnOpen immediately and subscribe its (null) return value instead of a handler.
    client.OnOpen += (sender, e) => OnOpen(sender, text);
    client.OnMessage += OnMessage;
    client.OnError += (sender, e) =>
    {
        Console.WriteLine(e.Message);
    };
    client.OnClose += (sender, e) =>
    {
        Console.WriteLine($"closed; code={e.Code}; reason={e.Reason}; wasclean={e.WasClean}");
    };

    // Connect only after every handler is attached so that no event raised while
    // connecting (open, first messages, errors) is missed.
    client.Connect();
}

/// <summary>
/// Builds the JSON header message for the synthesize request and sends it over the socket.
/// </summary>
/// <param name="sender">The socket to send on; cast to <c>WebSocket</c>.</param>
/// <param name="text">The text placed in the message's "text" field.</param>
/// <returns>Always <c>null</c>.</returns>
System.EventHandler OnOpen(object sender, string text)
{
    var client = (WebSocket)sender;

    var message = new JObject();
    message.Add("content-type", JToken.FromObject("application/json"));
    message.Add("accept", JToken.FromObject("*/*"));
    message.Add("text", JToken.FromObject(text));
    message.Add("timings", JToken.FromObject(new string[] { "words" }));

    // Send the JSON as a string so it goes out as a TEXT frame. Sending the UTF-8 bytes
    // produces a BINARY frame, which the service rejects with
    // "Expected a JSON header message before receiving binary data".
    client.Send(message.ToString());
    Console.WriteLine("successfully opened socket");
    return null;
}

/// <summary>
/// Logs the kind of each incoming WebSocket message; for text messages the payload is logged too.
/// </summary>
/// <param name="sender">The object that raised the event (not used).</param>
/// <param name="e">The received message.</param>
void OnMessage(object sender, MessageEventArgs e)
{
    // The three checks are kept independent, one log line per matching kind.
    if (e.IsText)
        Console.WriteLine($"got a message of type string, it says: {e.Data}");

    if (e.IsBinary)
        Console.WriteLine("got a message of type binary");

    if (e.IsPing)
        Console.WriteLine("got a message of type ping");
}

我找到了解决方案。我认为问题出在 WebSocketSharp 库本身,因此改用了 System.Net.WebSockets,现在可以正常工作了。以下是最终可用的代码,供有需要的人参考。

using System.Net.WebSockets;
using Newtonsoft.Json.Linq;

// Connect to the synthesize endpoint, send the JSON header as a text message,
// then print every text message the service sends back until the socket closes.
var client = new ClientWebSocket();
var canc = System.Threading.CancellationToken.None;
await client.ConnectAsync(new Uri($"wss://stream.watsonplatform.net/text-to-speech/api/v1/synthesize?access_token={accessToken}&voice=en-US_MichaelV3Voice"), canc);
var message = new JObject();
message.Add("accept", JToken.FromObject("audio/wav"));
message.Add("text", JToken.FromObject(text));
message.Add("timings", JToken.FromObject(new string[] { "words" }));
// WebSocketMessageType.Text makes this a text frame, which the service expects for the JSON header.
await client.SendAsync(new ArraySegment<byte>(Encoding.UTF8.GetBytes(message.ToString())), WebSocketMessageType.Text, true, canc);

var toReturn = new List<Timing>();
// One receive buffer is reused for every frame instead of being reallocated per iteration.
var buffer = new byte[4096 * 20];
while (client.State == WebSocketState.Open)
{
    var data = new List<byte>();
    WebSocketReceiveResult response;

    // A single message may arrive in several frames; keep reading until EndOfMessage
    // and copy only the bytes actually received (response.Count) out of the buffer.
    do
    {
        response = await client.ReceiveAsync(new ArraySegment<byte>(buffer), canc);
        data.AddRange(new ArraySegment<byte>(buffer, 0, response.Count));
    }
    while (!response.EndOfMessage);

    if (response.MessageType == WebSocketMessageType.Close)
    {
        // The server started the close handshake; there is nothing more to read.
        break;
    }

    if (response.MessageType == WebSocketMessageType.Text)
    {
        Console.WriteLine("got a string, it says: " + Encoding.UTF8.GetString(data.ToArray()));
    }
}