对于 Watson 的 Speech-To-Text Unity SDK,您如何指定关键字?
For Watson's Speech-To-Text Unity SDK, how can you specify keywords?
我正在尝试在 Watson 的 Speech-To-Text Unity SDK 中指定关键字,但我不确定如何执行此操作。
详细信息页面没有给出示例(请参阅此处:https://www.ibm.com/watson/developercloud/doc/speech-to-text/output.shtml),
而其他论坛帖子是针对 Java 应用程序编写的(原文此处的链接已缺失)。
我尝试过在 "Recognize" 函数中创建的 RecognizeRequest 类里对这些值进行硬编码(如下所示),但没有成功:
**编辑:此函数实际上从未被调用。**
/// <summary>
/// Converts an AudioClip to WAV data and sends it to the "/v1/recognize" REST endpoint.
/// </summary>
/// <param name="clip">The audio to recognize; must not be null.</param>
/// <param name="callback">Stored on the request as its Callback; must not be null.</param>
/// <returns>
/// False when no connector is available or the WAV payload is larger than
/// MAX_RECOGNIZE_CLIP_SIZE; otherwise whatever connector.Send returns.
/// </returns>
/// <exception cref="ArgumentNullException">clip or callback is null.</exception>
public bool Recognize(AudioClip clip, OnRecognize callback)
{
    if (clip == null)
        throw new ArgumentNullException("clip");
    if (callback == null)
        throw new ArgumentNullException("callback");

    RESTConnector connector = RESTConnector.GetConnector(SERVICE_ID, "/v1/recognize");
    if (connector == null)
        return false;

    RecognizeRequest req = new RecognizeRequest();
    req.Clip = clip;
    req.Callback = callback;
    req.Headers["Content-Type"] = "audio/wav";
    req.Send = WaveFile.CreateWAV(clip);
    if (req.Send.Length > MAX_RECOGNIZE_CLIP_SIZE)
    {
        Log.Error("SpeechToText", "AudioClip is too large for Recognize().");
        return false;
    }

    req.Parameters["model"] = m_RecognizeModel;
    req.Parameters["continuous"] = "false";
    req.Parameters["max_alternatives"] = m_MaxAlternatives.ToString();
    req.Parameters["timestamps"] = m_Timestamps ? "true" : "false";
    // Fixed: the "false" literal was missing its opening quote, which broke compilation.
    req.Parameters["word_confidence"] = m_WordConfidence ? "true" : "false";

    //these "keywords" and "keywords_threshold" and "keywordsThreshold" parameters
    //are just my guess for how to set these values
    // NOTE(review): every other parameter above is passed as a string; whether the
    // connector serializes a string[] or a double into the query is not visible here.
    // The REST API presumably expects a comma-separated keyword list - TODO confirm.
    req.Parameters["keywords"] = new string[] {"fun", "match", "test" };
    req.Parameters["keywordsThreshold"] = .2;
    req.Parameters["keywords_threshold"] = .2;
    //end my test insertions

    req.OnResponse = OnRecognizeResponse;
    return connector.Send(req);
}
但返回的 SpeechRecognitionEvent 结果值不包含任何 keywords_result,而这正是我想要获取的内容。我试图像下面这样查看 keywords_result 对象中每个关键字的置信度,但 keywords_result 对象返回为 null。
/// <summary>
/// Forwards the recognition event to m_ResultOutput, clears the transcript text and
/// logs each spotted keyword together with its confidence.
/// </summary>
/// <param name="result">The recognition event; a null or empty event is only forwarded.</param>
private void OnRecognize(SpeechRecognitionEvent result) {
    Debug.Log("Recognizing!");
    m_ResultOutput.SendData(new SpeechToTextData(result));

    if (result == null || result.results == null || result.results.Length == 0)
        return;

    if (m_Transcript != null)
        m_Transcript.text = "";

    foreach (var res in result.results) {
        // keywords_result can come back as null (the symptom being debugged here),
        // so skip such results instead of dereferencing it.
        if (res.keywords_result == null || res.keywords_result.keyword == null)
            continue;

        foreach (var keyword in res.keywords_result.keyword) {
            string text = keyword.normalized_text;
            // confidence is assigned from a double by the parser, so it cannot be
            // implicitly narrowed to float.
            double confidence = keyword.confidence;
            Debug.Log(text + ": " + confidence);
        }
    }
}
有人在 Unity 或 C# 中使用 Watson 的 Speech-To-Text SDK 成功实施了关键字置信度评估吗?欢迎所有想法和建议。
PS:这是我的第一个帖子 :)
原来我需要在 "SendStart" 函数中指定关键字,如下所示:
/// <summary>
/// Builds the "start" action message (audio format, recognition options and the
/// keywords to spot), sends it over m_ListenSocket as a JSON text message and
/// records the time it was sent in m_LastStartSent.
/// </summary>
/// <exception cref="WatsonException">m_ListenSocket is null.</exception>
private void SendStart() {
    if (m_ListenSocket == null)
        throw new WatsonException("SendStart() called with null connector.");

    string contentType = "audio/l16;rate=" + m_RecordingHZ.ToString() + ";channels=1;";

    var startMessage = new Dictionary<string, object> {
        { "action", "start" },
        { "content-type", contentType },
        { "continuous", EnableContinousRecognition },
        { "max_alternatives", m_MaxAlternatives },
        { "interim_results", EnableInterimResults },
        { "word_confidence", m_WordConfidence },
        { "timestamps", m_Timestamps },
        // keyword spotting: the words to look for and the threshold sent with them
        { "keywords", keywordsToCheck.ToArray() },
        { "keywords_threshold", 0.05 },
    };

    string payload = Json.Serialize(startMessage);
    m_ListenSocket.Send(new WSConnector.TextMessage(payload));
    m_LastStartSent = DateTime.Now;
}
并编写一些代码,以便在 "ParseRecognizeResponse" 函数中正确解析 keywords_result:
/// <summary>
/// Converts a deserialized recognize response into a SpeechRecognitionEvent. For each
/// result it reads the keyword matches for the words in keywordsToCheck, the "final"
/// flag and the alternatives (transcript, confidence, timestamps, word confidence).
/// </summary>
/// <param name="resp">The deserialized response; expected to contain a "results" list.</param>
/// <returns>
/// The parsed event, or null when resp is null or its "results" entry is not a list.
/// </returns>
private SpeechRecognitionEvent ParseRecognizeResponse(IDictionary resp){
    if (resp == null)
        return null;

    IList iresults = resp["results"] as IList;
    if (iresults == null)
        return null;

    List<SpeechRecognitionResult> results = new List<SpeechRecognitionResult>();
    foreach (var r in iresults)
    {
        IDictionary iresult = r as IDictionary;
        // Fixed: test the per-item cast (iresult), not the outer list (iresults),
        // otherwise a non-dictionary item is dereferenced below.
        if (iresult == null)
            continue;

        SpeechRecognitionResult result = new SpeechRecognitionResult();

        //added this section, starting here
        IDictionary iKeywords_result = iresult["keywords_result"] as IDictionary;
        result.keywords_result = new KeywordResults();
        List<KeywordResult> keywordResults = new List<KeywordResult>();
        // A result without a "keywords_result" dictionary yields an empty keyword
        // array instead of a NullReferenceException.
        if (iKeywords_result != null && keywordsToCheck != null) {
            foreach (string key in keywordsToCheck) {
                IList keyword_Results = iKeywords_result[key] as IList;
                if (keyword_Results == null)
                    continue;

                foreach (var res in keyword_Results) {
                    IDictionary kw_resultDic = res as IDictionary;
                    if (kw_resultDic == null)
                        continue;

                    KeywordResult keyword_Result = new KeywordResult();
                    keyword_Result.confidence = (double)kw_resultDic["confidence"];
                    keyword_Result.end_time = (double)kw_resultDic["end_time"];
                    keyword_Result.start_time = (double)kw_resultDic["start_time"];
                    keyword_Result.normalized_text = (string)kw_resultDic["normalized_text"];
                    keywordResults.Add(keyword_Result);
                }
            }
        }
        result.keywords_result.keyword = keywordResults.ToArray();
        //ends here

        result.final = (bool)iresult["final"];

        IList ialternatives = iresult["alternatives"] as IList;
        if (ialternatives == null)
            continue;

        List<SpeechRecognitionAlternative> alternatives = new List<SpeechRecognitionAlternative>();
        foreach (var a in ialternatives)
        {
            IDictionary ialternative = a as IDictionary;
            if (ialternative == null)
                continue;

            SpeechRecognitionAlternative alternative = new SpeechRecognitionAlternative();
            alternative.transcript = (string)ialternative["transcript"];
            if (ialternative.Contains("confidence"))
                alternative.confidence = (double)ialternative["confidence"];

            if (ialternative.Contains("timestamps"))
            {
                IList itimestamps = ialternative["timestamps"] as IList;
                // Guard the cast: a present but non-list value is ignored.
                if (itimestamps != null)
                {
                    TimeStamp[] timestamps = new TimeStamp[itimestamps.Count];
                    for (int i = 0; i < itimestamps.Count; ++i)
                    {
                        // Each entry is read as [word, start, end]; entries that are
                        // not lists leave a null slot, as before.
                        IList itimestamp = itimestamps[i] as IList;
                        if (itimestamp == null)
                            continue;

                        TimeStamp ts = new TimeStamp();
                        ts.Word = (string)itimestamp[0];
                        ts.Start = (double)itimestamp[1];
                        ts.End = (double)itimestamp[2];
                        timestamps[i] = ts;
                    }
                    alternative.Timestamps = timestamps;
                }
            }

            if (ialternative.Contains("word_confidence"))
            {
                IList iconfidence = ialternative["word_confidence"] as IList;
                // Guard the cast: a present but non-list value is ignored.
                if (iconfidence != null)
                {
                    WordConfidence[] confidence = new WordConfidence[iconfidence.Count];
                    for (int i = 0; i < iconfidence.Count; ++i)
                    {
                        // Each entry is read as [word, confidence].
                        IList iwordconf = iconfidence[i] as IList;
                        if (iwordconf == null)
                            continue;

                        WordConfidence wc = new WordConfidence();
                        wc.Word = (string)iwordconf[0];
                        wc.Confidence = (double)iwordconf[1];
                        confidence[i] = wc;
                    }
                    alternative.WordConfidence = confidence;
                }
            }

            alternatives.Add(alternative);
        }
        result.alternatives = alternatives.ToArray();
        results.Add(result);
    }

    return new SpeechRecognitionEvent(results.ToArray());
}
现在,当 OnRecognize 收到此 SpeechRecognitionEvent 时,我把原先用于显示备选词及其置信度分数的代码,改成了显示关键字结果及其置信度分数,如下所示:
/// <summary>
/// Forwards the recognition event to m_ResultOutput and writes one line per spotted
/// keyword (text, Final/Interim, confidence) into m_Transcript.
/// </summary>
/// <param name="result">The recognition event; a null or empty event is only forwarded.</param>
private void OnRecognize(SpeechRecognitionEvent result) {
    m_ResultOutput.SendData(new SpeechToTextData(result));

    if (result == null || result.results == null || result.results.Length == 0)
        return;

    // Fixed: the transcript was null-checked before clearing but then appended to
    // unconditionally; with no transcript there is nothing to display.
    if (m_Transcript == null)
        return;

    m_Transcript.text = "";
    foreach (var res in result.results) {
        //start keyword recognition changes here
        if (res.keywords_result == null || res.keywords_result.keyword == null)
            continue;

        foreach (var keyword in res.keywords_result.keyword) {
            m_Transcript.text += string.Format("{0} ({1}, {2:0.00})\n",
                keyword.normalized_text, res.final ? "Final" : "Interim", keyword.confidence);
        }
        //end here
    }
}
请注意,直接使用关键字结果中的置信度值,比自己硬编码检查 Watson 返回的备选词是否与关键字匹配、再使用备选词的置信度值更有价值。检查 keywords_result.keyword[].confidence 时,返回的置信度值更高,因为服务本身已经在专门检测这些词。这也正是我完成上述流程、解析 SpeechRecognitionEvent 结果使其正确包含 keywords_result 值的动机。
补充一些背景:我正在为有阅读障碍的儿童制作一款用于学习构词的节奏游戏,可以把它想象成《吉他英雄》遇上《芝麻街》。
我正在尝试在 Watson 的 Speech-To-Text Unity SDK 中指定关键字,但我不确定如何执行此操作。
详细信息页面没有给出示例(请参阅此处:https://www.ibm.com/watson/developercloud/doc/speech-to-text/output.shtml),
而其他论坛帖子是针对 Java 应用程序编写的(原文此处的链接已缺失)。
我尝试过在 "Recognize" 函数中创建的 RecognizeRequest 类里对这些值进行硬编码(如下所示),但没有成功:
**编辑:此函数实际上从未被调用。**
/// <summary>
/// Converts an AudioClip to WAV data and sends it to the "/v1/recognize" REST endpoint.
/// </summary>
/// <param name="clip">The audio to recognize; must not be null.</param>
/// <param name="callback">Stored on the request as its Callback; must not be null.</param>
/// <returns>
/// False when no connector is available or the WAV payload is larger than
/// MAX_RECOGNIZE_CLIP_SIZE; otherwise whatever connector.Send returns.
/// </returns>
/// <exception cref="ArgumentNullException">clip or callback is null.</exception>
public bool Recognize(AudioClip clip, OnRecognize callback)
{
    if (clip == null)
        throw new ArgumentNullException("clip");
    if (callback == null)
        throw new ArgumentNullException("callback");

    RESTConnector connector = RESTConnector.GetConnector(SERVICE_ID, "/v1/recognize");
    if (connector == null)
        return false;

    RecognizeRequest req = new RecognizeRequest();
    req.Clip = clip;
    req.Callback = callback;
    req.Headers["Content-Type"] = "audio/wav";
    req.Send = WaveFile.CreateWAV(clip);
    if (req.Send.Length > MAX_RECOGNIZE_CLIP_SIZE)
    {
        Log.Error("SpeechToText", "AudioClip is too large for Recognize().");
        return false;
    }

    req.Parameters["model"] = m_RecognizeModel;
    req.Parameters["continuous"] = "false";
    req.Parameters["max_alternatives"] = m_MaxAlternatives.ToString();
    req.Parameters["timestamps"] = m_Timestamps ? "true" : "false";
    // Fixed: the "false" literal was missing its opening quote, which broke compilation.
    req.Parameters["word_confidence"] = m_WordConfidence ? "true" : "false";

    //these "keywords" and "keywords_threshold" and "keywordsThreshold" parameters
    //are just my guess for how to set these values
    // NOTE(review): every other parameter above is passed as a string; whether the
    // connector serializes a string[] or a double into the query is not visible here.
    // The REST API presumably expects a comma-separated keyword list - TODO confirm.
    req.Parameters["keywords"] = new string[] {"fun", "match", "test" };
    req.Parameters["keywordsThreshold"] = .2;
    req.Parameters["keywords_threshold"] = .2;
    //end my test insertions

    req.OnResponse = OnRecognizeResponse;
    return connector.Send(req);
}
但返回的 SpeechRecognitionEvent 结果值不包含任何 keywords_result,而这正是我想要获取的内容。我试图像下面这样查看 keywords_result 对象中每个关键字的置信度,但 keywords_result 对象返回为 null。
/// <summary>
/// Forwards the recognition event to m_ResultOutput, clears the transcript text and
/// logs each spotted keyword together with its confidence.
/// </summary>
/// <param name="result">The recognition event; a null or empty event is only forwarded.</param>
private void OnRecognize(SpeechRecognitionEvent result) {
    Debug.Log("Recognizing!");
    m_ResultOutput.SendData(new SpeechToTextData(result));

    if (result == null || result.results == null || result.results.Length == 0)
        return;

    if (m_Transcript != null)
        m_Transcript.text = "";

    foreach (var res in result.results) {
        // keywords_result can come back as null (the symptom being debugged here),
        // so skip such results instead of dereferencing it.
        if (res.keywords_result == null || res.keywords_result.keyword == null)
            continue;

        foreach (var keyword in res.keywords_result.keyword) {
            string text = keyword.normalized_text;
            // confidence is assigned from a double by the parser, so it cannot be
            // implicitly narrowed to float.
            double confidence = keyword.confidence;
            Debug.Log(text + ": " + confidence);
        }
    }
}
有人在 Unity 或 C# 中使用 Watson 的 Speech-To-Text SDK 成功实施了关键字置信度评估吗?欢迎所有想法和建议。
PS:这是我的第一个帖子 :)
原来我需要在 "SendStart" 函数中指定关键字,如下所示:
/// <summary>
/// Builds the "start" action message (audio format, recognition options and the
/// keywords to spot), sends it over m_ListenSocket as a JSON text message and
/// records the time it was sent in m_LastStartSent.
/// </summary>
/// <exception cref="WatsonException">m_ListenSocket is null.</exception>
private void SendStart() {
    if (m_ListenSocket == null)
        throw new WatsonException("SendStart() called with null connector.");

    string contentType = "audio/l16;rate=" + m_RecordingHZ.ToString() + ";channels=1;";

    var startMessage = new Dictionary<string, object> {
        { "action", "start" },
        { "content-type", contentType },
        { "continuous", EnableContinousRecognition },
        { "max_alternatives", m_MaxAlternatives },
        { "interim_results", EnableInterimResults },
        { "word_confidence", m_WordConfidence },
        { "timestamps", m_Timestamps },
        // keyword spotting: the words to look for and the threshold sent with them
        { "keywords", keywordsToCheck.ToArray() },
        { "keywords_threshold", 0.05 },
    };

    string payload = Json.Serialize(startMessage);
    m_ListenSocket.Send(new WSConnector.TextMessage(payload));
    m_LastStartSent = DateTime.Now;
}
并编写一些代码,以便在 "ParseRecognizeResponse" 函数中正确解析 keywords_result:
/// <summary>
/// Converts a deserialized recognize response into a SpeechRecognitionEvent. For each
/// result it reads the keyword matches for the words in keywordsToCheck, the "final"
/// flag and the alternatives (transcript, confidence, timestamps, word confidence).
/// </summary>
/// <param name="resp">The deserialized response; expected to contain a "results" list.</param>
/// <returns>
/// The parsed event, or null when resp is null or its "results" entry is not a list.
/// </returns>
private SpeechRecognitionEvent ParseRecognizeResponse(IDictionary resp){
    if (resp == null)
        return null;

    IList iresults = resp["results"] as IList;
    if (iresults == null)
        return null;

    List<SpeechRecognitionResult> results = new List<SpeechRecognitionResult>();
    foreach (var r in iresults)
    {
        IDictionary iresult = r as IDictionary;
        // Fixed: test the per-item cast (iresult), not the outer list (iresults),
        // otherwise a non-dictionary item is dereferenced below.
        if (iresult == null)
            continue;

        SpeechRecognitionResult result = new SpeechRecognitionResult();

        //added this section, starting here
        IDictionary iKeywords_result = iresult["keywords_result"] as IDictionary;
        result.keywords_result = new KeywordResults();
        List<KeywordResult> keywordResults = new List<KeywordResult>();
        // A result without a "keywords_result" dictionary yields an empty keyword
        // array instead of a NullReferenceException.
        if (iKeywords_result != null && keywordsToCheck != null) {
            foreach (string key in keywordsToCheck) {
                IList keyword_Results = iKeywords_result[key] as IList;
                if (keyword_Results == null)
                    continue;

                foreach (var res in keyword_Results) {
                    IDictionary kw_resultDic = res as IDictionary;
                    if (kw_resultDic == null)
                        continue;

                    KeywordResult keyword_Result = new KeywordResult();
                    keyword_Result.confidence = (double)kw_resultDic["confidence"];
                    keyword_Result.end_time = (double)kw_resultDic["end_time"];
                    keyword_Result.start_time = (double)kw_resultDic["start_time"];
                    keyword_Result.normalized_text = (string)kw_resultDic["normalized_text"];
                    keywordResults.Add(keyword_Result);
                }
            }
        }
        result.keywords_result.keyword = keywordResults.ToArray();
        //ends here

        result.final = (bool)iresult["final"];

        IList ialternatives = iresult["alternatives"] as IList;
        if (ialternatives == null)
            continue;

        List<SpeechRecognitionAlternative> alternatives = new List<SpeechRecognitionAlternative>();
        foreach (var a in ialternatives)
        {
            IDictionary ialternative = a as IDictionary;
            if (ialternative == null)
                continue;

            SpeechRecognitionAlternative alternative = new SpeechRecognitionAlternative();
            alternative.transcript = (string)ialternative["transcript"];
            if (ialternative.Contains("confidence"))
                alternative.confidence = (double)ialternative["confidence"];

            if (ialternative.Contains("timestamps"))
            {
                IList itimestamps = ialternative["timestamps"] as IList;
                // Guard the cast: a present but non-list value is ignored.
                if (itimestamps != null)
                {
                    TimeStamp[] timestamps = new TimeStamp[itimestamps.Count];
                    for (int i = 0; i < itimestamps.Count; ++i)
                    {
                        // Each entry is read as [word, start, end]; entries that are
                        // not lists leave a null slot, as before.
                        IList itimestamp = itimestamps[i] as IList;
                        if (itimestamp == null)
                            continue;

                        TimeStamp ts = new TimeStamp();
                        ts.Word = (string)itimestamp[0];
                        ts.Start = (double)itimestamp[1];
                        ts.End = (double)itimestamp[2];
                        timestamps[i] = ts;
                    }
                    alternative.Timestamps = timestamps;
                }
            }

            if (ialternative.Contains("word_confidence"))
            {
                IList iconfidence = ialternative["word_confidence"] as IList;
                // Guard the cast: a present but non-list value is ignored.
                if (iconfidence != null)
                {
                    WordConfidence[] confidence = new WordConfidence[iconfidence.Count];
                    for (int i = 0; i < iconfidence.Count; ++i)
                    {
                        // Each entry is read as [word, confidence].
                        IList iwordconf = iconfidence[i] as IList;
                        if (iwordconf == null)
                            continue;

                        WordConfidence wc = new WordConfidence();
                        wc.Word = (string)iwordconf[0];
                        wc.Confidence = (double)iwordconf[1];
                        confidence[i] = wc;
                    }
                    alternative.WordConfidence = confidence;
                }
            }

            alternatives.Add(alternative);
        }
        result.alternatives = alternatives.ToArray();
        results.Add(result);
    }

    return new SpeechRecognitionEvent(results.ToArray());
}
现在,当 OnRecognize 收到此 SpeechRecognitionEvent 时,我把原先用于显示备选词及其置信度分数的代码,改成了显示关键字结果及其置信度分数,如下所示:
/// <summary>
/// Forwards the recognition event to m_ResultOutput and writes one line per spotted
/// keyword (text, Final/Interim, confidence) into m_Transcript.
/// </summary>
/// <param name="result">The recognition event; a null or empty event is only forwarded.</param>
private void OnRecognize(SpeechRecognitionEvent result) {
    m_ResultOutput.SendData(new SpeechToTextData(result));

    if (result == null || result.results == null || result.results.Length == 0)
        return;

    // Fixed: the transcript was null-checked before clearing but then appended to
    // unconditionally; with no transcript there is nothing to display.
    if (m_Transcript == null)
        return;

    m_Transcript.text = "";
    foreach (var res in result.results) {
        //start keyword recognition changes here
        if (res.keywords_result == null || res.keywords_result.keyword == null)
            continue;

        foreach (var keyword in res.keywords_result.keyword) {
            m_Transcript.text += string.Format("{0} ({1}, {2:0.00})\n",
                keyword.normalized_text, res.final ? "Final" : "Interim", keyword.confidence);
        }
        //end here
    }
}
请注意,直接使用关键字结果中的置信度值,比自己硬编码检查 Watson 返回的备选词是否与关键字匹配、再使用备选词的置信度值更有价值。检查 keywords_result.keyword[].confidence 时,返回的置信度值更高,因为服务本身已经在专门检测这些词。这也正是我完成上述流程、解析 SpeechRecognitionEvent 结果使其正确包含 keywords_result 值的动机。
补充一些背景:我正在为有阅读障碍的儿童制作一款用于学习构词的节奏游戏,可以把它想象成《吉他英雄》遇上《芝麻街》。