JS AzureSDK 创建自定义函数来捕获语音、显示文本结果和结果的置信度
JS AzureSDK create custom function to capture speech, display the text results and a confidence level of the results
我需要创建一个简单的 JavaScript 函数来捕获语音输入,然后使用 Azure SDK 返回带有置信度百分比的文本。
我最大的问题是我刚接触编码,这是我遇到的最困难的问题,所以请善待这个卑微的学生。
我正在构建一个使用语音输入的语言学习网络应用程序。我已经能够让 google 服务按照我想要的方式工作,但不幸的是,这些服务在我的市场所在的中国不起作用。我也在使用 Phaser 3 api 来构建这个应用程序。
我已经能够获取 git 上提供的示例代码,以便 AzureSDK 语音转文本 javascript 正常工作,但是当我尝试使用代码创建自己的函数时,我得到:
未捕获的类型错误(Uncaught TypeError):无法读取 undefined 的属性 'SpeechConfig'
我也不知道如何为语音结果添加置信度。
// Phaser pointer handler from the question: runs one-shot Azure speech
// recognition when the record button is pressed.
// NOTE(review): this is the asker's original (buggy) code, kept as-is so the
// answer's explanation of the error still applies.
recordButton.on('pointerdown', function() {
var SDK = window.SpeechSDK;
try {
// Resolve the Web Audio constructor (WebKit-prefixed on older Safari).
AudioContext = window.AudioContext // our preferred impl
|| window.webkitAudioContext // fallback, mostly for Safari
|| false; // could not find.
if (AudioContext) {
soundContext = new AudioContext();
console.log("AudioContext", AudioContext);
} else {
alert("Audio context not supported");
}
}
catch (e) {
console.log("no sound context found, no audio output. " + e);
}
console.log("SpeechSDK initialized", SDK);
// speechConfig / audioConfig / recognizer are implicit globals here —
// presumably declared elsewhere in the app; TODO confirm.
speechConfig =
SpeechSDK.SpeechConfig.fromSubscription(subscriptionKey,
serviceRegion);
speechConfig.speechRecognitionLanguage = "en-US";
// BUG (the error from the question): `SpeechConfig` (capital S) is never
// defined as a variable — the variable created above is `speechConfig`.
console.log("speechConfig", SpeechConfig);
audioConfig = SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();
recognizer = new SpeechSDK.SpeechRecognizer(speechConfig,
audioConfig);
// One-shot recognition: both callbacks dispose of the recognizer when done.
recognizer.recognizeOnceAsync(
function (result) {
console.log("result", result);
recognizer.close();
recognizer = undefined;
},
function (err) {
console.log(err);
recognizer.close();
recognizer = undefined;
});
}, this);
我需要捕获语音输入,然后显示 words/phrases/sentences 学生所说的内容并根据置信度评分。
如果您想获取语音转文本 SDK 所返回文本的置信度分数(confidence score),请尝试以下代码:
<html>
<head>
<title>Speech SDK JavaScript Quickstart</title>
</head>
<script src="microsoft.cognitiveservices.speech.sdk.bundle.js"></script>
<body>
<!-- Shown until the Speech SDK bundle is detected at load time. -->
<div id="warning">
<h1 style="font-weight:500;">Speech Recognition Speech SDK not found (microsoft.cognitiveservices.speech.sdk.bundle.js missing).</h1>
</div>
<div id="content" style="display:none">
<table width="100%">
<tr>
<td></td>
<td><h1 style="font-weight:500;">Microsoft Cognitive Services Speech SDK JavaScript Quickstart</h1></td>
</tr>
<tr>
<td align="right"><a href="https://docs.microsoft.com/azure/cognitive-services/speech-service/get-started" target="_blank">Subscription</a>:</td>
<td><input id="subscriptionKey" type="text" size="40" value="subscription"></td>
</tr>
<tr>
<td align="right">Region</td>
<td><input id="serviceRegion" type="text" size="40" value="YourServiceRegion"></td>
</tr>
<tr>
<td></td>
<td><button id="startRecognizeOnceAsyncButton">Start recognition</button></td>
</tr>
<tr>
<td align="right" valign="top">Results</td>
<td><textarea id="phraseDiv" style="display: inline-block;width:500px;height:200px"></textarea></td>
</tr>
</table>
</div>
</body>
<!-- Speech SDK USAGE -->
<script>
// status fields and start button in UI
var phraseDiv;
var startRecognizeOnceAsyncButton;
// subscription key and region for speech services.
var subscriptionKey, serviceRegion;
var authorizationToken;
var SpeechSDK;
var recognizer;
document.addEventListener("DOMContentLoaded", function () {
startRecognizeOnceAsyncButton = document.getElementById("startRecognizeOnceAsyncButton");
subscriptionKey = document.getElementById("subscriptionKey");
serviceRegion = document.getElementById("serviceRegion");
phraseDiv = document.getElementById("phraseDiv");
startRecognizeOnceAsyncButton.addEventListener("click", function () {
startRecognizeOnceAsyncButton.disabled = true;
phraseDiv.innerHTML = "";
// if we got an authorization token, use the token. Otherwise use the provided subscription key
var speechConfig;
if (authorizationToken) {
speechConfig = SpeechSDK.SpeechConfig.fromAuthorizationToken(authorizationToken, serviceRegion.value);
} else {
if (subscriptionKey.value === "" || subscriptionKey.value === "subscription") {
alert("Please enter your Microsoft Cognitive Services Speech subscription key!");
return;
}
speechConfig = SpeechSDK.SpeechConfig.fromSubscription(subscriptionKey.value, serviceRegion.value);
}
speechConfig.speechRecognitionLanguage = "en-US";
// Detailed output format (OutputFormat.Detailed): the service then includes
// an NBest list with a per-phrase Confidence value in result.json.
speechConfig.outputFormat = 1;
var audioConfig = SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();
recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);
recognizer.recognizeOnceAsync(
function (result) {
startRecognizeOnceAsyncButton.disabled = false;
// Guard: NoMatch / canceled results carry no NBest list, and result.json
// may be absent — avoid a TypeError while still showing the raw result.
var confidence = "n/a";
try {
var detail = JSON.parse(result.json);
if (detail && detail.NBest && detail.NBest.length > 0) {
confidence = detail.NBest[0].Confidence;
}
} catch (e) {
window.console.log("no detailed result available", e);
}
phraseDiv.innerHTML += "Recognize Result:" + result.text + "\nConfidence Score:" + confidence;
window.console.log(result);
recognizer.close();
recognizer = undefined;
},
function (err) {
startRecognizeOnceAsyncButton.disabled = false;
phraseDiv.innerHTML += err;
window.console.log(err);
recognizer.close();
recognizer = undefined;
});
});
if (!!window.SpeechSDK) {
SpeechSDK = window.SpeechSDK;
startRecognizeOnceAsyncButton.disabled = false;
document.getElementById('content').style.display = 'block';
document.getElementById('warning').style.display = 'none';
// in case we have a function for getting an authorization token, call it.
if (typeof RequestAuthorizationToken === "function") {
RequestAuthorizationToken();
}
}
});
</script>
</html>
运行该页面,效果与官方文档所示相同。总之,使用 SDK 时,你应该配置 speechConfig.outputFormat = 1,
这样你就可以得到语音服务的详细格式,包括置信度值。
结果:
在你的代码中,似乎未定义的错误是由于你想打印 SpeechConfig
但该参数定义为 speechConfig
...
总之,为了在 demo 中成功拿到置信度分数(confidence score),我的代码是基于官方 demo 的。希望能帮助到你。
对于您的代码,请尝试下面的 html:
<html>
<body>
<button id='recordButton' onclick = 'test()'>test </button>
</body>
<script src="microsoft.cognitiveservices.speech.sdk.bundle.js"></script>
<script>
// One-shot Azure speech recognition, triggered by the button above.
// Logs the recognized text together with its confidence score.
function test(){
var SDK = window.SpeechSDK;
var soundContext;
try {
// Prefer the standard Web Audio constructor; fall back to the
// WebKit-prefixed one (mostly for Safari).
var AudioContextImpl = window.AudioContext
|| window.webkitAudioContext
|| false; // could not find.
if (AudioContextImpl) {
soundContext = new AudioContextImpl();
console.log("AudioContext", AudioContextImpl);
} else {
alert("Audio context not supported");
}
}
catch (e) {
console.log("no sound context found, no audio output. " + e);
}
console.log("SpeechSDK initialized", SDK);
var speechConfig =
SpeechSDK.SpeechConfig.fromSubscription("<your subscription key>",
"<your service region>");
speechConfig.speechRecognitionLanguage = "en-US";
// Detailed output format so the service returns the NBest list with
// per-phrase confidence scores (see the explanation above).
speechConfig.outputFormat = 1;
console.log("speechConfig", speechConfig);
var audioConfig = SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();
var recognizer = new SpeechSDK.SpeechRecognizer(speechConfig,
audioConfig);
recognizer.recognizeOnceAsync(
function (result) {
console.log("result", result);
// Guard: NoMatch / canceled results carry no NBest list.
try {
var detail = JSON.parse(result.json);
if (detail && detail.NBest && detail.NBest.length > 0) {
console.log("text:", result.text,
"confidence:", detail.NBest[0].Confidence);
}
} catch (e) {
console.log("no detailed result available", e);
}
recognizer.close();
recognizer = undefined;
},
function (err) {
console.log(err);
recognizer.close();
recognizer = undefined;
});
}
</script>
</html>
结果:如您所见,结果已记录:
如果我的回答有帮助,请单击答案旁边的复选标记将其从灰色切换为已填写以标记此答案,谢谢!
我需要创建一个简单的 JavaScript 函数来捕获语音输入,然后使用 Azure SDK 返回带有置信度百分比的文本。
我最大的问题是我刚接触编码,这是我遇到的最困难的问题,所以请善待这个卑微的学生。
我正在构建一个使用语音输入的语言学习网络应用程序。我已经能够让 google 服务按照我想要的方式工作,但不幸的是,这些服务在我的市场所在的中国不起作用。我也在使用 Phaser 3 api 来构建这个应用程序。
我已经能够获取 git 上提供的示例代码,以便 Azure SDK 语音转文本 JavaScript 正常工作,但是当我尝试使用这些代码创建自己的函数时,我得到:未捕获的类型错误(Uncaught TypeError):无法读取 undefined 的属性 'SpeechConfig'
我也不知道如何为语音结果添加置信度。
// Phaser pointer handler from the question: runs one-shot Azure speech
// recognition when the record button is pressed.
// NOTE(review): this is the asker's original (buggy) code, kept as-is so the
// answer's explanation of the error still applies.
recordButton.on('pointerdown', function() {
var SDK = window.SpeechSDK;
try {
// Resolve the Web Audio constructor (WebKit-prefixed on older Safari).
AudioContext = window.AudioContext // our preferred impl
|| window.webkitAudioContext // fallback, mostly for Safari
|| false; // could not find.
if (AudioContext) {
soundContext = new AudioContext();
console.log("AudioContext", AudioContext);
} else {
alert("Audio context not supported");
}
}
catch (e) {
console.log("no sound context found, no audio output. " + e);
}
console.log("SpeechSDK initialized", SDK);
// speechConfig / audioConfig / recognizer are implicit globals here —
// presumably declared elsewhere in the app; TODO confirm.
speechConfig =
SpeechSDK.SpeechConfig.fromSubscription(subscriptionKey,
serviceRegion);
speechConfig.speechRecognitionLanguage = "en-US";
// BUG (the error from the question): `SpeechConfig` (capital S) is never
// defined as a variable — the variable created above is `speechConfig`.
console.log("speechConfig", SpeechConfig);
audioConfig = SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();
recognizer = new SpeechSDK.SpeechRecognizer(speechConfig,
audioConfig);
// One-shot recognition: both callbacks dispose of the recognizer when done.
recognizer.recognizeOnceAsync(
function (result) {
console.log("result", result);
recognizer.close();
recognizer = undefined;
},
function (err) {
console.log(err);
recognizer.close();
recognizer = undefined;
});
}, this);
我需要捕获语音输入,然后显示 words/phrases/sentences 学生所说的内容并根据置信度评分。
如果您想获取语音转文本 SDK 所返回文本的置信度分数(confidence score),请尝试以下代码:
<html>
<head>
<title>Speech SDK JavaScript Quickstart</title>
</head>
<script src="microsoft.cognitiveservices.speech.sdk.bundle.js"></script>
<body>
<!-- Shown until the Speech SDK bundle is detected at load time. -->
<div id="warning">
<h1 style="font-weight:500;">Speech Recognition Speech SDK not found (microsoft.cognitiveservices.speech.sdk.bundle.js missing).</h1>
</div>
<div id="content" style="display:none">
<table width="100%">
<tr>
<td></td>
<td><h1 style="font-weight:500;">Microsoft Cognitive Services Speech SDK JavaScript Quickstart</h1></td>
</tr>
<tr>
<td align="right"><a href="https://docs.microsoft.com/azure/cognitive-services/speech-service/get-started" target="_blank">Subscription</a>:</td>
<td><input id="subscriptionKey" type="text" size="40" value="subscription"></td>
</tr>
<tr>
<td align="right">Region</td>
<td><input id="serviceRegion" type="text" size="40" value="YourServiceRegion"></td>
</tr>
<tr>
<td></td>
<td><button id="startRecognizeOnceAsyncButton">Start recognition</button></td>
</tr>
<tr>
<td align="right" valign="top">Results</td>
<td><textarea id="phraseDiv" style="display: inline-block;width:500px;height:200px"></textarea></td>
</tr>
</table>
</div>
</body>
<!-- Speech SDK USAGE -->
<script>
// status fields and start button in UI
var phraseDiv;
var startRecognizeOnceAsyncButton;
// subscription key and region for speech services.
var subscriptionKey, serviceRegion;
var authorizationToken;
var SpeechSDK;
var recognizer;
document.addEventListener("DOMContentLoaded", function () {
startRecognizeOnceAsyncButton = document.getElementById("startRecognizeOnceAsyncButton");
subscriptionKey = document.getElementById("subscriptionKey");
serviceRegion = document.getElementById("serviceRegion");
phraseDiv = document.getElementById("phraseDiv");
startRecognizeOnceAsyncButton.addEventListener("click", function () {
startRecognizeOnceAsyncButton.disabled = true;
phraseDiv.innerHTML = "";
// if we got an authorization token, use the token. Otherwise use the provided subscription key
var speechConfig;
if (authorizationToken) {
speechConfig = SpeechSDK.SpeechConfig.fromAuthorizationToken(authorizationToken, serviceRegion.value);
} else {
if (subscriptionKey.value === "" || subscriptionKey.value === "subscription") {
alert("Please enter your Microsoft Cognitive Services Speech subscription key!");
return;
}
speechConfig = SpeechSDK.SpeechConfig.fromSubscription(subscriptionKey.value, serviceRegion.value);
}
speechConfig.speechRecognitionLanguage = "en-US";
// Detailed output format (OutputFormat.Detailed): the service then includes
// an NBest list with a per-phrase Confidence value in result.json.
speechConfig.outputFormat = 1;
var audioConfig = SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();
recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);
recognizer.recognizeOnceAsync(
function (result) {
startRecognizeOnceAsyncButton.disabled = false;
// Guard: NoMatch / canceled results carry no NBest list, and result.json
// may be absent — avoid a TypeError while still showing the raw result.
var confidence = "n/a";
try {
var detail = JSON.parse(result.json);
if (detail && detail.NBest && detail.NBest.length > 0) {
confidence = detail.NBest[0].Confidence;
}
} catch (e) {
window.console.log("no detailed result available", e);
}
phraseDiv.innerHTML += "Recognize Result:" + result.text + "\nConfidence Score:" + confidence;
window.console.log(result);
recognizer.close();
recognizer = undefined;
},
function (err) {
startRecognizeOnceAsyncButton.disabled = false;
phraseDiv.innerHTML += err;
window.console.log(err);
recognizer.close();
recognizer = undefined;
});
});
if (!!window.SpeechSDK) {
SpeechSDK = window.SpeechSDK;
startRecognizeOnceAsyncButton.disabled = false;
document.getElementById('content').style.display = 'block';
document.getElementById('warning').style.display = 'none';
// in case we have a function for getting an authorization token, call it.
if (typeof RequestAuthorizationToken === "function") {
RequestAuthorizationToken();
}
}
});
</script>
</html>
运行该页面,效果与官方文档所示相同。总之,使用 SDK 时,你应该配置 speechConfig.outputFormat = 1,
这样你就可以得到语音服务的详细格式,包括置信度值。
结果:
在你的代码中,似乎未定义的错误是由于你想打印 SpeechConfig
但该参数定义为 speechConfig
...
总之,为了在 demo 中成功拿到置信度分数(confidence score),我的代码是基于官方 demo 的。希望能帮助到你。
对于您的代码,请尝试下面的 html:
<html>
<body>
<button id='recordButton' onclick = 'test()'>test </button>
</body>
<script src="microsoft.cognitiveservices.speech.sdk.bundle.js"></script>
<script>
// One-shot Azure speech recognition, triggered by the button above.
// Logs the recognized text together with its confidence score.
function test(){
var SDK = window.SpeechSDK;
var soundContext;
try {
// Prefer the standard Web Audio constructor; fall back to the
// WebKit-prefixed one (mostly for Safari).
var AudioContextImpl = window.AudioContext
|| window.webkitAudioContext
|| false; // could not find.
if (AudioContextImpl) {
soundContext = new AudioContextImpl();
console.log("AudioContext", AudioContextImpl);
} else {
alert("Audio context not supported");
}
}
catch (e) {
console.log("no sound context found, no audio output. " + e);
}
console.log("SpeechSDK initialized", SDK);
var speechConfig =
SpeechSDK.SpeechConfig.fromSubscription("<your subscription key>",
"<your service region>");
speechConfig.speechRecognitionLanguage = "en-US";
// Detailed output format so the service returns the NBest list with
// per-phrase confidence scores (see the explanation above).
speechConfig.outputFormat = 1;
console.log("speechConfig", speechConfig);
var audioConfig = SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();
var recognizer = new SpeechSDK.SpeechRecognizer(speechConfig,
audioConfig);
recognizer.recognizeOnceAsync(
function (result) {
console.log("result", result);
// Guard: NoMatch / canceled results carry no NBest list.
try {
var detail = JSON.parse(result.json);
if (detail && detail.NBest && detail.NBest.length > 0) {
console.log("text:", result.text,
"confidence:", detail.NBest[0].Confidence);
}
} catch (e) {
console.log("no detailed result available", e);
}
recognizer.close();
recognizer = undefined;
},
function (err) {
console.log(err);
recognizer.close();
recognizer = undefined;
});
}
</script>
</html>
结果:如您所见,结果已记录:
如果我的回答有帮助,请单击答案旁边的复选标记将其从灰色切换为已填写以标记此答案,谢谢!