如何使用 SpeechRecognition API 检测用户说话的时间?
How to detect when the user is talking with the SpeechRecognition API?
我正在尝试检测用户何时暂停并再次开始说话而不停止识别。这是我设置的用于检测用户何时说话并将其输出到页面的代码:
// Shared dictation state.
// `final_transcript` starts as an empty string so that appending recognized
// text can never concatenate onto `undefined`.
var final_transcript = '';
var recognizing = false;

// Only wire up recognition when the prefixed constructor exists on `window`.
if ('webkitSpeechRecognition' in window) {
  // `var` (not `let`/`const`) so that `recognition` stays visible outside this
  // block, where startDictation() uses it.
  var recognition = new webkitSpeechRecognition();
  recognition.continuous = true;
  recognition.interimResults = true;

  // Track whether a recognition session is currently running.
  recognition.onstart = function() {
    recognizing = true;
  };

  // Log the error code reported by the recognizer.
  recognition.onerror = function(event) {
    console.log(event.error);
  };

  recognition.onend = function() {
    recognizing = false;
  };

  // Split the results delivered by this event into final and interim text
  // and render both into the page.
  recognition.onresult = function(event) {
    var interim_transcript = '';
    for (var i = event.resultIndex; i < event.results.length; ++i) {
      var transcript = event.results[i][0].transcript;
      if (event.results[i].isFinal) {
        // Finalized text is accumulated across events.
        final_transcript += transcript;
      } else {
        // Interim text is rebuilt from scratch on every event.
        interim_transcript += transcript;
      }
    }
    final_transcript = capitalize(final_transcript);
    // Look the output elements up explicitly instead of relying on the
    // implicit globals browsers create for elements with an id.
    document.getElementById('final_span').innerHTML = linebreak(final_transcript);
    document.getElementById('interim_span').innerHTML = linebreak(interim_transcript);
  };
}
// Patterns for a blank line (two consecutive newlines) and a single newline.
var two_line = /\n\n/g;
var one_line = /\n/g;

/**
 * Converts newlines in a transcript into HTML markup.
 * Double newlines become an empty paragraph, remaining single newlines
 * become line breaks.
 * @param {string} s - Text that may contain newline characters.
 * @returns {string} The text with newlines replaced by HTML tags.
 */
function linebreak(s) {
  // Paragraph breaks must be handled first, otherwise each "\n\n" would be
  // consumed as two single line breaks.
  var withParagraphs = s.replace(two_line, '<p></p>');
  var withLineBreaks = withParagraphs.replace(one_line, '<br>');
  return withLineBreaks;
}
/**
 * Upper-cases the first character of a string and leaves the rest untouched.
 * The first character is taken as a full Unicode code point, so a leading
 * character outside the Basic Multilingual Plane is capitalized as well.
 * @param {string} s - Text to capitalize.
 * @returns {string} `s` with its first character upper-cased; an empty
 *   string is returned unchanged.
 */
function capitalize(s) {
  var codePoint = s.codePointAt(0);
  if (codePoint === undefined) {
    // Empty string: nothing to capitalize.
    return s;
  }
  var first = String.fromCodePoint(codePoint);
  // `first.length` is 2 for a surrogate pair, so the slice skips the whole
  // character rather than splitting it.
  return first.toUpperCase() + s.slice(first.length);
}
/**
 * Toggles dictation: stops the recognizer when a session is running,
 * otherwise clears the accumulated transcript and starts a new session
 * with the language set to 'en'.
 * @param {Event} [event] - Triggering event; not used.
 */
function startDictation(event) {
  // `recognition` is only created when the browser exposes
  // webkitSpeechRecognition; without it there is nothing to start or stop.
  if (typeof recognition === 'undefined' || !recognition) {
    console.warn('Speech recognition is not available in this browser.');
    return;
  }
  if (recognizing) {
    recognition.stop();
    return;
  }
  // Reset the transcript so a new session does not append to the old text.
  final_transcript = '';
  recognition.lang = 'en';
  recognition.start();
}
//startDictation();
<!-- Output area for the dictation script: the onresult handler writes the
     finalized transcript into #final_span and the interim transcript into
     #interim_span. -->
<div id="results">
<span id="final_span" class="final"></span>
<span id="interim_span" class="interim"></span>
</div>
那么有没有办法检测用户何时暂停以及他暂停的时间长度?
这不是一个完整的答案,但它是一个开始。
https://jsfiddle.net/persianturtle/7uygdyy1/1/
我不确定 onspeechstart
事件是否只触发一次,或者如果我在噪音较小的区域它是否会再次触发。
如果它触发多次,这就变得很容易做到,因为我们可以存储 speech start
和 speech end
的时间,并据此计算出静默期。
如果 onspeechstart
只触发一次,那么您或许可以找到一种方法来获取给定文本的平均语音持续时间,并从那里找出静音。
但是,每次捕获单词时都会触发一个事件:onresult
。
所以基本思路是定义一个名为 activity
的数组,您可以用 push
将语音数据放入其中,之后再分析其中的静默期。
代码:
// Log of speech events in arrival order; each entry is a label followed by
// the event's timeStamp.
let activity = [];

// Log every result event and record an "Ended" entry with its timestamp.
recognition.onresult = function (resultEvent) {
  console.log(resultEvent);
  activity.push(`Ended:${resultEvent.timeStamp}`);
};

// Record a "Started" entry whenever the recognizer reports speech starting.
recognition.onspeechstart = function (speechEvent) {
  activity.push(`Started:${speechEvent.timeStamp}`);
};

// When recognition ends, dump the collected entries for inspection.
recognition.onend = function (endEvent) {
  console.log(activity);
};
我正在尝试检测用户何时暂停并再次开始说话而不停止识别。这是我设置的用于检测用户何时说话并将其输出到页面的代码:
// Shared dictation state.
// `final_transcript` starts as an empty string so that appending recognized
// text can never concatenate onto `undefined`.
var final_transcript = '';
var recognizing = false;

// Only wire up recognition when the prefixed constructor exists on `window`.
if ('webkitSpeechRecognition' in window) {
  // `var` (not `let`/`const`) so that `recognition` stays visible outside this
  // block, where startDictation() uses it.
  var recognition = new webkitSpeechRecognition();
  recognition.continuous = true;
  recognition.interimResults = true;

  // Track whether a recognition session is currently running.
  recognition.onstart = function() {
    recognizing = true;
  };

  // Log the error code reported by the recognizer.
  recognition.onerror = function(event) {
    console.log(event.error);
  };

  recognition.onend = function() {
    recognizing = false;
  };

  // Split the results delivered by this event into final and interim text
  // and render both into the page.
  recognition.onresult = function(event) {
    var interim_transcript = '';
    for (var i = event.resultIndex; i < event.results.length; ++i) {
      var transcript = event.results[i][0].transcript;
      if (event.results[i].isFinal) {
        // Finalized text is accumulated across events.
        final_transcript += transcript;
      } else {
        // Interim text is rebuilt from scratch on every event.
        interim_transcript += transcript;
      }
    }
    final_transcript = capitalize(final_transcript);
    // Look the output elements up explicitly instead of relying on the
    // implicit globals browsers create for elements with an id.
    document.getElementById('final_span').innerHTML = linebreak(final_transcript);
    document.getElementById('interim_span').innerHTML = linebreak(interim_transcript);
  };
}
// Patterns for a blank line (two consecutive newlines) and a single newline.
var two_line = /\n\n/g;
var one_line = /\n/g;

/**
 * Converts newlines in a transcript into HTML markup.
 * Double newlines become an empty paragraph, remaining single newlines
 * become line breaks.
 * @param {string} s - Text that may contain newline characters.
 * @returns {string} The text with newlines replaced by HTML tags.
 */
function linebreak(s) {
  // Paragraph breaks must be handled first, otherwise each "\n\n" would be
  // consumed as two single line breaks.
  var withParagraphs = s.replace(two_line, '<p></p>');
  var withLineBreaks = withParagraphs.replace(one_line, '<br>');
  return withLineBreaks;
}
/**
 * Upper-cases the first character of a string and leaves the rest untouched.
 * The first character is taken as a full Unicode code point, so a leading
 * character outside the Basic Multilingual Plane is capitalized as well.
 * @param {string} s - Text to capitalize.
 * @returns {string} `s` with its first character upper-cased; an empty
 *   string is returned unchanged.
 */
function capitalize(s) {
  var codePoint = s.codePointAt(0);
  if (codePoint === undefined) {
    // Empty string: nothing to capitalize.
    return s;
  }
  var first = String.fromCodePoint(codePoint);
  // `first.length` is 2 for a surrogate pair, so the slice skips the whole
  // character rather than splitting it.
  return first.toUpperCase() + s.slice(first.length);
}
/**
 * Toggles dictation: stops the recognizer when a session is running,
 * otherwise clears the accumulated transcript and starts a new session
 * with the language set to 'en'.
 * @param {Event} [event] - Triggering event; not used.
 */
function startDictation(event) {
  // `recognition` is only created when the browser exposes
  // webkitSpeechRecognition; without it there is nothing to start or stop.
  if (typeof recognition === 'undefined' || !recognition) {
    console.warn('Speech recognition is not available in this browser.');
    return;
  }
  if (recognizing) {
    recognition.stop();
    return;
  }
  // Reset the transcript so a new session does not append to the old text.
  final_transcript = '';
  recognition.lang = 'en';
  recognition.start();
}
//startDictation();
<!-- Output area for the dictation script: the onresult handler writes the
     finalized transcript into #final_span and the interim transcript into
     #interim_span. -->
<div id="results">
<span id="final_span" class="final"></span>
<span id="interim_span" class="interim"></span>
</div>
那么有没有办法检测用户何时暂停以及他暂停的时间长度?
这不是一个完整的答案,但它是一个开始。
https://jsfiddle.net/persianturtle/7uygdyy1/1/
我不确定 onspeechstart
事件是否只触发一次,或者如果我在噪音较小的区域它是否会再次触发。
如果它触发多次,这就变得很容易做到,因为我们可以存储 speech start
和 speech end
的时间,并据此计算出静默期。
如果 onspeechstart
只触发一次,那么您或许可以找到一种方法来获取给定文本的平均语音持续时间,并从那里找出静音。
但是,每次捕获单词时都会触发一个事件:onresult
。
所以基本思路是定义一个名为 activity
的数组,您可以用 push
将语音数据放入其中,之后再分析其中的静默期。
代码:
// Log of speech events in arrival order; each entry is a label followed by
// the event's timeStamp.
let activity = [];

// Log every result event and record an "Ended" entry with its timestamp.
recognition.onresult = function (resultEvent) {
  console.log(resultEvent);
  activity.push(`Ended:${resultEvent.timeStamp}`);
};

// Record a "Started" entry whenever the recognizer reports speech starting.
recognition.onspeechstart = function (speechEvent) {
  activity.push(`Started:${speechEvent.timeStamp}`);
};

// When recognition ends, dump the collected entries for inspection.
recognition.onend = function (endEvent) {
  console.log(activity);
};