如何在电脑播放音频前暂停语音识别(JS SpeechRecognition),播放完再恢复语音识别?
How to pause speech recognition (JS SpeechRecognition) before audio is played by the computer and then resume speech recognition after being played?
webkitSpeechRecognition有没有办法在电脑播放音频时暂停和恢复语音识别?现在的计算机似乎混淆了用户通过麦克风输入的内容和从 wav 文件输出的音频。
现在我创建了以下内容:
var speechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
var recognition = new webkitSpeechRecognition();
window.addEventListener('DOMContentLoaded', function() {
document.getElementById("speak_button").addEventListener('click', function() {
recognition.start();
setInterval(updateCountDown,1000); /* countdown timer starts 1 second after
being clicked */
updateCountDown(); //this is a function that counts down from 2 minutes to 0
});
});
var transcript; // transcript variable will store what the user says to the computer
recognition.addEventListener('result', e => {
transcript = Array.from(e.results)
.map(result => result[0])
.map(result => result.transcript)
.join('');
console.log(transcript);
communicateToUser();
});
function communicateToUser() {
var audio_age = new Audio("age_20.wav");
var age_regular_expression = /(?=.*\bhow\b)(?=.*\bold\b)(?=.*\byou\b)|(?=.*\bgrab\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bage\b)| (?=.*\btell\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\bhow\b)(?=.*\bold\b)|(?=.*\byou\b)(?=.*\bhow\b)(?=.*\bold\b)/gi;
// if regular expression matches all words, then function will be performed
if (age_regular_expression.test(transcript)) {
recognition.stop(); /* wanting the speech recognition to stop here so that it
doesn't capture the contents of audio_age */
audio_age.play(); // audio will play "I am 20 years old"
recognition.start(); /* wanting the speech recognition to start again
after audio_age is played */
}
}
问题是 recognition.stop()
函数不起作用,这意味着麦克风将继续捕获 audio_age.wav
的内容并将其转换为文本。所以,当我想再次对着电脑说话并问它一个问题时,将要分析的抄本将包括我刚才讲话时的抄本。
如有任何建议,我们将不胜感激。
我正在考虑一个解决方案,但我不确定如何实施它:
解决方案:
停止识别功能并延迟与音频文件播放相同的秒数(例如5秒),然后在这5秒后再次启动识别功能?
谢谢!
为 CESARE 编辑:
// SPEECH RECOGNITION SET UP
var speechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
var recognition = new webkitSpeechRecognition();
window.addEventListener('DOMContentLoaded', function() {
document.getElementById("speak_button").addEventListener('click', function() {
recognition.start();
setInterval(updateCountDown,1000);
updateCountDown();
});
});
// ALL OF THE AUDIO FILES --> WILL BE PLAYED IF REGEX MATCHES TRUE
const audio_name = new Audio("name_harry.wav");
const audio_age = new Audio("age_20.wav");
const audio_date_of_birth = new Audio("15_nov_1999.wav");
const audio_occupation = new Audio("grocery_store.wav");
// ON SPEECH START --> IF MICROPHONE INPUT IS DETECTED, THEN SPEECH RECOGNITION STARTS
recognition.onspeechstart = () => {
console.log("SPEECH STARTED");
if (!audio_age.paused) {audio_age.pause()}
else if (!audio_name.paused) {audio_name.pause()}
else if (!audio_date_of_birth.paused) {audio_date_of_birth.pause()}
else if (!audio_occupation.paused) {audio_occupation.pause()}
};
// ON SPEECH END --> WHEN MICROPHONE INPUT STOPS, SPEECH RECOGNITION SHOULD END
recognition.onspeechend = () => {
console.log("SPEECH ENDED");
recognition.stop();
};
// I have included this because I want the computer to continue listening to the user, but only after the audio is finished playing
recognition.addEventListener('end', recognition.start);
// After audio is ended, speech recognition will start again
audio_name.addEventListener('ended', recognition.start);
audio_age.addEventListener('ended', recognition.start);
audio_date_of_birth.addEventListener('ended', recognition.start);
audio_occupation.addEventListener('ended', recognition.start);
audio_height.addEventListener('ended', recognition.start);
// USED TO OBTAIN THE USER TRANSCRIPT/ACTUAL SPEECH CONTENT
var transcript;
recognition.addEventListener('result', e => {
transcript = Array.from(e.results)
.map((result) => result[0])
.map((result) => result.transcript)
.join('');
console.log(transcript);
communicateToUser();
});
// ALL OF THE REGULAR EXPRESSIONS
const name_regex = /what is your name|(?=.*\byour\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\btell\b)(?=.*\bme\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\bcan\b)(?=.*\btell\b)(?=.*\bme\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\btell\b)(?=.*\bme\b)(?=.*\byour\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\blet\b)(?=.*\bknow\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\bgrab\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\bshare\b)(?=.*\bme\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bfirst\b)(?=.*\band\b)(?=.*\blast\b)(?=.*\bname\b)/ig;
const age_regex = /(?=.*\bhow\b)(?=.*\bold\b)(?=.*\byou\b)|(?=.*\bgrab\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bage\b)| (?=.*\btell\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\bhow\b)(?=.*\bold\b)|(?=.*\byou\b)(?=.*\bhow\b)(?=.*\bold\b)/gi;
const date_of_birth_regex = /(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\byour\b)(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\bshare\b)(?=.*\byour\b)(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\bshare\b)(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\bwhen\b)(?=.*\byou\b)(?=.*\bborn\b)|(?=.*\bwhen\b)(?=.*\byou\b)(?=.*\bborn\b)|(?=.*\bwhat\b)(?=.*\bdate\b)(?=.*\byou\b)(?=.*\bborn\b)/gi
const patient_occupation = /do you have a job|(?=.*\bdo\b)(?=.*\byou\b)(?=.*\bwork\b)|(?=.*\byou\b)(?=.*\bhave\b)(?=.*\bjob\b)|(?=.*\byou\b)(?=.*\bwork\b)(?=.*\bwhere\b)|(?=.*\banything\b)(?=.*\bfor\b)(?=.*\bwork\b)|(?=.*\byou\b)(?=.*\bwork\b)(?=.*\banywhere\b)|(?=.*\bwhat\b)(?=.*\boccupation\b)|(?=.*\byour\b)(?=.*\boccupation\b)|(?=.*\byou\b)(?=.*\boccupation\b)|(?=.*\byour\b)(?=.*\bjob\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bjob\b)|(?=.*\byou\b)(?=.*\bjob\b)|(?=.*\bjob\b)/ig;
// COMMUNICATE BACK TO USER FUNCTION
function communicateToUser() {
if (name_regex.test(transcript)) {
audio_name.play();
}
if (age_regex.test(transcript)) {
audio_age.play();
}
if (date_of_birth_regex.test(transcript)) {
audio_date_of_birth.play();
}
if (occuptation_regex.test(transcript)) {
audio_occupation.play();
}
}
更新倒计时函数
function updateCountDown() {
const minutes = Math.floor(time / 60);
let seconds = time % 60;
seconds = seconds < 2 ? '0' + seconds : seconds;
document.getElementById("countdown").innerHTML = `${minutes}:${seconds}`;
time--;
time = time < 0 ? 0 : time;
if (minutes == 0 && seconds == 0) {
document.getElementById('tableStyle').style.display = "block";
recognition.stop(); //ADDING IN RECOGNITION.STOP ONCE MINUTES AND SECONDS == 0!
}
};
编辑:
我做了一个工作示例,https://stackblitz.com/edit/web-platform-ppcuh9?file=index.html:
let isListening = false; // use this flag to toggle the recognition
let interval;
const button = document.getElementById('speak_button');
const speaker = new MakeSpeechSynth({
pitch: 0.5,
rate: 0.8,
language: 'en-US',
});
const SpeechRecognition =
window.SpeechRecognition || window.webkitSpeechRecognition;
const recognition = new SpeechRecognition();
button.addEventListener('click', function() {
if (isListening) {
console.log('ABORTING RECOGNITION');
isListening = false;
recognition.abort();
clearInterval(interval);
button.innerText = 'Click Me To Speak';
} else {
console.log('STARTING RECOGNITION');
recognition.start();
interval = setInterval(updateCountDown, 1000);
updateCountDown();
button.innerText = 'Stop Recognition';
isListening = true;
}
});
recognition.onaudiostart = () => {
console.log('RECOGNITION STARTED');
};
recognition.onaudioend = () => {
console.log('RECOGNITION FINISHED');
};
recognition.onend = () => {
console.log('RECOGNITION DISCONNECTED');
if (isListening) recognition.start();
};
recognition.onspeechstart = () => {
console.log('SPEECH STARTED');
// You can stop the bot speaking if you want when you speak over him:
// Comment if you want him to keep speaking
//Object.values(data).forEach((d) => d.audio.pause());
if (speaker.isSpeaking) speaker.cancel();
};
recognition.onspeechend = () => {
console.log('SPEECH ENDED');
};
recognition.addEventListener('result', (e) => {
const transcript = Array.from(e.results)
.map((result) => result[0])
.map((result) => result.transcript)
.join('');
console.log(transcript);
speakBackToMe(transcript);
});
function speakBackToMe(str) {
Object.values(data).forEach((d) => {
if (d.regex.test(str)) {
// d.audio.play();
speaker.speak(d.message);
console.log(d.message);
}
});
}
// UPDATE COUNTDOWN
const startingMinutes = 2;
let time = startingMinutes * 60;
function updateCountDown() {
const minutes = Math.floor(time / 60);
let seconds = time % 60;
seconds = seconds < 2 ? '0' + seconds : seconds;
document.getElementById('countdown').innerHTML = `${minutes}:${seconds}`;
time--;
time = time < 0 ? 0 : time;
if (minutes == 0 && seconds == 0) {
document.getElementById('tableStyle').style.display = 'table-cell';
}
}
<div id="app"></div>
<button id="speak_button">Click Me to Speak</button>
<p id="countdown"></p>
webkitSpeechRecognition有没有办法在电脑播放音频时暂停和恢复语音识别?现在的计算机似乎混淆了用户通过麦克风输入的内容和从 wav 文件输出的音频。
现在我创建了以下内容:
var speechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
var recognition = new webkitSpeechRecognition();
window.addEventListener('DOMContentLoaded', function() {
document.getElementById("speak_button").addEventListener('click', function() {
recognition.start();
setInterval(updateCountDown,1000); /* countdown timer starts 1 second after
being clicked */
updateCountDown(); //this is a function that counts down from 2 minutes to 0
});
});
var transcript; // transcript variable will store what the user says to the computer
recognition.addEventListener('result', e => {
transcript = Array.from(e.results)
.map(result => result[0])
.map(result => result.transcript)
.join('');
console.log(transcript);
communicateToUser();
});
function communicateToUser() {
var audio_age = new Audio("age_20.wav");
var age_regular_expression = /(?=.*\bhow\b)(?=.*\bold\b)(?=.*\byou\b)|(?=.*\bgrab\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bage\b)| (?=.*\btell\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\bhow\b)(?=.*\bold\b)|(?=.*\byou\b)(?=.*\bhow\b)(?=.*\bold\b)/gi;
// if regular expression matches all words, then function will be performed
if (age_regular_expression.test(transcript)) {
recognition.stop(); /* wanting the speech recognition to stop here so that it
doesn't capture the contents of audio_age */
audio_age.play(); // audio will play "I am 20 years old"
recognition.start(); /* wanting the speech recognition to start again
after audio_age is played */
}
}
问题是 recognition.stop()
函数不起作用,这意味着麦克风将继续捕获 audio_age.wav
的内容并将其转换为文本。所以,当我想再次对着电脑说话并问它一个问题时,将要分析的抄本将包括我刚才讲话时的抄本。
如有任何建议,我们将不胜感激。
我正在考虑一个解决方案,但我不确定如何实施它:
解决方案:
停止识别功能并延迟与音频文件播放相同的秒数(例如5秒),然后在这5秒后再次启动识别功能?
谢谢!
为 CESARE 编辑:
// SPEECH RECOGNITION SET UP
var speechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
var recognition = new webkitSpeechRecognition();
window.addEventListener('DOMContentLoaded', function() {
document.getElementById("speak_button").addEventListener('click', function() {
recognition.start();
setInterval(updateCountDown,1000);
updateCountDown();
});
});
// ALL OF THE AUDIO FILES --> WILL BE PLAYED IF REGEX MATCHES TRUE
const audio_name = new Audio("name_harry.wav");
const audio_age = new Audio("age_20.wav");
const audio_date_of_birth = new Audio("15_nov_1999.wav");
const audio_occupation = new Audio("grocery_store.wav");
// ON SPEECH START --> IF MICROPHONE INPUT IS DETECTED, THEN SPEECH RECOGNITION STARTS
recognition.onspeechstart = () => {
console.log("SPEECH STARTED");
if (!audio_age.paused) {audio_age.pause()}
else if (!audio_name.paused) {audio_name.pause()}
else if (!audio_date_of_birth.paused) {audio_date_of_birth.pause()}
else if (!audio_occupation.paused) {audio_occupation.pause()}
};
// ON SPEECH END --> WHEN MICROPHONE INPUT STOPS, SPEECH RECOGNITION SHOULD END
recognition.onspeechend = () => {
console.log("SPEECH ENDED");
recognition.stop();
};
// I have included this because I want the computer to continue listening to the user, but only after the audio is finished playing
recognition.addEventListener('end', recognition.start);
// After audio is ended, speech recognition will start again
audio_name.addEventListener('ended', recognition.start);
audio_age.addEventListener('ended', recognition.start);
audio_date_of_birth.addEventListener('ended', recognition.start);
audio_occupation.addEventListener('ended', recognition.start);
audio_height.addEventListener('ended', recognition.start);
// USED TO OBTAIN THE USER TRANSCRIPT/ACTUAL SPEECH CONTENT
var transcript;
recognition.addEventListener('result', e => {
transcript = Array.from(e.results)
.map((result) => result[0])
.map((result) => result.transcript)
.join('');
console.log(transcript);
communicateToUser();
});
// ALL OF THE REGULAR EXPRESSIONS
const name_regex = /what is your name|(?=.*\byour\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\btell\b)(?=.*\bme\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\bcan\b)(?=.*\btell\b)(?=.*\bme\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\btell\b)(?=.*\bme\b)(?=.*\byour\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\blet\b)(?=.*\bknow\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\bgrab\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bname\b)|(?=.*\bshare\b)(?=.*\bme\b)(?=.*\bfull\b)(?=.*\bname\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bfirst\b)(?=.*\band\b)(?=.*\blast\b)(?=.*\bname\b)/ig;
const age_regex = /(?=.*\bhow\b)(?=.*\bold\b)(?=.*\byou\b)|(?=.*\bgrab\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bage\b)| (?=.*\btell\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\byour\b)(?=.*\bage\b)|(?=.*\bshare\b)(?=.*\bhow\b)(?=.*\bold\b)|(?=.*\byou\b)(?=.*\bhow\b)(?=.*\bold\b)/gi;
const date_of_birth_regex = /(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\byour\b)(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\bshare\b)(?=.*\byour\b)(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\bshare\b)(?=.*\bdate\b)(?=.*\bof\b)(?=.*\bbirth\b)|(?=.*\bwhen\b)(?=.*\byou\b)(?=.*\bborn\b)|(?=.*\bwhen\b)(?=.*\byou\b)(?=.*\bborn\b)|(?=.*\bwhat\b)(?=.*\bdate\b)(?=.*\byou\b)(?=.*\bborn\b)/gi
const patient_occupation = /do you have a job|(?=.*\bdo\b)(?=.*\byou\b)(?=.*\bwork\b)|(?=.*\byou\b)(?=.*\bhave\b)(?=.*\bjob\b)|(?=.*\byou\b)(?=.*\bwork\b)(?=.*\bwhere\b)|(?=.*\banything\b)(?=.*\bfor\b)(?=.*\bwork\b)|(?=.*\byou\b)(?=.*\bwork\b)(?=.*\banywhere\b)|(?=.*\bwhat\b)(?=.*\boccupation\b)|(?=.*\byour\b)(?=.*\boccupation\b)|(?=.*\byou\b)(?=.*\boccupation\b)|(?=.*\byour\b)(?=.*\bjob\b)|(?=.*\bwhat\b)(?=.*\byour\b)(?=.*\bjob\b)|(?=.*\byou\b)(?=.*\bjob\b)|(?=.*\bjob\b)/ig;
// COMMUNICATE BACK TO USER FUNCTION
function communicateToUser() {
if (name_regex.test(transcript)) {
audio_name.play();
}
if (age_regex.test(transcript)) {
audio_age.play();
}
if (date_of_birth_regex.test(transcript)) {
audio_date_of_birth.play();
}
if (occuptation_regex.test(transcript)) {
audio_occupation.play();
}
}
更新倒计时函数
function updateCountDown() {
const minutes = Math.floor(time / 60);
let seconds = time % 60;
seconds = seconds < 2 ? '0' + seconds : seconds;
document.getElementById("countdown").innerHTML = `${minutes}:${seconds}`;
time--;
time = time < 0 ? 0 : time;
if (minutes == 0 && seconds == 0) {
document.getElementById('tableStyle').style.display = "block";
recognition.stop(); //ADDING IN RECOGNITION.STOP ONCE MINUTES AND SECONDS == 0!
}
};
编辑:
我做了一个工作示例,https://stackblitz.com/edit/web-platform-ppcuh9?file=index.html:
let isListening = false; // use this flag to toggle the recognition
let interval;
const button = document.getElementById('speak_button');
const speaker = new MakeSpeechSynth({
pitch: 0.5,
rate: 0.8,
language: 'en-US',
});
const SpeechRecognition =
window.SpeechRecognition || window.webkitSpeechRecognition;
const recognition = new SpeechRecognition();
button.addEventListener('click', function() {
if (isListening) {
console.log('ABORTING RECOGNITION');
isListening = false;
recognition.abort();
clearInterval(interval);
button.innerText = 'Click Me To Speak';
} else {
console.log('STARTING RECOGNITION');
recognition.start();
interval = setInterval(updateCountDown, 1000);
updateCountDown();
button.innerText = 'Stop Recognition';
isListening = true;
}
});
recognition.onaudiostart = () => {
console.log('RECOGNITION STARTED');
};
recognition.onaudioend = () => {
console.log('RECOGNITION FINISHED');
};
recognition.onend = () => {
console.log('RECOGNITION DISCONNECTED');
if (isListening) recognition.start();
};
recognition.onspeechstart = () => {
console.log('SPEECH STARTED');
// You can stop the bot speaking if you want when you speak over him:
// Comment if you want him to keep speaking
//Object.values(data).forEach((d) => d.audio.pause());
if (speaker.isSpeaking) speaker.cancel();
};
recognition.onspeechend = () => {
console.log('SPEECH ENDED');
};
recognition.addEventListener('result', (e) => {
const transcript = Array.from(e.results)
.map((result) => result[0])
.map((result) => result.transcript)
.join('');
console.log(transcript);
speakBackToMe(transcript);
});
function speakBackToMe(str) {
Object.values(data).forEach((d) => {
if (d.regex.test(str)) {
// d.audio.play();
speaker.speak(d.message);
console.log(d.message);
}
});
}
// UPDATE COUNTDOWN
const startingMinutes = 2;
let time = startingMinutes * 60;
function updateCountDown() {
const minutes = Math.floor(time / 60);
let seconds = time % 60;
seconds = seconds < 2 ? '0' + seconds : seconds;
document.getElementById('countdown').innerHTML = `${minutes}:${seconds}`;
time--;
time = time < 0 ? 0 : time;
if (minutes == 0 && seconds == 0) {
document.getElementById('tableStyle').style.display = 'table-cell';
}
}
<div id="app"></div>
<button id="speak_button">Click Me to Speak</button>
<p id="countdown"></p>