语音转文本:使用 NodeJS 将麦克风流通过管道传输到 Watson STT
Speech to Text: Piping microphone stream to Watson STT with NodeJS
我目前正在尝试将麦克风流发送到 Watson STT 服务,但出于某种原因,Watson 服务没有接收到流(我猜),所以我收到错误 "Error: No speech detected for 30s"。
请注意,我已将一个 .wav 文件流式传输到 Watson,并且还测试了将 micInputStream 管道传输到我的本地文件,因此我知道两者至少都已正确设置。我是 NodeJS 的新手/javascript 所以我希望错误可能是显而易见的。
const fs = require('fs');
const mic = require('mic');
var SpeechToTextV1 = require('watson-developer-cloud/speech-to-text/v1');
var speechToText = new SpeechToTextV1({
iam_apikey: '{key_here}',
url: 'https://stream.watsonplatform.net/speech-to-text/api'
});
var params = {
content_type: 'audio/l16; rate=44100; channels=2',
interim_results: true
};
const micParams = {
rate: 44100,
channels: 2,
debug: false,
exitOnSilence: 6
}
const micInstance = mic(micParams);
const micInputStream = micInstance.getAudioStream();
micInstance.start();
console.log('Watson is listening, you may speak now.');
// Create the stream.
var recognizeStream = speechToText.recognizeUsingWebSocket(params);
// Pipe in the audio.
var textStream = micInputStream.pipe(recognizeStream).setEncoding('utf8');
textStream.on('data', user_speech_text => console.log('Watson hears:', user_speech_text));
textStream.on('error', e => console.log(`error: ${e}`));
textStream.on('close', e => console.log(`close: ${e}`));
结论: 最后,我不完全确定代码有什么问题。我猜这与麦克风包有关。我最终报废了包裹并使用 "Node-audiorecorder" 代替我的音频流 https://www.npmjs.com/package/node-audiorecorder
注意:此模块要求您安装 SoX,并且它必须在您的 $PATH 中可用。 http://sox.sourceforge.net/
更新代码: 对于任何想知道我的最终代码是什么样子的人,请看这里。还要感谢 NikolayShmyrev 试图帮助我编写代码!
抱歉评论太多,但对于新项目,我想确保我知道每一行都在做什么。
// Import module.
var AudioRecorder = require('node-audiorecorder');
var fs = require('fs');
var SpeechToTextV1 = require('watson-developer-cloud/speech-to-text/v1');
/******************************************************************************
* Configuring STT
*******************************************************************************/
var speechToText = new SpeechToTextV1({
iam_apikey: '{your watson key here}',
url: 'https://stream.watsonplatform.net/speech-to-text/api'
});
var recognizeStream = speechToText.recognizeUsingWebSocket({
content_type: 'audio/wav',
interim_results: true
});
/******************************************************************************
* Configuring the Recording
*******************************************************************************/
// Options is an optional parameter for the constructor call.
// If an option is not given the default value, as seen below, will be used.
const options = {
program: 'rec', // Which program to use, either `arecord`, `rec`, or `sox`.
device: null, // Recording device to use.
bits: 16, // Sample size. (only for `rec` and `sox`)
channels: 2, // Channel count.
encoding: 'signed-integer', // Encoding type. (only for `rec` and `sox`)
rate: 48000, // Sample rate.
type: 'wav', // Format type.
// Following options only available when using `rec` or `sox`.
silence: 6, // Duration of silence in seconds before it stops recording.
keepSilence: true // Keep the silence in the recording.
};
const logger = console;
/******************************************************************************
* Create Streams
*******************************************************************************/
// Create an instance.
let audioRecorder = new AudioRecorder(options, logger);
//create timeout (so after 10 seconds it stops feel free to remove this)
setTimeout(function() {
audioRecorder.stop();
}, 10000);
// This line is for saving the file locally as well (Strongly encouraged for testing)
const fileStream = fs.createWriteStream("test.wav", { encoding: 'binary' });
// Start stream to Watson STT Remove .pipe(process.stdout) if you dont want translation printed to console
audioRecorder.start().stream().pipe(recognizeStream).pipe(process.stdout);
//Create another stream to save locally
audioRecorder.stream().pipe(fileStream);
//Finally pipe translation to transcription file
recognizeStream.pipe(fs.createWriteStream('./transcription.txt'));
我目前正在尝试将麦克风流发送到 Watson STT 服务,但出于某种原因,Watson 服务没有接收到流(我猜),所以我收到错误 "Error: No speech detected for 30s"。
请注意,我已将一个 .wav 文件流式传输到 Watson,并且还测试了将 micInputStream 管道传输到我的本地文件,因此我知道两者至少都已正确设置。我是 NodeJS 的新手/javascript 所以我希望错误可能是显而易见的。
const fs = require('fs');
const mic = require('mic');
var SpeechToTextV1 = require('watson-developer-cloud/speech-to-text/v1');
var speechToText = new SpeechToTextV1({
iam_apikey: '{key_here}',
url: 'https://stream.watsonplatform.net/speech-to-text/api'
});
var params = {
content_type: 'audio/l16; rate=44100; channels=2',
interim_results: true
};
const micParams = {
rate: 44100,
channels: 2,
debug: false,
exitOnSilence: 6
}
const micInstance = mic(micParams);
const micInputStream = micInstance.getAudioStream();
micInstance.start();
console.log('Watson is listening, you may speak now.');
// Create the stream.
var recognizeStream = speechToText.recognizeUsingWebSocket(params);
// Pipe in the audio.
var textStream = micInputStream.pipe(recognizeStream).setEncoding('utf8');
textStream.on('data', user_speech_text => console.log('Watson hears:', user_speech_text));
textStream.on('error', e => console.log(`error: ${e}`));
textStream.on('close', e => console.log(`close: ${e}`));
结论: 最后,我不完全确定代码有什么问题。我猜这与麦克风包有关。我最终报废了包裹并使用 "Node-audiorecorder" 代替我的音频流 https://www.npmjs.com/package/node-audiorecorder
注意:此模块要求您安装 SoX,并且它必须在您的 $PATH 中可用。 http://sox.sourceforge.net/
更新代码: 对于任何想知道我的最终代码是什么样子的人,请看这里。还要感谢 NikolayShmyrev 试图帮助我编写代码!
抱歉评论太多,但对于新项目,我想确保我知道每一行都在做什么。
// Import module.
var AudioRecorder = require('node-audiorecorder');
var fs = require('fs');
var SpeechToTextV1 = require('watson-developer-cloud/speech-to-text/v1');
/******************************************************************************
* Configuring STT
*******************************************************************************/
var speechToText = new SpeechToTextV1({
iam_apikey: '{your watson key here}',
url: 'https://stream.watsonplatform.net/speech-to-text/api'
});
var recognizeStream = speechToText.recognizeUsingWebSocket({
content_type: 'audio/wav',
interim_results: true
});
/******************************************************************************
* Configuring the Recording
*******************************************************************************/
// Options is an optional parameter for the constructor call.
// If an option is not given the default value, as seen below, will be used.
const options = {
program: 'rec', // Which program to use, either `arecord`, `rec`, or `sox`.
device: null, // Recording device to use.
bits: 16, // Sample size. (only for `rec` and `sox`)
channels: 2, // Channel count.
encoding: 'signed-integer', // Encoding type. (only for `rec` and `sox`)
rate: 48000, // Sample rate.
type: 'wav', // Format type.
// Following options only available when using `rec` or `sox`.
silence: 6, // Duration of silence in seconds before it stops recording.
keepSilence: true // Keep the silence in the recording.
};
const logger = console;
/******************************************************************************
* Create Streams
*******************************************************************************/
// Create an instance.
let audioRecorder = new AudioRecorder(options, logger);
//create timeout (so after 10 seconds it stops feel free to remove this)
setTimeout(function() {
audioRecorder.stop();
}, 10000);
// This line is for saving the file locally as well (Strongly encouraged for testing)
const fileStream = fs.createWriteStream("test.wav", { encoding: 'binary' });
// Start stream to Watson STT Remove .pipe(process.stdout) if you dont want translation printed to console
audioRecorder.start().stream().pipe(recognizeStream).pipe(process.stdout);
//Create another stream to save locally
audioRecorder.stream().pipe(fileStream);
//Finally pipe translation to transcription file
recognizeStream.pipe(fs.createWriteStream('./transcription.txt'));