使用 java 中的麦克风通过 VOSK 进行语音识别
Use the microphone in java for speech recognition with VOSK
我正在尝试将实时语音识别添加到我的 java 项目中(最好是离线)。通过谷歌搜索和尝试其他解决方案,我决定使用 VOSK 进行语音识别。然而,我遇到的主要问题是 VOSK 的文档很少,并且只有一个 java 的示例文件,用于从预先录制的 wav 文件中提取文本,如下所示。
public static void main(String[] argv) throws IOException, UnsupportedAudioFileException {
LibVosk.setLogLevel(LogLevel.DEBUG);
try (Model model = new Model("src\main\resources\model");
InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream("src\main\resources\python_example_test.wav")));
Recognizer recognizer = new Recognizer(model, 16000)) {
int nbytes;
byte[] b = new byte[4096];
while ((nbytes = ais.read(b)) >= 0) {
System.out.println(nbytes);
if (recognizer.acceptWaveForm(b, nbytes)) {
System.out.println(recognizer.getResult());
} else {
System.out.println(recognizer.getPartialResult());
}
}
System.out.println(recognizer.getFinalResult());
}
}
我试图将它转换成可以接受麦克风音频的东西,如下所示:
public static void main(String[] args) {
LibVosk.setLogLevel(LogLevel.DEBUG);
AudioFormat format = new AudioFormat(8000.0f, 16, 1, true, true);
TargetDataLine microphone;
SourceDataLine speakers;
try (Model model = new Model("src\main\resources\model");
Recognizer recognizer = new Recognizer(model, 16000)) {
try {
microphone = AudioSystem.getTargetDataLine(format);
DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
microphone = (TargetDataLine) AudioSystem.getLine(info);
microphone.open(format);
microphone.start();
ByteArrayOutputStream out = new ByteArrayOutputStream();
int numBytesRead;
int CHUNK_SIZE = 1024;
int bytesRead = 0;
DataLine.Info dataLineInfo = new DataLine.Info(SourceDataLine.class, format);
speakers = (SourceDataLine) AudioSystem.getLine(dataLineInfo);
speakers.open(format);
speakers.start();
byte[] b = new byte[4096];
while (bytesRead <= 100000) {
numBytesRead = microphone.read(b, 0, CHUNK_SIZE);
bytesRead += numBytesRead;
out.write(b, 0, numBytesRead);
speakers.write(b, 0, numBytesRead);
if (recognizer.acceptWaveForm(b, numBytesRead)) {
System.out.println(recognizer.getResult());
} else {
System.out.println(recognizer.getPartialResult());
}
}
System.out.println(recognizer.getFinalResult());
speakers.drain();
speakers.close();
microphone.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
这似乎正确地捕获了麦克风数据(因为它也输出到扬声器),但 VOSK 显示没有输入,不断地将结果打印为空字符串。我究竟做错了什么?我正在尝试的事情到底可行吗?我应该尝试寻找不同的语音识别库吗?
这段代码对我来说工作正常,你可以使用这个:
/**
 * Captures audio from the microphone, echoes it to the speakers and feeds it to a VOSK
 * recognizer, printing a result or partial result for every chunk and the final result at the
 * end.
 *
 * <p>Capture stops after a little over 100000000 bytes have been read (roughly 52 minutes at
 * 16 kHz, 16-bit mono) or as soon as the microphone line stops delivering data. Errors raised
 * while opening or using the audio lines are printed to standard error.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    LibVosk.setLogLevel(LogLevel.DEBUG);

    // Capture genuine mono audio and hand the recognizer the very same sample rate, rather
    // than capturing interleaved stereo and declaring it as mono at twice the rate.
    // The audio is 16-bit signed mono PCM, little-endian.
    final float sampleRate = 16000.0f;
    AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);
    DataLine.Info micInfo = new DataLine.Info(TargetDataLine.class, format);
    DataLine.Info speakerInfo = new DataLine.Info(SourceDataLine.class, format);

    try (Model model = new Model("model");
         Recognizer recognizer = new Recognizer(model, sampleRate)) {
        // Both lines are AutoCloseable, so they are released even when an exception is thrown.
        try (TargetDataLine microphone = (TargetDataLine) AudioSystem.getLine(micInfo);
             SourceDataLine speakers = (SourceDataLine) AudioSystem.getLine(speakerInfo)) {
            microphone.open(format);
            microphone.start();
            speakers.open(format);
            speakers.start();

            final int chunkSize = 1024;
            final int maxBytes = 100000000;
            byte[] buffer = new byte[chunkSize];
            int totalBytesRead = 0;

            // The audio is streamed straight through; nothing is accumulated in memory.
            while (totalBytesRead <= maxBytes) {
                int numBytesRead = microphone.read(buffer, 0, chunkSize);
                if (numBytesRead <= 0) {
                    // A stopped or closed line returns no data; bail out instead of spinning.
                    break;
                }
                totalBytesRead += numBytesRead;

                // Echo the captured audio so the user can hear what is being recognized.
                speakers.write(buffer, 0, numBytesRead);

                if (recognizer.acceptWaveForm(buffer, numBytesRead)) {
                    System.out.println(recognizer.getResult());
                } else {
                    System.out.println(recognizer.getPartialResult());
                }
            }

            System.out.println(recognizer.getFinalResult());
            // Let queued playback finish before the lines are closed.
            speakers.drain();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
我正在尝试将实时语音识别添加到我的 java 项目中(最好是离线)。通过谷歌搜索和尝试其他解决方案,我决定使用 VOSK 进行语音识别。然而,我遇到的主要问题是 VOSK 的文档很少,并且只有一个 java 的示例文件,用于从预先录制的 wav 文件中提取文本,如下所示。
public static void main(String[] argv) throws IOException, UnsupportedAudioFileException {
LibVosk.setLogLevel(LogLevel.DEBUG);
try (Model model = new Model("src\main\resources\model");
InputStream ais = AudioSystem.getAudioInputStream(new BufferedInputStream(new FileInputStream("src\main\resources\python_example_test.wav")));
Recognizer recognizer = new Recognizer(model, 16000)) {
int nbytes;
byte[] b = new byte[4096];
while ((nbytes = ais.read(b)) >= 0) {
System.out.println(nbytes);
if (recognizer.acceptWaveForm(b, nbytes)) {
System.out.println(recognizer.getResult());
} else {
System.out.println(recognizer.getPartialResult());
}
}
System.out.println(recognizer.getFinalResult());
}
}
我试图将它转换成可以接受麦克风音频的东西,如下所示:
public static void main(String[] args) {
LibVosk.setLogLevel(LogLevel.DEBUG);
AudioFormat format = new AudioFormat(8000.0f, 16, 1, true, true);
TargetDataLine microphone;
SourceDataLine speakers;
try (Model model = new Model("src\main\resources\model");
Recognizer recognizer = new Recognizer(model, 16000)) {
try {
microphone = AudioSystem.getTargetDataLine(format);
DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
microphone = (TargetDataLine) AudioSystem.getLine(info);
microphone.open(format);
microphone.start();
ByteArrayOutputStream out = new ByteArrayOutputStream();
int numBytesRead;
int CHUNK_SIZE = 1024;
int bytesRead = 0;
DataLine.Info dataLineInfo = new DataLine.Info(SourceDataLine.class, format);
speakers = (SourceDataLine) AudioSystem.getLine(dataLineInfo);
speakers.open(format);
speakers.start();
byte[] b = new byte[4096];
while (bytesRead <= 100000) {
numBytesRead = microphone.read(b, 0, CHUNK_SIZE);
bytesRead += numBytesRead;
out.write(b, 0, numBytesRead);
speakers.write(b, 0, numBytesRead);
if (recognizer.acceptWaveForm(b, numBytesRead)) {
System.out.println(recognizer.getResult());
} else {
System.out.println(recognizer.getPartialResult());
}
}
System.out.println(recognizer.getFinalResult());
speakers.drain();
speakers.close();
microphone.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
这似乎正确地捕获了麦克风数据(因为它也输出到扬声器),但 VOSK 显示没有输入,不断地将结果打印为空字符串。我究竟做错了什么?我正在尝试的事情到底可行吗?我应该尝试寻找不同的语音识别库吗?
这段代码对我来说工作正常,你可以使用这个:
/**
 * Captures audio from the microphone, echoes it to the speakers and feeds it to a VOSK
 * recognizer, printing a result or partial result for every chunk and the final result at the
 * end.
 *
 * <p>Capture stops after a little over 100000000 bytes have been read (roughly 52 minutes at
 * 16 kHz, 16-bit mono) or as soon as the microphone line stops delivering data. Errors raised
 * while opening or using the audio lines are printed to standard error.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    LibVosk.setLogLevel(LogLevel.DEBUG);

    // Capture genuine mono audio and hand the recognizer the very same sample rate, rather
    // than capturing interleaved stereo and declaring it as mono at twice the rate.
    // The audio is 16-bit signed mono PCM, little-endian.
    final float sampleRate = 16000.0f;
    AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);
    DataLine.Info micInfo = new DataLine.Info(TargetDataLine.class, format);
    DataLine.Info speakerInfo = new DataLine.Info(SourceDataLine.class, format);

    try (Model model = new Model("model");
         Recognizer recognizer = new Recognizer(model, sampleRate)) {
        // Both lines are AutoCloseable, so they are released even when an exception is thrown.
        try (TargetDataLine microphone = (TargetDataLine) AudioSystem.getLine(micInfo);
             SourceDataLine speakers = (SourceDataLine) AudioSystem.getLine(speakerInfo)) {
            microphone.open(format);
            microphone.start();
            speakers.open(format);
            speakers.start();

            final int chunkSize = 1024;
            final int maxBytes = 100000000;
            byte[] buffer = new byte[chunkSize];
            int totalBytesRead = 0;

            // The audio is streamed straight through; nothing is accumulated in memory.
            while (totalBytesRead <= maxBytes) {
                int numBytesRead = microphone.read(buffer, 0, chunkSize);
                if (numBytesRead <= 0) {
                    // A stopped or closed line returns no data; bail out instead of spinning.
                    break;
                }
                totalBytesRead += numBytesRead;

                // Echo the captured audio so the user can hear what is being recognized.
                speakers.write(buffer, 0, numBytesRead);

                if (recognizer.acceptWaveForm(buffer, numBytesRead)) {
                    System.out.println(recognizer.getResult());
                } else {
                    System.out.println(recognizer.getPartialResult());
                }
            }

            System.out.println(recognizer.getFinalResult());
            // Let queued playback finish before the lines are closed.
            speakers.drain();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}