如何将 android 语音中的输入更改为文本
How to change the input in android speech to text
我对 android 编程有点陌生,我最近发现了 android 上可用的语音文本 api。我在网上找到了很多教程,它们很好地解释了如何使用这个功能,但它们的工作方式都是一样的:应用程序使用一个意图来开始识别,而当你对它进行编程时,你没有指定输入。
我的问题是:是否可以像在 AudioRecord 中那样做,并指定我们要使用哪个音频源?(例如 MediaRecorder.AudioSource.MIC)?
我认为这是执行此操作的标准方法,但这是我实现 SpeechToText 的方法:
private void askSpeechInput() {
    // Launches the platform speech recognizer; the transcription comes back
    // through onActivityResult() with REQ_CODE_SPEECH_INPUT.
    Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
            RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
    // Bug fix: EXTRA_LANGUAGE is documented as a BCP-47 *String* tag; the
    // original passed the Locale object itself, which recognizers read back
    // as null and silently fall back to the device default language.
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, "en-US");
    try {
        startActivityForResult(intent, REQ_CODE_SPEECH_INPUT);
    } catch (ActivityNotFoundException a) {
        // Bug fix: the exception was silently swallowed; log it so a missing
        // speech-recognition activity is at least visible while debugging.
        Log.e("SpeechInput", "No activity found to handle speech recognition", a);
    }
}
然后,我可以用在返回结果中得到的文本做任何我想做的事:
@Override
public void onActivityResult(int requestCode, int resultCode, Intent data) {
    // Receives the recognizer's transcription list; element 0 is the most
    // confident hypothesis.
    super.onActivityResult(requestCode, resultCode, data);
    switch (requestCode) {
    case REQ_CODE_SPEECH_INPUT: {
        if (resultCode == RESULT_OK && data != null) {
            ArrayList<String> result =
                    data.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS);
            // Bug fix: guard against a missing or empty results list before
            // calling get(0) — the original would throw NPE/IndexOutOfBounds.
            if (result != null && !result.isEmpty()) {
                String message = result.get(0);
                //Do whatever i want with my message
            }
        }
        break;
    }
    }
}
所以这段代码可以获取麦克风输入,但是否可以更改它?
好吧,我不知道它是否对任何人有帮助,但我找到了解决这个问题的方法。
首先,感谢MediaRecorder.AudioSource,我使用录音机使用我想要的输入录制声音,并将其保存到文件中。
private void startRecording() {
    // Capture raw PCM from the selected audio source and stream it to disk
    // on a dedicated worker thread.
    recorder = new AudioRecord(MediaRecorder.AudioSource.MIC,
            RECORDER_SAMPLERATE, RECORDER_CHANNELS,
            RECORDER_AUDIO_ENCODING, BufferElements2Rec * BytesPerElement);
    recorder.startRecording();
    isRecording = true;
    // Drains the AudioRecord buffer to a file until isRecording is cleared.
    Runnable pcmWriter = new Runnable() {
        public void run() {
            writeAudioDataToFile();
        }
    };
    recordingThread = new Thread(pcmWriter, "AudioRecorder Thread");
    recordingThread.start();
}
之后,我使用找到的 flac 编码器将 .wav 编码为 .flac。
最后,我找到了一些代码,允许我将 flac 文件直接发送到 Google API,并接收我想要的文本!
public void getTranscription(int sampleRate) {
    // Sends the recorded FLAC file to Google's full-duplex speech API:
    // first a GET opens the DOWN (results) channel, then a chunked POST
    // uploads the audio on the UP channel. 'PAIR' ties the two together —
    // just a globally unique long (see the curl full-duplex examples).
    File myfil = new File(fileName);
    if (!myfil.canRead()) {
        Log.d("ParseStarter", "FATAL no read access");
        System.out.println("FATAL CAN'T READ");
        // Bug fix: bail out instead of opening channels for an unreadable file.
        return;
    }
    PAIR = MIN + (long) (Math.random() * ((MAX - MIN) + 1L));
    // DOWN URL just like in curl full-duplex example plus the handler
    downChannel(API_DOWN_URL + PAIR, messageHandler);
    // UP channel: map the file into memory and hand the bytes to upChannel,
    // which POSTs them with chunked encoding so the body size is unbounded.
    FileInputStream fis = null;
    try {
        fis = new FileInputStream(myfil);
        FileChannel fc = fis.getChannel();
        int sz = (int) fc.size();
        MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, sz);
        byte[] data2 = new byte[bb.remaining()];
        Log.d("ParseStarter", "mapfil " + sz + " " + bb.remaining());
        bb.get(data2);
        // API KEY value is part of value in UP_URL_p2
        upChannel(root + up_p1 + PAIR + up_p2 + api_key, messageHandler2, data2);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Bug fix: the stream (and its channel) was never closed — resource leak.
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException ignored) {
                // best effort; nothing useful to do on close failure
            }
        }
    }
}
private void downChannel(String urlStr, final Handler messageHandler) {
    // Opens the long-lived GET (DOWN) channel on a worker thread and forwards
    // each streamed response line to the handler as a what=1 message.
    // NOTE(review): the original comments ask for a read TIMEOUT sized to the
    // upload duration (bitrate = sampleRate * bits-per-sample) — none is set
    // here; confirm whether one is needed.
    final String url = urlStr;
    new Thread() {
        Bundle b;

        public void run() {
            Scanner inStream = openHttpsConnection(url);
            // Bug fix: openHttpsConnection returns null on any failure; the
            // original dereferenced it unconditionally and crashed the thread.
            if (inStream == null) {
                Log.d("ParseStarter", "down channel: connection failed");
                return;
            }
            try {
                // One Message per line; dispatchMessage() invokes the handler
                // synchronously on this thread.
                while (inStream.hasNextLine()) {
                    Message msg = Message.obtain();
                    msg.what = 1;
                    b = new Bundle();
                    b.putString("text", inStream.nextLine());
                    msg.setData(b);
                    messageHandler.dispatchMessage(msg);
                }
            } finally {
                // Bug fix: close the scanner (and the underlying stream).
                inStream.close();
            }
        }
    }.start();
}
private void upChannel(String urlStr, final Handler messageHandler,
        byte[] arg3) {
    // POSTs the audio bytes on the UP channel, accumulates the response body
    // and delivers it to the handler as a what=2 message.
    final String murl = urlStr;
    final byte[] mdata = arg3;
    Log.d("ParseStarter", "upChan " + mdata.length);
    new Thread() {
        public void run() {
            String response = "NAO FOI";
            Message msg = Message.obtain();
            msg.what = 2;
            Scanner inStream = openHttpsPostConnection(murl, mdata);
            // Bug fix: openHttpsPostConnection returns null on failure; guard
            // before use instead of crashing the worker thread. (The original
            // also called inStream.hasNext() and discarded the result.)
            if (inStream == null) {
                Log.d("ParseStarter", "up channel: connection failed");
                return;
            }
            try {
                while (inStream.hasNextLine()) {
                    response += inStream.nextLine();
                    Log.d("ParseStarter", "POST resp " + response.length());
                }
            } finally {
                // Bug fix: close the scanner — the original leaked it.
                inStream.close();
            }
            Bundle b = new Bundle();
            b.putString("post", response);
            msg.setData(b);
            messageHandler.sendMessage(msg);
        }
    }.start();
}
// GET for DOWNSTREAM
private Scanner openHttpsConnection(String urlStr) {
    // Opens the long-lived HTTPS GET used as the DOWN (results) channel.
    // Returns a Scanner over the response body, or null on any failure.
    // NOTE(review): original comment says "TIMEOUT is required" but none is
    // set; consider setConnectTimeout/setReadTimeout — confirm values.
    int resCode = -1;
    Log.d("ParseStarter", "dwnURL " + urlStr);
    try {
        URL url = new URL(urlStr);
        URLConnection urlConn = url.openConnection();
        if (!(urlConn instanceof HttpsURLConnection)) {
            throw new IOException("URL is not an Https URL");
        }
        HttpsURLConnection httpConn = (HttpsURLConnection) urlConn;
        httpConn.setAllowUserInteraction(false);
        httpConn.setInstanceFollowRedirects(true);
        httpConn.setRequestMethod("GET");
        httpConn.connect();
        resCode = httpConn.getResponseCode();
        if (resCode == HttpsURLConnection.HTTP_OK) {
            return new Scanner(httpConn.getInputStream());
        }
        // Bug fix: release the connection on a non-200 answer; the original
        // fell through and leaked it. (Unused local 'in' also removed.)
        httpConn.disconnect();
    } catch (MalformedURLException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}
// POST for UPSTREAM (the original header comment said GET; this is the POST leg)
private Scanner openHttpsPostConnection(String urlStr, byte[] data) {
    // Uploads the FLAC bytes with chunked transfer encoding and returns a
    // Scanner over the response body, or null on any failure.
    int resCode = -1;
    OutputStream out = null;
    try {
        URL url = new URL(urlStr);
        URLConnection urlConn = url.openConnection();
        if (!(urlConn instanceof HttpsURLConnection)) {
            throw new IOException("URL is not an Https URL");
        }
        HttpsURLConnection httpConn = (HttpsURLConnection) urlConn;
        httpConn.setAllowUserInteraction(false);
        httpConn.setInstanceFollowRedirects(true);
        httpConn.setRequestMethod("POST");
        httpConn.setDoOutput(true);
        // Chunked mode (0 = default chunk size) so the body size is unbounded.
        httpConn.setChunkedStreamingMode(0);
        httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate="
                + rate );
        httpConn.connect();
        try {
            // this opens a connection, then sends POST & headers.
            out = httpConn.getOutputStream();
            // Note: audio longer than ~15 s should be written in paced
            // segments (sleeping to approximate the bitrate, ~30K/s here)
            // rather than in one block, to mimic a live stream.
            Log.d("ParseStarter", "IO beg on data");
            out.write(data);
            Log.d("ParseStarter", "IO fin on data");
            // NOW you can look at the status.
            resCode = httpConn.getResponseCode();
            // Bug fix: getResponseMessage().getBytes().toString() printed the
            // byte-array identity (e.g. "[B@1a2b3c"), not the message text.
            Log.d("ParseStarter", "POST OK resp " + httpConn.getResponseMessage());
            if (resCode / 100 != 2) {
                Log.d("ParseStarter", "POST bad io ");
            }
        } catch (IOException e) {
            Log.d("ParseStarter", "FATAL " + e);
        } finally {
            // Bug fix: close the request stream; the original leaked it.
            if (out != null) {
                try {
                    out.close();
                } catch (IOException ignored) {
                    // best effort
                }
            }
        }
        if (resCode == HttpsURLConnection.HTTP_OK) {
            Log.d("ParseStarter", "OK RESP to POST return scanner ");
            return new Scanner(httpConn.getInputStream());
        }
    } catch (MalformedURLException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}
// DOWN handler: receives streamed result lines from the GET channel (what=1)
// and turns sufficiently long ones into chat messages on the UI thread.
Handler messageHandler = new Handler() {
    public void handleMessage(Message msg) {
        super.handleMessage(msg);
        switch (msg.what) {
        case 1: // GET DOWNSTREAM json id="@+id/comment"
            String mtxt = msg.getData().getString("text");
            // Short lines appear to be empty/keep-alive results and are
            // skipped — TODO confirm the 20-char threshold.
            if (mtxt.length() > 20) {
                final String f_msg = mtxt;
                handler.post(new Runnable() { // This thread runs in the UI
                    // TREATMENT FOR GOOGLE RESPONSE
                    @Override
                    public void run() {
                        System.out.println(f_msg);
                        final ChatMessage chatMessage = new ChatMessage(user1, user2,
                                "", "" + random.nextInt(1000), true);
                        // Bug fix: setMsgID() was called twice in the original.
                        chatMessage.setMsgID();
                        chatMessage.body = f_msg;
                        chatMessage.Date = CommonMethods.getCurrentDate();
                        chatMessage.Time = CommonMethods.getCurrentTime();
                        msg_edittext.setText("");
                        chatAdapter.add(chatMessage);
                        chatAdapter.notifyDataSetChanged();
                    }
                });
            }
            break;
        case 2:
            break;
        }
    }
}; // doDOWNSTRM Handler end
// UPSTREAM channel. its servicing a thread and should have its own handler
Handler messageHandler2 = new Handler() {
    public void handleMessage(Message msg) {
        super.handleMessage(msg);
        // Both message kinds carry the POST response under the same "post"
        // key, so the two original cases collapse into one fall-through.
        switch (msg.what) {
        case 1: // GET DOWNSTREAM json
        case 2:
            Log.d("ParseStarter", msg.getData().getString("post"));
            break;
        }
    }
}; // UPstream handler end
我从网上找到的一个项目中获得了这部分代码(原文中的链接已丢失),其中与 Google API 的连接有效,但文件编码器似乎已过时。
我对 android 编程有点陌生,我最近发现了 android 上可用的语音文本 api。我在网上找到了很多教程,它们很好地解释了如何使用这个功能,但它们的工作方式都是一样的:应用程序使用一个意图来开始识别,而当你对它进行编程时,你没有指定输入。
我的问题是:是否可以像在 AudioRecord 中那样做,并指定我们要使用哪个音频源?(例如 MediaRecorder.AudioSource.MIC)?
我认为这是执行此操作的标准方法,但这是我实现 SpeechToText 的方法:
private void askSpeechInput() {
    // Launches the platform speech recognizer; the transcription comes back
    // through onActivityResult() with REQ_CODE_SPEECH_INPUT.
    Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
            RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
    // Bug fix: EXTRA_LANGUAGE is documented as a BCP-47 *String* tag; the
    // original passed the Locale object itself, which recognizers read back
    // as null and silently fall back to the device default language.
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, "en-US");
    try {
        startActivityForResult(intent, REQ_CODE_SPEECH_INPUT);
    } catch (ActivityNotFoundException a) {
        // Bug fix: the exception was silently swallowed; log it so a missing
        // speech-recognition activity is at least visible while debugging.
        Log.e("SpeechInput", "No activity found to handle speech recognition", a);
    }
}
然后,我可以用在返回结果中得到的文本做任何我想做的事:
@Override
public void onActivityResult(int requestCode, int resultCode, Intent data) {
    // Receives the recognizer's transcription list; element 0 is the most
    // confident hypothesis.
    super.onActivityResult(requestCode, resultCode, data);
    switch (requestCode) {
    case REQ_CODE_SPEECH_INPUT: {
        if (resultCode == RESULT_OK && data != null) {
            ArrayList<String> result =
                    data.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS);
            // Bug fix: guard against a missing or empty results list before
            // calling get(0) — the original would throw NPE/IndexOutOfBounds.
            if (result != null && !result.isEmpty()) {
                String message = result.get(0);
                //Do whatever i want with my message
            }
        }
        break;
    }
    }
}
所以这段代码可以获取麦克风输入,但是否可以更改它?
好吧,我不知道它是否对任何人有帮助,但我找到了解决这个问题的方法。
首先,感谢MediaRecorder.AudioSource,我使用录音机使用我想要的输入录制声音,并将其保存到文件中。
private void startRecording() {
    // Capture raw PCM from the selected audio source and stream it to disk
    // on a dedicated worker thread.
    recorder = new AudioRecord(MediaRecorder.AudioSource.MIC,
            RECORDER_SAMPLERATE, RECORDER_CHANNELS,
            RECORDER_AUDIO_ENCODING, BufferElements2Rec * BytesPerElement);
    recorder.startRecording();
    isRecording = true;
    // Drains the AudioRecord buffer to a file until isRecording is cleared.
    Runnable pcmWriter = new Runnable() {
        public void run() {
            writeAudioDataToFile();
        }
    };
    recordingThread = new Thread(pcmWriter, "AudioRecorder Thread");
    recordingThread.start();
}
之后,我使用找到的 flac 编码器将 .wav 编码为 .flac。
最后,我找到了一些代码,允许我将 flac 文件直接发送到 Google API,并接收我想要的文本!
public void getTranscription(int sampleRate) {
    // Sends the recorded FLAC file to Google's full-duplex speech API:
    // first a GET opens the DOWN (results) channel, then a chunked POST
    // uploads the audio on the UP channel. 'PAIR' ties the two together —
    // just a globally unique long (see the curl full-duplex examples).
    File myfil = new File(fileName);
    if (!myfil.canRead()) {
        Log.d("ParseStarter", "FATAL no read access");
        System.out.println("FATAL CAN'T READ");
        // Bug fix: bail out instead of opening channels for an unreadable file.
        return;
    }
    PAIR = MIN + (long) (Math.random() * ((MAX - MIN) + 1L));
    // DOWN URL just like in curl full-duplex example plus the handler
    downChannel(API_DOWN_URL + PAIR, messageHandler);
    // UP channel: map the file into memory and hand the bytes to upChannel,
    // which POSTs them with chunked encoding so the body size is unbounded.
    FileInputStream fis = null;
    try {
        fis = new FileInputStream(myfil);
        FileChannel fc = fis.getChannel();
        int sz = (int) fc.size();
        MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, sz);
        byte[] data2 = new byte[bb.remaining()];
        Log.d("ParseStarter", "mapfil " + sz + " " + bb.remaining());
        bb.get(data2);
        // API KEY value is part of value in UP_URL_p2
        upChannel(root + up_p1 + PAIR + up_p2 + api_key, messageHandler2, data2);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Bug fix: the stream (and its channel) was never closed — resource leak.
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException ignored) {
                // best effort; nothing useful to do on close failure
            }
        }
    }
}
private void downChannel(String urlStr, final Handler messageHandler) {
    // Opens the long-lived GET (DOWN) channel on a worker thread and forwards
    // each streamed response line to the handler as a what=1 message.
    // NOTE(review): the original comments ask for a read TIMEOUT sized to the
    // upload duration (bitrate = sampleRate * bits-per-sample) — none is set
    // here; confirm whether one is needed.
    final String url = urlStr;
    new Thread() {
        Bundle b;

        public void run() {
            Scanner inStream = openHttpsConnection(url);
            // Bug fix: openHttpsConnection returns null on any failure; the
            // original dereferenced it unconditionally and crashed the thread.
            if (inStream == null) {
                Log.d("ParseStarter", "down channel: connection failed");
                return;
            }
            try {
                // One Message per line; dispatchMessage() invokes the handler
                // synchronously on this thread.
                while (inStream.hasNextLine()) {
                    Message msg = Message.obtain();
                    msg.what = 1;
                    b = new Bundle();
                    b.putString("text", inStream.nextLine());
                    msg.setData(b);
                    messageHandler.dispatchMessage(msg);
                }
            } finally {
                // Bug fix: close the scanner (and the underlying stream).
                inStream.close();
            }
        }
    }.start();
}
private void upChannel(String urlStr, final Handler messageHandler,
        byte[] arg3) {
    // POSTs the audio bytes on the UP channel, accumulates the response body
    // and delivers it to the handler as a what=2 message.
    final String murl = urlStr;
    final byte[] mdata = arg3;
    Log.d("ParseStarter", "upChan " + mdata.length);
    new Thread() {
        public void run() {
            String response = "NAO FOI";
            Message msg = Message.obtain();
            msg.what = 2;
            Scanner inStream = openHttpsPostConnection(murl, mdata);
            // Bug fix: openHttpsPostConnection returns null on failure; guard
            // before use instead of crashing the worker thread. (The original
            // also called inStream.hasNext() and discarded the result.)
            if (inStream == null) {
                Log.d("ParseStarter", "up channel: connection failed");
                return;
            }
            try {
                while (inStream.hasNextLine()) {
                    response += inStream.nextLine();
                    Log.d("ParseStarter", "POST resp " + response.length());
                }
            } finally {
                // Bug fix: close the scanner — the original leaked it.
                inStream.close();
            }
            Bundle b = new Bundle();
            b.putString("post", response);
            msg.setData(b);
            messageHandler.sendMessage(msg);
        }
    }.start();
}
// GET for DOWNSTREAM
private Scanner openHttpsConnection(String urlStr) {
    // Opens the long-lived HTTPS GET used as the DOWN (results) channel.
    // Returns a Scanner over the response body, or null on any failure.
    // NOTE(review): original comment says "TIMEOUT is required" but none is
    // set; consider setConnectTimeout/setReadTimeout — confirm values.
    int resCode = -1;
    Log.d("ParseStarter", "dwnURL " + urlStr);
    try {
        URL url = new URL(urlStr);
        URLConnection urlConn = url.openConnection();
        if (!(urlConn instanceof HttpsURLConnection)) {
            throw new IOException("URL is not an Https URL");
        }
        HttpsURLConnection httpConn = (HttpsURLConnection) urlConn;
        httpConn.setAllowUserInteraction(false);
        httpConn.setInstanceFollowRedirects(true);
        httpConn.setRequestMethod("GET");
        httpConn.connect();
        resCode = httpConn.getResponseCode();
        if (resCode == HttpsURLConnection.HTTP_OK) {
            return new Scanner(httpConn.getInputStream());
        }
        // Bug fix: release the connection on a non-200 answer; the original
        // fell through and leaked it. (Unused local 'in' also removed.)
        httpConn.disconnect();
    } catch (MalformedURLException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}
// POST for UPSTREAM (the original header comment said GET; this is the POST leg)
private Scanner openHttpsPostConnection(String urlStr, byte[] data) {
    // Uploads the FLAC bytes with chunked transfer encoding and returns a
    // Scanner over the response body, or null on any failure.
    int resCode = -1;
    OutputStream out = null;
    try {
        URL url = new URL(urlStr);
        URLConnection urlConn = url.openConnection();
        if (!(urlConn instanceof HttpsURLConnection)) {
            throw new IOException("URL is not an Https URL");
        }
        HttpsURLConnection httpConn = (HttpsURLConnection) urlConn;
        httpConn.setAllowUserInteraction(false);
        httpConn.setInstanceFollowRedirects(true);
        httpConn.setRequestMethod("POST");
        httpConn.setDoOutput(true);
        // Chunked mode (0 = default chunk size) so the body size is unbounded.
        httpConn.setChunkedStreamingMode(0);
        httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate="
                + rate );
        httpConn.connect();
        try {
            // this opens a connection, then sends POST & headers.
            out = httpConn.getOutputStream();
            // Note: audio longer than ~15 s should be written in paced
            // segments (sleeping to approximate the bitrate, ~30K/s here)
            // rather than in one block, to mimic a live stream.
            Log.d("ParseStarter", "IO beg on data");
            out.write(data);
            Log.d("ParseStarter", "IO fin on data");
            // NOW you can look at the status.
            resCode = httpConn.getResponseCode();
            // Bug fix: getResponseMessage().getBytes().toString() printed the
            // byte-array identity (e.g. "[B@1a2b3c"), not the message text.
            Log.d("ParseStarter", "POST OK resp " + httpConn.getResponseMessage());
            if (resCode / 100 != 2) {
                Log.d("ParseStarter", "POST bad io ");
            }
        } catch (IOException e) {
            Log.d("ParseStarter", "FATAL " + e);
        } finally {
            // Bug fix: close the request stream; the original leaked it.
            if (out != null) {
                try {
                    out.close();
                } catch (IOException ignored) {
                    // best effort
                }
            }
        }
        if (resCode == HttpsURLConnection.HTTP_OK) {
            Log.d("ParseStarter", "OK RESP to POST return scanner ");
            return new Scanner(httpConn.getInputStream());
        }
    } catch (MalformedURLException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return null;
}
// DOWN handler: receives streamed result lines from the GET channel (what=1)
// and turns sufficiently long ones into chat messages on the UI thread.
Handler messageHandler = new Handler() {
    public void handleMessage(Message msg) {
        super.handleMessage(msg);
        switch (msg.what) {
        case 1: // GET DOWNSTREAM json id="@+id/comment"
            String mtxt = msg.getData().getString("text");
            // Short lines appear to be empty/keep-alive results and are
            // skipped — TODO confirm the 20-char threshold.
            if (mtxt.length() > 20) {
                final String f_msg = mtxt;
                handler.post(new Runnable() { // This thread runs in the UI
                    // TREATMENT FOR GOOGLE RESPONSE
                    @Override
                    public void run() {
                        System.out.println(f_msg);
                        final ChatMessage chatMessage = new ChatMessage(user1, user2,
                                "", "" + random.nextInt(1000), true);
                        // Bug fix: setMsgID() was called twice in the original.
                        chatMessage.setMsgID();
                        chatMessage.body = f_msg;
                        chatMessage.Date = CommonMethods.getCurrentDate();
                        chatMessage.Time = CommonMethods.getCurrentTime();
                        msg_edittext.setText("");
                        chatAdapter.add(chatMessage);
                        chatAdapter.notifyDataSetChanged();
                    }
                });
            }
            break;
        case 2:
            break;
        }
    }
}; // doDOWNSTRM Handler end
// UPSTREAM channel. its servicing a thread and should have its own handler
Handler messageHandler2 = new Handler() {
    public void handleMessage(Message msg) {
        super.handleMessage(msg);
        // Both message kinds carry the POST response under the same "post"
        // key, so the two original cases collapse into one fall-through.
        switch (msg.what) {
        case 1: // GET DOWNSTREAM json
        case 2:
            Log.d("ParseStarter", msg.getData().getString("post"));
            break;
        }
    }
}; // UPstream handler end
我从网上找到的一个项目中获得了这部分代码(原文中的链接已丢失),其中与 Google API 的连接有效,但文件编码器似乎已过时。