一个使用语音识别来统计所说单词数量的应用程序
An App that counts the number of spoken words by using Speech Recognition
有没有什么方法可以使用某种(离线)语音识别来统计音频文件中的单词数?实现这种功能的最佳方法是什么?
另外,有什么方法可以让它连续工作,而不必在说话者每次停顿后重新启动它?
Android comes with a built-in speech-to-text feature through which you
can provide speech input to your app.
这是一段简短的示例代码,演示如何使用内置 API 识别语音并将其转换为文本。之后,您可以把识别出的文本作为 String 取出,并像统计普通 String 中的单词那样统计其中的单词数,例如使用 .split(" ") 或其他常用方法。
/**
 * Shows the speech input dialog by launching an
 * {@link RecognizerIntent#ACTION_RECOGNIZE_SPEECH} activity for result.
 *
 * <p>The recognition result is delivered to {@code onActivityResult} under the request code
 * {@code REQ_CODE_SPEECH_INPUT}. If no activity can handle the intent, a short toast with the
 * {@code speech_not_supported} string is shown instead.
 */
private void promptSpeechInput() {
    Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
            RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
    // EXTRA_LANGUAGE is documented as an IETF BCP 47 tag *string* (e.g. "en-US"); passing the
    // Locale object itself stores a Serializable that a recogniser reading a String will not
    // see. toLanguageTag() requires API level 21+.
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.getDefault().toLanguageTag());
    intent.putExtra(RecognizerIntent.EXTRA_PROMPT,
            getString(R.string.speech_prompt));
    try {
        startActivityForResult(intent, REQ_CODE_SPEECH_INPUT);
    } catch (ActivityNotFoundException e) {
        // No speech recognition activity is installed on this device.
        Toast.makeText(getApplicationContext(),
                getString(R.string.speech_not_supported),
                Toast.LENGTH_SHORT).show();
    }
}
/**
 * Receives the speech input and shows the first recognition result in {@code txtSpeechInput}.
 *
 * <p>Anything other than a successful {@code REQ_CODE_SPEECH_INPUT} result carrying data is
 * ignored, as is a missing or empty result list.
 *
 * @param requestCode request code the activity was started with
 * @param resultCode  result code returned by the started activity
 * @param data        result intent; may be {@code null}
 */
@Override
protected void onActivityResult(int requestCode, int resultCode, Intent data) {
    super.onActivityResult(requestCode, resultCode, data);
    if (requestCode != REQ_CODE_SPEECH_INPUT || resultCode != RESULT_OK || data == null) {
        return;
    }
    ArrayList<String> result = data
            .getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS);
    // The extra may be absent or empty; get(0) on it would crash the activity.
    if (result != null && !result.isEmpty()) {
        txtSpeechInput.setText(result.get(0));
    }
}
(完整的原始教程在这里:https://www.androidhive.info/2014/07/android-speech-to-text-tutorial/)
这里是我的问题的答案,供可能需要的人使用:
/**
 * Activity that listens through the platform {@link SpeechRecognizer} and keeps a running
 * total of the words it has recognised.
 *
 * <p>Both {@link #onResults(Bundle)} and {@link #onError(int)} start a new recognition
 * session, so counting continues without user interaction after each pause in speech.
 */
public class MainActivity extends AppCompatActivity implements
        RecognitionListener {
    private static final int PERMISSIONS_REQUEST_RECORD_AUDIO = 1;
    private static final String LOG_TAG = "VoiceRecognitionActivity";

    private TextView returnedText;
    private TextView returnedError;
    private ProgressBar progressBar;
    /** Displays the running word total. */
    private TextView brojtextview;
    private SpeechRecognizer speech = null;
    private Intent recognizerIntent;
    /** Running total of recognised words for this activity instance. */
    private int ukupanbroj;

    /**
     * Destroys the current recogniser (if any) and creates a fresh one with this activity as
     * listener. Finishes the activity when speech recognition is not available.
     */
    private void resetSpeechRecognizer() {
        if (speech != null) {
            speech.destroy();
        }
        speech = SpeechRecognizer.createSpeechRecognizer(this);
        boolean available = SpeechRecognizer.isRecognitionAvailable(this);
        Log.i(LOG_TAG, "isRecognitionAvailable: " + available);
        if (available) {
            speech.setRecognitionListener(this);
        } else {
            finish();
        }
    }

    /** Builds the intent used for every recognition session (free-form model, "sr-RS"). */
    private void setRecogniserIntent() {
        recognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE,
                "sr-RS");
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 3);
    }

    /** Returns whether the RECORD_AUDIO permission is currently granted. */
    private boolean hasRecordAudioPermission() {
        return ContextCompat.checkSelfPermission(
                getApplicationContext(), Manifest.permission.RECORD_AUDIO)
                == PackageManager.PERMISSION_GRANTED;
    }

    /**
     * Starts a recognition session if it is possible to do so.
     *
     * <p>Does nothing when the activity is finishing, no recogniser exists, or the microphone
     * permission is missing. Builds the recogniser intent on demand, because
     * {@link SpeechRecognizer#startListening(Intent)} rejects a {@code null} intent.
     */
    private void startListening() {
        if (isFinishing() || speech == null || !hasRecordAudioPermission()) {
            return;
        }
        if (recognizerIntent == null) {
            setRecogniserIntent();
        }
        speech.startListening(recognizerIntent);
    }

    /** Counts whitespace-separated words; {@code null} or blank text counts as zero. */
    private static int countWords(String text) {
        if (text == null) {
            return 0;
        }
        String trimmed = text.trim();
        // "\\s+" is the regex for a run of whitespace; a single backslash would not be.
        return trimmed.isEmpty() ? 0 : trimmed.split("\\s+").length;
    }

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        // UI initialisation
        returnedText = findViewById(R.id.textView1);
        returnedError = findViewById(R.id.errorView1);
        progressBar = findViewById(R.id.progressBar1);
        brojtextview = findViewById(R.id.brojtextview);

        // Build the intent before the permission check so that later callbacks never see a
        // null intent, even when the permission has not been granted yet.
        setRecogniserIntent();
        resetSpeechRecognizer();
        if (isFinishing()) {
            // resetSpeechRecognizer() found no recognition service and requested finish().
            return;
        }

        // start progress bar
        progressBar.setVisibility(View.VISIBLE);
        progressBar.setIndeterminate(true);

        if (!hasRecordAudioPermission()) {
            ActivityCompat.requestPermissions(this,
                    new String[]{Manifest.permission.RECORD_AUDIO},
                    PERMISSIONS_REQUEST_RECORD_AUDIO);
        }
        // Listening is started from onResume(), which recreates the recogniser anyway.
    }

    @Override
    public void onRequestPermissionsResult(int requestCode,
            @NonNull String[] permissions, @NonNull int[] grantResults) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults);
        if (requestCode != PERMISSIONS_REQUEST_RECORD_AUDIO) {
            return;
        }
        if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
            startListening();
        } else {
            Toast.makeText(MainActivity.this, "Permission Denied!", Toast
                    .LENGTH_SHORT).show();
            finish();
        }
    }

    @Override
    public void onResume() {
        Log.i(LOG_TAG, "resume");
        super.onResume();
        resetSpeechRecognizer();
        startListening();
    }

    @Override
    protected void onPause() {
        Log.i(LOG_TAG, "pause");
        super.onPause();
        if (speech != null) {
            speech.stopListening();
        }
    }

    @Override
    protected void onStop() {
        Log.i(LOG_TAG, "stop");
        super.onStop();
        if (speech != null) {
            speech.destroy();
        }
    }

    @Override
    public void onBeginningOfSpeech() {
        Log.i(LOG_TAG, "onBeginningOfSpeech");
        progressBar.setIndeterminate(false);
        progressBar.setMax(10);
    }

    @Override
    public void onBufferReceived(byte[] buffer) {
        Log.i(LOG_TAG, "onBufferReceived: " + buffer);
    }

    @Override
    public void onEndOfSpeech() {
        Log.i(LOG_TAG, "onEndOfSpeech");
        progressBar.setIndeterminate(true);
        speech.stopListening();
    }

    /**
     * Shows the first recognition candidate, adds its word count to the running total and
     * starts the next recognition session.
     */
    @Override
    public void onResults(Bundle results) {
        Log.i(LOG_TAG, "onResults");
        ArrayList<String> matches = results
                .getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
        // The list may be missing or empty; skip the update but keep listening.
        if (matches != null && !matches.isEmpty()) {
            String text = matches.get(0);
            returnedText.setText(text);
            ukupanbroj += countWords(text);
            brojtextview.setText("" + ukupanbroj);
        }
        startListening();
    }

    /**
     * Displays the error and restarts recognition with a fresh recogniser, except for
     * permission errors, where an immediate retry would only fail again.
     */
    @Override
    public void onError(int errorCode) {
        String errorMessage = getErrorText(errorCode);
        Log.i(LOG_TAG, "FAILED " + errorMessage);
        returnedError.setText(errorMessage);
        if (errorCode == SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS) {
            return;
        }
        // reset voice recogniser
        resetSpeechRecognizer();
        startListening();
    }

    @Override
    public void onEvent(int arg0, Bundle arg1) {
        Log.i(LOG_TAG, "onEvent");
    }

    @Override
    public void onPartialResults(Bundle arg0) {
        Log.i(LOG_TAG, "onPartialResults");
    }

    @Override
    public void onReadyForSpeech(Bundle arg0) {
        Log.i(LOG_TAG, "onReadyForSpeech");
    }

    @Override
    public void onRmsChanged(float rmsdB) {
        progressBar.setProgress((int) rmsdB);
    }

    /**
     * Maps a {@link SpeechRecognizer} error code to a short message.
     *
     * @param errorCode one of the {@code SpeechRecognizer.ERROR_*} constants
     * @return the message for that code, or a generic message for any other value
     */
    public String getErrorText(int errorCode) {
        switch (errorCode) {
            case SpeechRecognizer.ERROR_AUDIO:
                return "Audio recording error";
            case SpeechRecognizer.ERROR_CLIENT:
                return "Client side error";
            case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
                return "Insufficient permissions";
            case SpeechRecognizer.ERROR_NETWORK:
                return "Network error";
            case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                return "Network timeout";
            case SpeechRecognizer.ERROR_NO_MATCH:
                return "No match";
            case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
                return "RecognitionService busy";
            case SpeechRecognizer.ERROR_SERVER:
                return "error from server";
            case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                return "No speech input";
            default:
                return "Didn't understand, please try again.";
        }
    }
}
有没有什么方法可以使用某种(离线)语音识别来统计音频文件中的单词数?实现这种功能的最佳方法是什么?另外,有什么方法可以让它连续工作,而不必在说话者每次停顿后重新启动它?
Android comes with a built-in speech-to-text feature through which you can provide speech input to your app.
这是一段简短的示例代码,演示如何使用内置 API 识别语音并将其转换为文本。之后,您可以把识别出的文本作为 String 取出,并像统计普通 String 中的单词那样统计其中的单词数,例如使用 .split(" ") 或其他常用方法。
/**
 * Shows the speech input dialog by launching an
 * {@link RecognizerIntent#ACTION_RECOGNIZE_SPEECH} activity for result.
 *
 * <p>The recognition result is delivered to {@code onActivityResult} under the request code
 * {@code REQ_CODE_SPEECH_INPUT}. If no activity can handle the intent, a short toast with the
 * {@code speech_not_supported} string is shown instead.
 */
private void promptSpeechInput() {
    Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
            RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
    // EXTRA_LANGUAGE is documented as an IETF BCP 47 tag *string* (e.g. "en-US"); passing the
    // Locale object itself stores a Serializable that a recogniser reading a String will not
    // see. toLanguageTag() requires API level 21+.
    intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, Locale.getDefault().toLanguageTag());
    intent.putExtra(RecognizerIntent.EXTRA_PROMPT,
            getString(R.string.speech_prompt));
    try {
        startActivityForResult(intent, REQ_CODE_SPEECH_INPUT);
    } catch (ActivityNotFoundException e) {
        // No speech recognition activity is installed on this device.
        Toast.makeText(getApplicationContext(),
                getString(R.string.speech_not_supported),
                Toast.LENGTH_SHORT).show();
    }
}
/**
 * Receives the speech input and shows the first recognition result in {@code txtSpeechInput}.
 *
 * <p>Anything other than a successful {@code REQ_CODE_SPEECH_INPUT} result carrying data is
 * ignored, as is a missing or empty result list.
 *
 * @param requestCode request code the activity was started with
 * @param resultCode  result code returned by the started activity
 * @param data        result intent; may be {@code null}
 */
@Override
protected void onActivityResult(int requestCode, int resultCode, Intent data) {
    super.onActivityResult(requestCode, resultCode, data);
    if (requestCode != REQ_CODE_SPEECH_INPUT || resultCode != RESULT_OK || data == null) {
        return;
    }
    ArrayList<String> result = data
            .getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS);
    // The extra may be absent or empty; get(0) on it would crash the activity.
    if (result != null && !result.isEmpty()) {
        txtSpeechInput.setText(result.get(0));
    }
}
(完整的原始教程在这里:https://www.androidhive.info/2014/07/android-speech-to-text-tutorial/)
这里是我的问题的答案,供可能需要的人使用:
/**
 * Activity that listens through the platform {@link SpeechRecognizer} and keeps a running
 * total of the words it has recognised.
 *
 * <p>Both {@link #onResults(Bundle)} and {@link #onError(int)} start a new recognition
 * session, so counting continues without user interaction after each pause in speech.
 */
public class MainActivity extends AppCompatActivity implements
        RecognitionListener {
    private static final int PERMISSIONS_REQUEST_RECORD_AUDIO = 1;
    private static final String LOG_TAG = "VoiceRecognitionActivity";

    private TextView returnedText;
    private TextView returnedError;
    private ProgressBar progressBar;
    /** Displays the running word total. */
    private TextView brojtextview;
    private SpeechRecognizer speech = null;
    private Intent recognizerIntent;
    /** Running total of recognised words for this activity instance. */
    private int ukupanbroj;

    /**
     * Destroys the current recogniser (if any) and creates a fresh one with this activity as
     * listener. Finishes the activity when speech recognition is not available.
     */
    private void resetSpeechRecognizer() {
        if (speech != null) {
            speech.destroy();
        }
        speech = SpeechRecognizer.createSpeechRecognizer(this);
        boolean available = SpeechRecognizer.isRecognitionAvailable(this);
        Log.i(LOG_TAG, "isRecognitionAvailable: " + available);
        if (available) {
            speech.setRecognitionListener(this);
        } else {
            finish();
        }
    }

    /** Builds the intent used for every recognition session (free-form model, "sr-RS"). */
    private void setRecogniserIntent() {
        recognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE,
                "sr-RS");
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 3);
    }

    /** Returns whether the RECORD_AUDIO permission is currently granted. */
    private boolean hasRecordAudioPermission() {
        return ContextCompat.checkSelfPermission(
                getApplicationContext(), Manifest.permission.RECORD_AUDIO)
                == PackageManager.PERMISSION_GRANTED;
    }

    /**
     * Starts a recognition session if it is possible to do so.
     *
     * <p>Does nothing when the activity is finishing, no recogniser exists, or the microphone
     * permission is missing. Builds the recogniser intent on demand, because
     * {@link SpeechRecognizer#startListening(Intent)} rejects a {@code null} intent.
     */
    private void startListening() {
        if (isFinishing() || speech == null || !hasRecordAudioPermission()) {
            return;
        }
        if (recognizerIntent == null) {
            setRecogniserIntent();
        }
        speech.startListening(recognizerIntent);
    }

    /** Counts whitespace-separated words; {@code null} or blank text counts as zero. */
    private static int countWords(String text) {
        if (text == null) {
            return 0;
        }
        String trimmed = text.trim();
        // "\\s+" is the regex for a run of whitespace; a single backslash would not be.
        return trimmed.isEmpty() ? 0 : trimmed.split("\\s+").length;
    }

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        // UI initialisation
        returnedText = findViewById(R.id.textView1);
        returnedError = findViewById(R.id.errorView1);
        progressBar = findViewById(R.id.progressBar1);
        brojtextview = findViewById(R.id.brojtextview);

        // Build the intent before the permission check so that later callbacks never see a
        // null intent, even when the permission has not been granted yet.
        setRecogniserIntent();
        resetSpeechRecognizer();
        if (isFinishing()) {
            // resetSpeechRecognizer() found no recognition service and requested finish().
            return;
        }

        // start progress bar
        progressBar.setVisibility(View.VISIBLE);
        progressBar.setIndeterminate(true);

        if (!hasRecordAudioPermission()) {
            ActivityCompat.requestPermissions(this,
                    new String[]{Manifest.permission.RECORD_AUDIO},
                    PERMISSIONS_REQUEST_RECORD_AUDIO);
        }
        // Listening is started from onResume(), which recreates the recogniser anyway.
    }

    @Override
    public void onRequestPermissionsResult(int requestCode,
            @NonNull String[] permissions, @NonNull int[] grantResults) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults);
        if (requestCode != PERMISSIONS_REQUEST_RECORD_AUDIO) {
            return;
        }
        if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
            startListening();
        } else {
            Toast.makeText(MainActivity.this, "Permission Denied!", Toast
                    .LENGTH_SHORT).show();
            finish();
        }
    }

    @Override
    public void onResume() {
        Log.i(LOG_TAG, "resume");
        super.onResume();
        resetSpeechRecognizer();
        startListening();
    }

    @Override
    protected void onPause() {
        Log.i(LOG_TAG, "pause");
        super.onPause();
        if (speech != null) {
            speech.stopListening();
        }
    }

    @Override
    protected void onStop() {
        Log.i(LOG_TAG, "stop");
        super.onStop();
        if (speech != null) {
            speech.destroy();
        }
    }

    @Override
    public void onBeginningOfSpeech() {
        Log.i(LOG_TAG, "onBeginningOfSpeech");
        progressBar.setIndeterminate(false);
        progressBar.setMax(10);
    }

    @Override
    public void onBufferReceived(byte[] buffer) {
        Log.i(LOG_TAG, "onBufferReceived: " + buffer);
    }

    @Override
    public void onEndOfSpeech() {
        Log.i(LOG_TAG, "onEndOfSpeech");
        progressBar.setIndeterminate(true);
        speech.stopListening();
    }

    /**
     * Shows the first recognition candidate, adds its word count to the running total and
     * starts the next recognition session.
     */
    @Override
    public void onResults(Bundle results) {
        Log.i(LOG_TAG, "onResults");
        ArrayList<String> matches = results
                .getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
        // The list may be missing or empty; skip the update but keep listening.
        if (matches != null && !matches.isEmpty()) {
            String text = matches.get(0);
            returnedText.setText(text);
            ukupanbroj += countWords(text);
            brojtextview.setText("" + ukupanbroj);
        }
        startListening();
    }

    /**
     * Displays the error and restarts recognition with a fresh recogniser, except for
     * permission errors, where an immediate retry would only fail again.
     */
    @Override
    public void onError(int errorCode) {
        String errorMessage = getErrorText(errorCode);
        Log.i(LOG_TAG, "FAILED " + errorMessage);
        returnedError.setText(errorMessage);
        if (errorCode == SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS) {
            return;
        }
        // reset voice recogniser
        resetSpeechRecognizer();
        startListening();
    }

    @Override
    public void onEvent(int arg0, Bundle arg1) {
        Log.i(LOG_TAG, "onEvent");
    }

    @Override
    public void onPartialResults(Bundle arg0) {
        Log.i(LOG_TAG, "onPartialResults");
    }

    @Override
    public void onReadyForSpeech(Bundle arg0) {
        Log.i(LOG_TAG, "onReadyForSpeech");
    }

    @Override
    public void onRmsChanged(float rmsdB) {
        progressBar.setProgress((int) rmsdB);
    }

    /**
     * Maps a {@link SpeechRecognizer} error code to a short message.
     *
     * @param errorCode one of the {@code SpeechRecognizer.ERROR_*} constants
     * @return the message for that code, or a generic message for any other value
     */
    public String getErrorText(int errorCode) {
        switch (errorCode) {
            case SpeechRecognizer.ERROR_AUDIO:
                return "Audio recording error";
            case SpeechRecognizer.ERROR_CLIENT:
                return "Client side error";
            case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
                return "Insufficient permissions";
            case SpeechRecognizer.ERROR_NETWORK:
                return "Network error";
            case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                return "Network timeout";
            case SpeechRecognizer.ERROR_NO_MATCH:
                return "No match";
            case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
                return "RecognitionService busy";
            case SpeechRecognizer.ERROR_SERVER:
                return "error from server";
            case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                return "No speech input";
            default:
                return "Didn't understand, please try again.";
        }
    }
}