Speech recognition on iPhone 5
I am using the iOS speech recognition API from an Objective-C iOS app.
It works on iPhone 6 and 7, but does not work on iPhone 5 (iOS 10.2.1).
Also note that it works on an iPhone 5s, just not an iPhone 5.
Is the iOS Speech API supposed to work on iPhone 5? Do you have to do anything differently to get it to work, or does anyone know what the issue is?
Basic code is below. No errors occur, and the mic volume is detected, but no speech is detected.
if (audioEngine != NULL) {
    [audioEngine stop];
    [speechTask cancel];
    AVAudioInputNode* inputNode = [audioEngine inputNode];
    [inputNode removeTapOnBus: 0];
}
recording = YES;
micButton.selected = YES;
//NSLog(@"Starting recording... SFSpeechRecognizer Available? %d", [speechRecognizer isAvailable]);
NSError * outError;
//NSLog(@"AUDIO SESSION CATEGORY0: %@", [[AVAudioSession sharedInstance] category]);
AVAudioSession* audioSession = [AVAudioSession sharedInstance];
[audioSession setCategory: AVAudioSessionCategoryPlayAndRecord withOptions:AVAudioSessionCategoryOptionDefaultToSpeaker error:&outError];
[audioSession setMode: AVAudioSessionModeMeasurement error:&outError];
[audioSession setActive: true withOptions: AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation error:&outError];
SFSpeechAudioBufferRecognitionRequest* speechRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
//NSLog(@"AUDIO SESSION CATEGORY1: %@", [[AVAudioSession sharedInstance] category]);
if (speechRequest == nil) {
    NSLog(@"Unable to create SFSpeechAudioBufferRecognitionRequest.");
    return;
}
speechDetectionSamples = 0;
// This somehow fixes a crash on iPhone 7.
// Seems like a bug in iOS ARC / lack of GC.
AVAudioEngine* temp = audioEngine;
audioEngine = [[AVAudioEngine alloc] init];
AVAudioInputNode* inputNode = [audioEngine inputNode];
speechRequest.shouldReportPartialResults = true;
// iOS speech does not detect end of speech, so must track silence.
lastSpeechDetected = -1;
speechTask = [speechRecognizer recognitionTaskWithRequest: speechRequest delegate: self];
[inputNode installTapOnBus:0 bufferSize: 4096 format: [inputNode outputFormatForBus:0] block:^(AVAudioPCMBuffer* buffer, AVAudioTime* when) {
    @try {
        long millis = [[NSDate date] timeIntervalSince1970] * 1000;
        if (lastSpeechDetected != -1 && ((millis - lastSpeechDetected) > 1000)) {
            lastSpeechDetected = -1;
            [speechTask finish];
            return;
        }
        [speechRequest appendAudioPCMBuffer: buffer];
        // Calculate volume level
        if ([buffer floatChannelData] != nil) {
            float volume = fabsf(*buffer.floatChannelData[0]);
            if (volume >= speechDetectionThreshold) {
                speechDetectionSamples++;
                if (speechDetectionSamples >= speechDetectionSamplesNeeded) {
                    // Need to change mic button image in main thread
                    [[NSOperationQueue mainQueue] addOperationWithBlock:^{
                        [micButton setImage: [UIImage imageNamed: @"micRecording"] forState: UIControlStateSelected];
                    }];
                }
            } else {
                speechDetectionSamples = 0;
            }
        }
    }
    @catch (NSException * e) {
        NSLog(@"Exception: %@", e);
    }
}];
[audioEngine prepare];
[audioEngine startAndReturnError: &outError];
NSLog(@"Error %@", outError);
I think the error is in this piece of code:
long millis = [[NSDate date] timeIntervalSince1970] * 1000;
On a 32-bit device (the iPhone 5 is a 32-bit device), a signed long can hold at most 2^31 - 1, i.e. 2,147,483,647. The current Unix time in milliseconds is on the order of 1.5 * 10^12, which far exceeds that maximum, so the multiplication overflows.
I checked on the iPhone 5 simulator and millis had a negative value. The snippet you posted does not show how lastSpeechDetected is set after it is initialized to -1, but if ((millis - lastSpeechDetected) > 1000) somehow evaluates to true, the code enters the if-block and finishes the speech task prematurely.
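A minimal sketch of one possible fix, assuming lastSpeechDetected is also widened to a 64-bit integer (variable names are taken from the question; this is an illustration, not the asker's actual code):

// Use a 64-bit integer so the millisecond timestamp cannot overflow on
// 32-bit hardware such as the iPhone 5. Assumes lastSpeechDetected is
// declared as int64_t as well (hypothetical; adapt to your own state).
int64_t millis = (int64_t)([[NSDate date] timeIntervalSince1970] * 1000.0);

if (lastSpeechDetected != -1 && (millis - lastSpeechDetected) > 1000) {
    lastSpeechDetected = -1;
    [speechTask finish];
    return;
}

Alternatively, keeping the timestamps as NSTimeInterval (double) values in seconds avoids the integer overflow entirely.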