文本转语音和语音转文本识别 --> 正在发生自我识别

Text To Speech and Speech To Text Recognition --> self-recognition is occurring (the recognizer transcribes the app's own synthesized speech)

我想开发一个支持语音转文本和文本转语音的应用程序,

i)Speech to Text- Procedure-我已经使用 Speech framework 进行语音到文本,每当我打开一个应用程序并且如果我开始说话,该应用程序应该识别语音并将语音转换为文本。这是工作

ii)Text to Speech - Procedure -i have used AVFoundation and MediaPlayer library 如果用户按下播放按钮,它应该将文本即现在屏幕上出现的任何内容转换为 speech.Working。

这是我面临的问题 在将文本处理为语音时,语音识别器 会识别播放的语音并在文本框中再次打印单词。

示例——如果我说“你好早安”,它会被打印到文本框中;然后我按下播放按钮,应用播放语音“你好早安”,但此时 speech-to-text 识别又会识别到这段声音(也就是自我识别),于是文本框中打印出“你好早安 你好早安”。

我想在处理 Text To Speech 时停止 Speech To Text 进程。

为此,我在播放语音时停止了语音识别请求 这是代码,

@implementation ViewController
{
// Buffer-based recognition request; re-created on each start_record and fed
// from the audio engine's input-node tap.
SFSpeechAudioBufferRecognitionRequest *recognitionRequest;
// Current recognition session; kept so it can be torn down/cancelled later.
SFSpeechRecognitionTask *recognitionTask;
// Microphone capture engine (allocated in viewDidAppear:).
AVAudioEngine *audioEngine;
// Fragments already appended to the text view, used to de-duplicate the
// cumulative partial results in start_record's result handler.
NSMutableArray *speechStringsArray;
// NOTE(review): never read or written in the visible code — presumably a
// leftover mode flag; confirm before removing.
BOOL SpeechToText;
// Scratch string: the newly recognized fragment after stripping known ones.
NSString* resultString;
// Scratch string: the array element currently being compared.
NSString *str ;
// NOTE(review): unused in the visible code.
NSString *searchString;
// Snapshot of the text view's contents handed to the synthesizer.
NSString *textToSpeak;
}

// One-time setup: audio session, remote-control events, voice list,
// synthesizer, and delegates.
- (void)viewDidLoad {
    [super viewDidLoad];

    //Speech To Text ****
    speechStringsArray = [[NSMutableArray alloc] init];

    // Initialize background audio session.
    // BUGFIX: test the BOOL return value of the AVAudioSession calls rather
    // than the NSError pointer — Cocoa only guarantees *error is meaningful
    // when the call actually fails.
    // NOTE(review): this Playback category is later replaced by PlayAndRecord
    // in start_record — confirm both are intended.
    NSError *error = nil;
    AVAudioSession *session = [AVAudioSession sharedInstance];
    if (![session setCategory:AVAudioSessionCategoryPlayback error:&error]) {
        NSLog(@"@error: %@", error);
    }
    if (![session setActive:YES error:&error]) {
        NSLog(@"@error: %@", error);
    }

    // Enabled remote controls (lock screen / headset transport buttons).
    [[UIApplication sharedApplication] beginReceivingRemoteControlEvents];

    // Voice setup: default voice plus the choices offered by the picker.
    self.voicePicker.delegate = self;
    self.voice = [AVSpeechSynthesisVoice voiceWithLanguage:@"en-us"];
    self.voices = [NSMutableArray arrayWithObjects:
                   @{@"voice" : @"en-us", @"label" : @"American English (Female)"},
                   @{@"voice" : @"en-au", @"label" : @"Australian English (Female)"},
                   @{@"voice" : @"en-gb", @"label" : @"British English (Male)"},
                   @{@"voice" : @"en-ie", @"label" : @"Irish English (Female)"},
                   @{@"voice" : @"en-za", @"label" : @"South African English (Female)"},
                   nil];

    // Synthesizer setup
    self.synthesizer = [[AVSpeechSynthesizer alloc] init];
    self.synthesizer.delegate = self;

    // UITextView delegate
    self.textView.delegate = self;

    // This notification is generated from the AppDelegate applicationDidBecomeActive
    // method to make sure that if the play or pause button is updated in the
    // background then the button will be updated in the toolbar.
    [[NSNotificationCenter defaultCenter] addObserver:self
                                             selector:@selector(updateToolbar)
                                                 name:@"updateToolbar"
                                               object:nil];
}


// Creates the speech recognizer and audio engine, then requests speech
// recognition authorization and starts listening when granted.
// NOTE(review): this runs on every appearance, re-creating the recognizer and
// engine each time — confirm that is intended.
- (void)viewDidAppear:(BOOL)animated
{
    // BUGFIX: the original never called super in this lifecycle override.
    [super viewDidAppear:animated];

    // BUGFIX: @"en-US en-UK" is not a valid locale identifier ("en-UK" is not
    // a BCP-47 code either); SFSpeechRecognizer requires a single locale.
    self.speechRecognizer = [[SFSpeechRecognizer alloc]
        initWithLocale:[NSLocale localeWithLocaleIdentifier:@"en-US"]];
    self.speechRecognizer.delegate = self;
    audioEngine = [[AVAudioEngine alloc] init];

    [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus authStatus) {
        switch (authStatus) {
            case SFSpeechRecognizerAuthorizationStatusAuthorized:
                //User gave access to speech recognition
                NSLog(@"Authorized");
                // BUGFIX: this callback is not guaranteed to arrive on the main
                // thread; hop to main before starting the engine / touching UI.
                dispatch_async(dispatch_get_main_queue(), ^{
                    [self start_record];
                });
                break;

            case SFSpeechRecognizerAuthorizationStatusDenied:
                //User denied access to speech recognition
                NSLog(@"AuthorizationStatusDenied");
                break;

            case SFSpeechRecognizerAuthorizationStatusRestricted:
                //Speech recognition restricted on this device
                NSLog(@"AuthorizationStatusRestricted");
                break;

            case SFSpeechRecognizerAuthorizationStatusNotDetermined:
                //Speech recognition not yet authorized
                break;

            default:
                NSLog(@"Default");
                break;
        }
    }];
}

// **** 增加速度和音高的代码 (rate & pitch stepper handlers) ****

  // Mirror the speech-rate stepper's current value in its label, one decimal place.
  - (IBAction)handleSpeedStepper:(UIStepper *)sender
  {
      self.speedValueLabel.text = [NSString stringWithFormat:@"%.1f", self.speedStepper.value];
  }

  // Mirror the pitch stepper's current value in its label, one decimal place.
  - (IBAction)handlePitchStepper:(UIStepper *)sender
  {
      self.pitchValueLabel.text = [NSString stringWithFormat:@"%.1f", self.pitchStepper.value];
  }

// Text-to-speech play/pause toolbar button.
// BUGFIX: signature reconstructed — machine translation had fused this line
// with a prose comment and rendered the parameter name "sender" as "发件人",
// leaving the method syntactically invalid.
- (IBAction)handlePlayPauseButton:(UIBarButtonItem *)sender
{
    if (self.synthesizer.speaking && !self.synthesizer.paused) {
        if (self.pauseSettingSegmentedControl.selectedSegmentIndex == 0) {
            // Stop immediately
            [self.synthesizer pauseSpeakingAtBoundary:AVSpeechBoundaryImmediate];
        }
        else {
            // Stop at end of current word
            [self.synthesizer pauseSpeakingAtBoundary:AVSpeechBoundaryWord];
        }
        [self updateToolbarWithButton:@"play"];
    }
    else if (self.synthesizer.paused) {
        [self.synthesizer continueSpeaking];
        [self updateToolbarWithButton:@"pause"];
    }
    else {
        [self speakUtterance];
        [self updateToolbarWithButton:@"pause"];
    }
}

// Speech-to-text: configures the audio session for recording, streams
// microphone buffers into a SFSpeechAudioBufferRecognitionRequest, and
// appends newly recognized words to the text view.
// BUGFIX: signature reconstructed — machine translation had rendered the
// return type "void" as "无效", leaving the method syntactically invalid.
- (void)start_record {
    NSError *outError;
    AVAudioSession *audioSession = [AVAudioSession sharedInstance];
    // Recording + playback so microphone capture and TTS output can coexist.
    [audioSession setCategory:AVAudioSessionCategoryPlayAndRecord error:&outError];
    [audioSession setMode:AVAudioSessionModeMeasurement error:&outError];
    [audioSession setActive:YES
                withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation
                      error:&outError];

    recognitionRequest = [[SFSpeechAudioBufferRecognitionRequest alloc] init];
    AVAudioInputNode *inputNode = audioEngine.inputNode;

    if (recognitionRequest == nil) {
        NSLog(@"Unable to created a SFSpeechAudioBufferRecognitionRequest object");
    }
    if (inputNode == nil) {
        NSLog(@"Audio engine has no input node ");
    }

    // Configure request so that results are returned before audio recording
    // is finished (live partial transcriptions).
    [recognitionRequest setShouldReportPartialResults:YES];

    // A recognition task represents a speech recognition session.
    // We keep a reference to the task so that it can be cancelled.
    recognitionTask = [self.speechRecognizer
        recognitionTaskWithRequest:recognitionRequest
                     resultHandler:^(SFSpeechRecognitionResult *result, NSError *error1) {
        BOOL isFinal = false;

        // BUGFIX: the original tested `if ((result = result))` — an assignment
        // used as a condition. Compare against nil instead.
        if (result != nil) {
            NSString *speech = result.bestTranscription.formattedString;
            NSLog(@"the speech:%@", speech);

            // Partial results repeat everything recognized so far; strip the
            // fragments already shown so only the new words remain.
            for (int i = 0; i < speechStringsArray.count; i++) {
                str = [speechStringsArray objectAtIndex:i];
                NSRange range = [speech rangeOfString:str options:NSCaseInsensitiveSearch];
                NSLog(@"found: %@", (range.location != NSNotFound) ? @"Yes" : @"No");
                if (range.location != NSNotFound) {
                    resultString = [speech stringByReplacingCharactersInRange:range withString:@""];
                    speech = resultString;
                    NSLog(@" the result is : %@", resultString);
                }
            }

            if (resultString.length > 0) {
                // Subsequent words: append only the new fragment.
                self.textView.text = [NSString stringWithFormat:@"%@%@", self.textView.text, resultString];
                [speechStringsArray addObject:resultString];
            }
            else {
                // First word: nothing was stripped, show the transcription as-is.
                [speechStringsArray addObject:speech];
                self.textView.text = speech;
            }

            NSLog(@" array %@", speechStringsArray);
            isFinal = result.isFinal;
        }

        if (error1 != nil || isFinal) {
            [audioEngine stop];
            [inputNode removeTapOnBus:0];
            recognitionRequest = nil;
            recognitionTask = nil;

            // BUGFIX ("self recognition"): the original restarted recognition
            // unconditionally here, so when updateToolbarWithButton: ended the
            // audio to play TTS, this handler immediately re-armed the
            // microphone and the recognizer transcribed the synthesizer's own
            // voice back into the text view. Only restart while no utterance
            // is being spoken.
            // NOTE(review): recognition is not auto-resumed when TTS finishes;
            // call start_record from the synthesizer's didFinish delegate if
            // continuous listening is desired.
            if (!self.synthesizer.isSpeaking) {
                [self start_record];
            }
        }
    }];

    // Feed microphone buffers into the recognition request.
    AVAudioFormat *recordingFormat = [inputNode outputFormatForBus:0];
    [inputNode installTapOnBus:0
                    bufferSize:1024
                        format:recordingFormat
                         block:^(AVAudioPCMBuffer * _Nonnull buffer, AVAudioTime * _Nonnull when) {
        [recognitionRequest appendAudioPCMBuffer:buffer];
    }];

    NSError *error1;
    [audioEngine prepare];
    [audioEngine startAndReturnError:&error1];
}

        // Speak the current contents of the text view with the selected voice,
        // rate and pitch, then publish the now-playing metadata.
        - (void)speakUtterance
        {
            NSLog(@"speakUtterance");
            didStartSpeaking = NO;

            textToSpeak = [NSString stringWithFormat:@"%@", self.textView.text];

            AVSpeechUtterance *utterance = [[AVSpeechUtterance alloc] initWithString:textToSpeak];
            utterance.voice = self.voice;
            utterance.rate = self.speedStepper.value;
            utterance.pitchMultiplier = self.pitchStepper.value;

            [self.synthesizer speakUtterance:utterance];
            [self displayBackgroundMediaFields];
        }

        // Publish title, album and artwork to the now-playing info center
        // (lock screen / control center).
        - (void)displayBackgroundMediaFields
        {
            UIImage *artworkImage = [UIImage imageNamed:@"Play"];
            MPMediaItemArtwork *artwork = [[MPMediaItemArtwork alloc] initWithImage:artworkImage];

            [MPNowPlayingInfoCenter defaultCenter].nowPlayingInfo = @{
                MPMediaItemPropertyTitle : self.textView.text,
                MPMediaItemPropertyAlbumTitle : @"TextToSpeech App",
                MPMediaItemPropertyArtwork : artwork,
            };
        }

        // Sync the toolbar button with the synthesizer's state. Also invoked
        // via the "updateToolbar" notification posted by the app delegate.
        - (void)updateToolbar
        {
            BOOL activelySpeaking = self.synthesizer.speaking && !self.synthesizer.paused;
            [self updateToolbarWithButton:(activelySpeaking ? @"pause" : @"play")];
        }

    // Rebuild the toolbar with a centered play or pause button, and halt the
    // speech-to-text pipeline first so the upcoming TTS audio is not captured.
    - (void)updateToolbarWithButton:(NSString *)buttonType
    {
        // Stopping the speech-to-text process: stop capture and finalize the
        // current recognition request before any playback begins.
        if (audioEngine.isRunning) {
            [audioEngine stop];
            [recognitionRequest endAudio];
        }

        NSLog(@"updateToolbarWithButton: %@", buttonType);

        UIBarButtonSystemItem systemItem = [buttonType isEqualToString:@"play"]
            ? UIBarButtonSystemItemPlay
            : UIBarButtonSystemItemPause;
        UIBarButtonItem *audioControl =
            [[UIBarButtonItem alloc] initWithBarButtonSystemItem:systemItem
                                                          target:self
                                                          action:@selector(handlePlayPauseButton:)];
        UIBarButtonItem *flexibleItem =
            [[UIBarButtonItem alloc] initWithBarButtonSystemItem:UIBarButtonSystemItemFlexibleSpace
                                                          target:nil
                                                          action:nil];

        [self.toolbar setItems:@[flexibleItem, audioControl, flexibleItem]];
    }

     // Pauses the synthesizer honoring the user's pause-style setting
     // (segment 0 = immediately, otherwise at the current word boundary).
     - (void)pauseSpeechRespectingSetting
     {
         if (self.pauseSettingSegmentedControl.selectedSegmentIndex == 0) {
             // Pause immediately
             [self.synthesizer pauseSpeakingAtBoundary:AVSpeechBoundaryImmediate];
         }
         else {
             // Pause at end of current word
             [self.synthesizer pauseSpeakingAtBoundary:AVSpeechBoundaryWord];
         }
     }

     // Handles lock-screen / headset transport controls; requires
     // beginReceivingRemoteControlEvents (done in viewDidLoad).
     // REFACTOR: the pause-at-boundary logic was duplicated verbatim in the
     // Pause and TogglePlayPause cases; extracted into the helper above.
     - (void)remoteControlReceivedWithEvent:(UIEvent *)receivedEvent
     {
         NSLog(@"receivedEvent: %@", receivedEvent);
         if (receivedEvent.type != UIEventTypeRemoteControl) {
             return;
         }

         switch (receivedEvent.subtype) {
             case UIEventSubtypeRemoteControlPlay:
                 NSLog(@"UIEventSubtypeRemoteControlPlay");
                 if (self.synthesizer.speaking) {
                     [self.synthesizer continueSpeaking];
                 }
                 else {
                     [self speakUtterance];
                 }
                 break;

             case UIEventSubtypeRemoteControlPause:
                 NSLog(@"pause - UIEventSubtypeRemoteControlPause");
                 [self pauseSpeechRespectingSetting];
                 break;

             case UIEventSubtypeRemoteControlTogglePlayPause:
                 NSLog(@"UIEventSubtypeRemoteControlTogglePlayPause");
                 if (self.synthesizer.paused) {
                     [self.synthesizer continueSpeaking];
                 }
                 else {
                     [self pauseSpeechRespectingSetting];
                 }
                 break;

             case UIEventSubtypeRemoteControlNextTrack:
                 NSLog(@"UIEventSubtypeRemoteControlNextTrack - appropriate for playlists");
                 break;

             case UIEventSubtypeRemoteControlPreviousTrack:
                 NSLog(@"UIEventSubtypeRemoteControlPreviousTrack - appropriatefor playlists");
                 break;

             default:
                 break;
         }
     }

#pragma mark - UIPickerViewDelegate Methods

         // The voice picker has a single spinner column.
         - (NSInteger)numberOfComponentsInPickerView:(UIPickerView *)pickerView
         {
             return 1;
         }

           // One picker row per available voice.
           - (NSInteger)pickerView:(UIPickerView *)pickerView numberOfRowsInComponent:(NSInteger)component
           {
               return self.voices.count;
           }

           // Build the label displayed for a voice row (the "label" entry of
           // the voice dictionary).
           - (UIView *)pickerView:(UIPickerView *)pickerView viewForRow:(NSInteger)row forComponent:(NSInteger)component reusingView:(UIView *)view
           {
               UILabel *rowLabel = [[UILabel alloc] init];
               rowLabel.text = self.voices[row][@"label"];
               return rowLabel;
           }

            // Switch the synthesizer's voice to the language of the picked row.
            - (void)pickerView:(UIPickerView *)pickerView didSelectRow:(NSInteger)row inComponent:(NSInteger)component
            {
                NSDictionary *selectedVoice = self.voices[row];
                NSLog(@"new picker voice selected with label: %@", selectedVoice[@"label"]);
                self.voice = [AVSpeechSynthesisVoice voiceWithLanguage:selectedVoice[@"voice"]];
            }

#pragma mark - AVSpeechSynthesizerDelegate Methods

        // BUGFIX: the original declared -speechSynthesizer:didFinishSpeechUtterance:,
        // which is not an AVSpeechSynthesizerDelegate method, so it was never
        // called. The correct selector is -speechSynthesizer:didFinishSpeakingUtterance:.
        - (void)speechSynthesizer:(AVSpeechSynthesizer *)synthesizer didFinishSpeakingUtterance:(AVSpeechUtterance *)utterance
        {
            // Workaround for a silent failure: the first utterance after a voice
            // change can fail without speaking. willSpeakRangeOfSpeechString:
            // sets didStartSpeaking = YES; if it never ran, simply request to
            // speak again.
            if (!didStartSpeaking) {
                [self speakUtterance];
            }
            else {
                [self updateToolbarWithButton:@"play"];
                NSLog(@"the text are:%@",self.textView.text);
            }
        }

        // Fires as each range of the utterance is about to be voiced — proof
        // that speech actually started (see the didFinish workaround).
        - (void)speechSynthesizer:(AVSpeechSynthesizer *)synthesizer willSpeakRangeOfSpeechString:(NSRange)characterRange utterance:(AVSpeechUtterance *)utterance
        {
            didStartSpeaking = YES;
            // Disabled: highlight the spoken range in the text view.
            //[self setTextViewTextWithColoredCharacterRange:characterRange];
        }

        #pragma mark UITextViewDelegate Methods

        // Dismiss the keyboard when Return is tapped instead of inserting a newline.
        - (BOOL)textView:(UITextView *)textView shouldChangeTextInRange:(NSRange)range replacementText:(NSString *)text {
            BOOL isReturnKey = [text isEqualToString:@"\n"];
            if (isReturnKey) {
                [textView resignFirstResponder];
            }
            return !isReturnKey;
        }

不要初始化 ViewDidLoad 中的所有东西。当您点击按钮将文本转换为语音时,那时将语音到文本转换对象设置为 nil 并将委托设置为 nil。反之亦然。