Clear speech input buffer Swift - Speech Framework

I'm following this Apple sample code and was wondering how it would be possible to clear the input buffer (i.e. result in this case) so that dictation restarts once a word has been said.

For example:

When a user says words, they are appended to result.bestTranscription.formattedString. So if I said "White", "Purple", "Yellow", result.bestTranscription.formattedString would read "White Purple Yellow", and words keep getting appended until the buffer stops (~1 min, apparently). I want an action to happen when a particular word is spoken: if you say "blue", for instance, I check whether the buffer contains "blue" (or "Blue"), and since it does, I move on to the next activity and reset the buffer.
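
One idea I've been toying with is to not reset the recognizer at all, and instead track how much of the accumulated transcript I've already acted on. A rough sketch of what I mean (consumedLength and the helper names are mine, not part of the Speech framework):

var consumedLength = 0

func unseenPortion(of transcript: String) -> String {
    // Everything past the part of the transcript already acted on.
    let start = transcript.index(transcript.startIndex,
                                 offsetBy: min(consumedLength, transcript.count))
    return String(transcript[start...])
}

func handlePartialResult(_ transcript: String) {
    if unseenPortion(of: transcript).range(of: "blue", options: .caseInsensitive) != nil {
        // React to the keyword, then mark everything so far as consumed
        // so the same occurrence isn't matched again on the next partial result.
        consumedLength = transcript.count
    }
}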

For now, though, I'm doing it by stopping and restarting the recognizer, and when I do that, I get this error:

2020-09-09 18:25:44.247575-0400 Test App[28460:1337362] [Utility] +[AFAggregator logDictationFailedWithError:] Error Domain=kAFAssistantErrorDomain Code=209 "(null)"

Stopping the audio detection when it hears "blue" works fine, but as soon as I try to re-initialize the speech recognition, it chokes. Below is my recognitionTask:

// Create a recognition task for the speech recognition session.
// Keep a reference to the task so that it can be canceled.
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
    var isFinal = false
    
    if let result = result {
        // Update the text view with the results.
        self.speechInput = result.bestTranscription.formattedString
        isFinal = result.isFinal
        print("Text \(result.bestTranscription.formattedString)")
    }
    
    if result?.bestTranscription.formattedString.range(of: "blue", options: .caseInsensitive) != nil {
        self.ColorView.backgroundColor = .random()
        isFinal = true
    }
    
    if error != nil || isFinal {
        // Stop recognizing speech if there is a problem.
        self.audioEngine.stop()
        self.recognitionRequest?.endAudio() // Necessary
        inputNode.removeTap(onBus: 0)

        self.recognitionRequest = nil
        self.recognitionTask = nil
    }
    
}
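
For what it's worth, I'm wondering whether the restart simply needs to be deferred until the teardown and this result callback have finished, rather than being called synchronously from inside the callback. Something like this is what I have in mind (untested sketch, reusing my existing start()):

    if error != nil || isFinal {
        // Tear everything down first...
        self.audioEngine.stop()
        inputNode.removeTap(onBus: 0)
        self.recognitionRequest?.endAudio()
        self.recognitionTask?.cancel()

        self.recognitionRequest = nil
        self.recognitionTask = nil

        // ...then, only on a clean finish, schedule the restart so it
        // runs after this callback has returned.
        if error == nil {
            DispatchQueue.main.async {
                self.start()
            }
        }
    }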

Here's the full code:

import UIKit
import Speech

extension CGFloat {
    static func random() -> CGFloat {
        
        return CGFloat(arc4random()) / CGFloat(UInt32.max)
    }
}

extension UIColor {
    static func random() -> UIColor {
        return UIColor(
           red:   .random(),
           green: .random(),
           blue:  .random(),
           alpha: 1.0
        )
    }
}

class ViewController: UIViewController, SFSpeechRecognizerDelegate  {
    @IBOutlet var ColorView: UIView!
    @IBOutlet weak var StartButton: UIButton!
    
    private let speechRecognizer = SFSpeechRecognizer()!
    private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
    private var recognitionTask: SFSpeechRecognitionTask?
    private let audioEngine = AVAudioEngine()
    
    private var speechInput: String = ""
    
    override func viewDidLoad() {
        super.viewDidLoad()
        
        // Create and configure the speech recognition request.
        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
        guard let recognitionRequest = recognitionRequest else { fatalError("Unable to create a SFSpeechAudioBufferRecognitionRequest object") }
        recognitionRequest.shouldReportPartialResults = true
        
        // Do any additional setup after loading the view.
        
    }
    
    func start() {
        // Cancel the previous task if it's running.
        recognitionTask?.cancel()
        self.recognitionTask = nil
        
        // Configure the audio session for the app.
        let audioSession = AVAudioSession.sharedInstance()
        do {
            try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
            try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
        } catch {
            print("Audio session configuration failed: \(error)")
        }
        let inputNode = audioEngine.inputNode

        // Create and configure the speech recognition request.
        recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
        guard let recognitionRequest = recognitionRequest else { fatalError("Unable to create a SFSpeechAudioBufferRecognitionRequest object") }
        recognitionRequest.shouldReportPartialResults = true
        
        // Use server-based recognition; set this to true to keep
        // speech recognition data on device.
        if #available(iOS 13, *) {
            recognitionRequest.requiresOnDeviceRecognition = false
        }
        
        // Create a recognition task for the speech recognition session.
        // Keep a reference to the task so that it can be canceled.
        recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
            var isFinal = false
            
            if let result = result {
                // Update the text view with the results.
                self.speechInput = result.bestTranscription.formattedString
                isFinal = result.isFinal
                print("Text \(result.bestTranscription.formattedString)")
            }
            
            if result?.bestTranscription.formattedString.range(of: "blue", options: .caseInsensitive) != nil {
                self.ColorView.backgroundColor = .random()
                isFinal = true
            }
            
            if error != nil || isFinal {
                // Stop recognizing speech if there is a problem.
                self.audioEngine.stop()
                self.recognitionRequest?.endAudio() // Necessary
                inputNode.removeTap(onBus: 0)

                self.recognitionRequest = nil
                self.recognitionTask = nil
                
                self.start() //This chokes it
            }
            
        }

        // Configure the microphone input.
        let recordingFormat = inputNode.outputFormat(forBus: 0)
        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
            self.recognitionRequest?.append(buffer)
        }
        
        audioEngine.prepare()
        do {
            try audioEngine.start()
        } catch {
            print("audioEngine failed to start: \(error)")
        }
        
        // Let the user know to start talking.
        //textView.text = "(Go ahead, I'm listening)"
    }
    
    override func viewDidAppear(_ animated: Bool) {
        super.viewDidAppear(animated)
        // Configure the SFSpeechRecognizer object already
        // stored in a local member variable.
        speechRecognizer.delegate = self
        
        // Asynchronously make the authorization request.
        SFSpeechRecognizer.requestAuthorization { authStatus in

            // Divert to the app's main thread so that the UI
            // can be updated.
            OperationQueue.main.addOperation {
                switch authStatus {
                case .authorized:
                    self.ColorView.backgroundColor = .green
                    
                case .denied:
                    self.ColorView.backgroundColor = .red
                    
                case .restricted:
                    self.ColorView.backgroundColor = .orange
                    
                case .notDetermined:
                    self.ColorView.backgroundColor = .gray
                    
                default:
                    self.ColorView.backgroundColor = .random()
                }
            }
        }
    }
    
    @IBAction func start(_ sender: Any) {
        start()
    }
    
    public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
        if available {
            //recordButton.isEnabled = true
            //recordButton.setTitle("Start Recording", for: [])
        } else {
            //recordButton.isEnabled = false
            //recordButton.setTitle("Recognition Not Available", for: .disabled)
        }
    }
}
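
One more thing I wasn't sure about: whether the audio session needs to be deactivated between runs before start() configures it again, e.g.:

    audioEngine.stop()
    try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)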

I'm sure there's something simple I'm missing. Any advice?
