AVSpeechSynthesizer gets terminated immediately without speaking

107 Views Asked by At

Here is my AVSpeechSynthesizer and AVSpeechSynthesizerDelegate wrapped into an actor for better usage and testing:

import AVFAudio.AVSpeechSynthesis

actor SpeechSynthesizer {
    var delegate: SpeechSynthesisDelegate?
    var synthesizer: AVSpeechSynthesizer?

    enum DelegateAction: Equatable {
        case didCancel(AVSpeechUtterance)
        case didContinue(AVSpeechUtterance)
        case didFinish(AVSpeechUtterance)
        case didPause(AVSpeechUtterance)
        case didStart(AVSpeechUtterance)
    }

    func stop() {
        self.synthesizer?.stopSpeaking(at: .immediate)
    }

    func start(text: String) async throws -> DelegateAction {
        self.stop()

        let stream = AsyncThrowingStream<DelegateAction, Error> { continuation in
            self.delegate = SpeechSynthesisDelegate(
                didCancel: { utterance in
                    continuation.yield(.didCancel(utterance))
                }, didContinue: { utterance in
                    continuation.yield(.didContinue(utterance))
                }, didFinish: { utterance in
                    continuation.yield(.didFinish(utterance))
                    continuation.finish()
                }, didPause: { utterance in
                    continuation.yield(.didPause(utterance))
                }, didStart: { utterance in
                    continuation.yield(.didStart(utterance))
                }
            )
            let synthesizer = AVSpeechSynthesizer()
            self.synthesizer = synthesizer
            synthesizer.delegate = self.delegate

            continuation.onTermination = { [weak synthesizer] _ in
                synthesizer?.stopSpeaking(at: .immediate)
            }

            let utterance = AVSpeechUtterance(string: text)
            utterance.voice = AVSpeechSynthesisVoice(identifier: "en-US")
            utterance.rate = 0.52
            self.synthesizer?.speak(utterance)
        }

        for try await didChange in stream {
            return didChange
        }
        throw CancellationError()
    }
}

final class SpeechSynthesisDelegate: NSObject, AVSpeechSynthesizerDelegate, Sendable {
    let didCancel: @Sendable (AVSpeechUtterance) -> Void
    let didContinue: @Sendable (AVSpeechUtterance) -> Void
    let didFinish: @Sendable (AVSpeechUtterance) -> Void
    let didPause: @Sendable (AVSpeechUtterance) -> Void
    let didStart: @Sendable (AVSpeechUtterance) -> Void

    init(
        didCancel: @escaping @Sendable (AVSpeechUtterance) -> Void,
        didContinue: @escaping @Sendable (AVSpeechUtterance) -> Void,
        didFinish: @escaping @Sendable (AVSpeechUtterance) -> Void,
        didPause: @escaping @Sendable (AVSpeechUtterance) -> Void,
        didStart: @escaping @Sendable (AVSpeechUtterance) -> Void
    ) {
        self.didCancel = didCancel
        self.didContinue = didContinue
        self.didFinish = didFinish
        self.didPause = didPause
        self.didStart = didStart
    }

    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didCancel utterance: AVSpeechUtterance) {
        self.didCancel(utterance)
    }

    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didContinue utterance: AVSpeechUtterance) {
        self.didContinue(utterance)
    }

    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
        self.didFinish(utterance)
    }

    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didPause utterance: AVSpeechUtterance) {
        self.didPause(utterance)
    }

    func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didStart utterance: AVSpeechUtterance) {
        self.didStart(utterance)
    }
}

Are is a sample App to use

import SwiftUI

@main
struct SampleApp: App {
    private let synthesizer = SpeechSynthesizer()

    var body: some Scene {
        WindowGroup {
            Button {
                Task {
                    do {
                        let result = try await synthesizer.start(text: "Hello, world!")
                        switch result {
                        case .didFinish(let utterance):
                            print("Finished speaking: \(utterance.speechString)")
                        case .didStart(let utterance):
                            print("Started speaking: \(utterance.speechString)")
                        default:
                            break
                        }
                    } catch {
                        print("Speech synthesis error: \(error)")
                    }
                }
            } label: {
                Text("Speak")
            }
        }
    }
}

On button tap, I am receiving the Started speaking: Hello, world! on the console but nothing is spoken and the Finished speaking: Hello, world! is not called either. Tested on simulator + device.

Having set a breakpoint at

continuation.onTermination = { [weak synthesizer] _ in
>>>>>    synthesizer?.stopSpeaking(at: .immediate)
}

I am guessing that the weak reference on synthesizer "deinit" the synthesizer immediately and nothing is spoken.

Any guess on how to solve this?

Edit #0

The real use case is to use the SpeechSynthesizer as a dependency in a TCA Reducer:

// Dependency
import Dependencies
import Foundation

struct SpeechSynthesizerClient {
    var startSpeaking: @Sendable (String) async throws -> SpeechSynthesizer.DelegateAction
    var stopSpeaking: @Sendable () async -> Void
}

extension DependencyValues {
    var speechSynthesizerClient: SpeechSynthesizerClient {
        get { self[SpeechSynthesizerClient.self] }
        set { self[SpeechSynthesizerClient.self] = newValue }
    }
}

extension SpeechSynthesizerClient: DependencyKey {
    static var liveValue: Self {
        let synthesizer = SpeechSynthesizer()
        return Self(
            startSpeaking: { text in try await synthesizer.start(text: text) },
            stopSpeaking: { await synthesizer.stop() }
        )
    }
}

extension SpeechSynthesizerClient: TestDependencyKey {
    static var previewValue: Self {
        return Self(
            startSpeaking: { text in
                print("Start Speaking: \(text)")
                return .didFinish(.init(string: text))
            },
            stopSpeaking: { print("Stop Speaking") }
        )
    }
}
// Reducer example
import ComposableArchitecture
import Foundation

struct MyFeature: Reducer {
    struct State: Equatable { }

    enum Action: Equatable {
        case audioRecorderAuthorizationStatusResponse(Bool, Recording.State.RecordingType)
        case speechSynthesizerDelegate(TaskResult<SpeechSynthesizer.DelegateAction>)
        case speakButtonTapped
    }

    @Dependency(\.speechSynthesizerClient) var speechSynthesizerClient

    var body: some ReducerOf<Self> {
        Reduce { state, action in
            switch action {
            case .speakButtonTapped:
                return .run { send in
                        .send(
                            .speechSynthesizerDelegate(
                                TaskResult { try await self.speechSynthesizerClient.startSpeaking("Hello, world.") }
                            )
                        )
                }

            case let .speechSynthesizerDelegate(.success(action)):
                print("Action ", action)
                switch (action) {
                case
                        .didCancel,
                        .didContinue,
                        .didFinish,
                        .didPause,
                        .didStart:
                    return .none
                }

            case let .speechSynthesizerDelegate(.failure(error)):
                print(error.localizedDescription)
                return .none
            }
        }
    }
}
1

There are 1 best solutions below

1
On

The issue you're facing might be related to the asynchronous nature of the code and the premature deallocation of the synthesizer instance before it completes speaking. The weak reference is causing the synthesizer to be deallocated even before it finishes processing.

One way to address this is to keep a strong reference to the SpeechSynthesizer instance until the entire asynchronous operation completes. You can achieve this by storing a reference to the SpeechSynthesizer instance outside the Task block.

@main
struct SampleApp: App {
    private var synthesizer = SpeechSynthesizer()

    var body: some Scene {
        WindowGroup {
            Button {
                Task {
                    do {
                        let result = try await synthesizer.start(text: "Hello, world!")
                        switch result {
                        case .didFinish(let utterance):
                            print("Finished speaking: \(utterance.speechString)")
                        case .didStart(let utterance):
                            print("Started speaking: \(utterance.speechString)")
                        default:
                            break
                        }
                    } catch {
                        print("Speech synthesis error: \(error)")
                    }
                }
            } label: {
                Text("Speak")
            }
        }
    }
}

By declaring synthesizer as a property of the struct (outside the body closure), it will be kept alive until the SampleApp itself is deallocated. This ensures that the synthesizer is not prematurely deallocated, allowing it to finish speaking.