Here is my AVSpeechSynthesizer
and AVSpeechSynthesizerDelegate
wrapped into an actor
for better usage and testing:
import AVFAudio.AVSpeechSynthesis
actor SpeechSynthesizer {
var delegate: SpeechSynthesisDelegate?
var synthesizer: AVSpeechSynthesizer?
enum DelegateAction: Equatable {
case didCancel(AVSpeechUtterance)
case didContinue(AVSpeechUtterance)
case didFinish(AVSpeechUtterance)
case didPause(AVSpeechUtterance)
case didStart(AVSpeechUtterance)
}
func stop() {
self.synthesizer?.stopSpeaking(at: .immediate)
}
func start(text: String) async throws -> DelegateAction {
self.stop()
let stream = AsyncThrowingStream<DelegateAction, Error> { continuation in
self.delegate = SpeechSynthesisDelegate(
didCancel: { utterance in
continuation.yield(.didCancel(utterance))
}, didContinue: { utterance in
continuation.yield(.didContinue(utterance))
}, didFinish: { utterance in
continuation.yield(.didFinish(utterance))
continuation.finish()
}, didPause: { utterance in
continuation.yield(.didPause(utterance))
}, didStart: { utterance in
continuation.yield(.didStart(utterance))
}
)
let synthesizer = AVSpeechSynthesizer()
self.synthesizer = synthesizer
synthesizer.delegate = self.delegate
continuation.onTermination = { [weak synthesizer] _ in
synthesizer?.stopSpeaking(at: .immediate)
}
let utterance = AVSpeechUtterance(string: text)
utterance.voice = AVSpeechSynthesisVoice(identifier: "en-US")
utterance.rate = 0.52
self.synthesizer?.speak(utterance)
}
for try await didChange in stream {
return didChange
}
throw CancellationError()
}
}
final class SpeechSynthesisDelegate: NSObject, AVSpeechSynthesizerDelegate, Sendable {
let didCancel: @Sendable (AVSpeechUtterance) -> Void
let didContinue: @Sendable (AVSpeechUtterance) -> Void
let didFinish: @Sendable (AVSpeechUtterance) -> Void
let didPause: @Sendable (AVSpeechUtterance) -> Void
let didStart: @Sendable (AVSpeechUtterance) -> Void
init(
didCancel: @escaping @Sendable (AVSpeechUtterance) -> Void,
didContinue: @escaping @Sendable (AVSpeechUtterance) -> Void,
didFinish: @escaping @Sendable (AVSpeechUtterance) -> Void,
didPause: @escaping @Sendable (AVSpeechUtterance) -> Void,
didStart: @escaping @Sendable (AVSpeechUtterance) -> Void
) {
self.didCancel = didCancel
self.didContinue = didContinue
self.didFinish = didFinish
self.didPause = didPause
self.didStart = didStart
}
func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didCancel utterance: AVSpeechUtterance) {
self.didCancel(utterance)
}
func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didContinue utterance: AVSpeechUtterance) {
self.didContinue(utterance)
}
func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
self.didFinish(utterance)
}
func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didPause utterance: AVSpeechUtterance) {
self.didPause(utterance)
}
func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didStart utterance: AVSpeechUtterance) {
self.didStart(utterance)
}
}
Are is a sample App to use
import SwiftUI
@main
struct SampleApp: App {
private let synthesizer = SpeechSynthesizer()
var body: some Scene {
WindowGroup {
Button {
Task {
do {
let result = try await synthesizer.start(text: "Hello, world!")
switch result {
case .didFinish(let utterance):
print("Finished speaking: \(utterance.speechString)")
case .didStart(let utterance):
print("Started speaking: \(utterance.speechString)")
default:
break
}
} catch {
print("Speech synthesis error: \(error)")
}
}
} label: {
Text("Speak")
}
}
}
}
On button tap, I am receiving the Started speaking: Hello, world!
on the console but nothing is spoken and the Finished speaking: Hello, world!
is not called either. Tested on simulator + device.
Having set a breakpoint at
continuation.onTermination = { [weak synthesizer] _ in
>>>>> synthesizer?.stopSpeaking(at: .immediate)
}
I am guessing that the weak reference on synthesizer "deinit" the synthesizer immediately and nothing is spoken.
Any guess on how to solve this?
Edit #0
The real use case is to use the SpeechSynthesizer
as a dependency in a TCA Reducer
:
// Dependency
import Dependencies
import Foundation
struct SpeechSynthesizerClient {
var startSpeaking: @Sendable (String) async throws -> SpeechSynthesizer.DelegateAction
var stopSpeaking: @Sendable () async -> Void
}
extension DependencyValues {
var speechSynthesizerClient: SpeechSynthesizerClient {
get { self[SpeechSynthesizerClient.self] }
set { self[SpeechSynthesizerClient.self] = newValue }
}
}
extension SpeechSynthesizerClient: DependencyKey {
static var liveValue: Self {
let synthesizer = SpeechSynthesizer()
return Self(
startSpeaking: { text in try await synthesizer.start(text: text) },
stopSpeaking: { await synthesizer.stop() }
)
}
}
extension SpeechSynthesizerClient: TestDependencyKey {
static var previewValue: Self {
return Self(
startSpeaking: { text in
print("Start Speaking: \(text)")
return .didFinish(.init(string: text))
},
stopSpeaking: { print("Stop Speaking") }
)
}
}
// Reducer example
import ComposableArchitecture
import Foundation
struct MyFeature: Reducer {
struct State: Equatable { }
enum Action: Equatable {
case audioRecorderAuthorizationStatusResponse(Bool, Recording.State.RecordingType)
case speechSynthesizerDelegate(TaskResult<SpeechSynthesizer.DelegateAction>)
case speakButtonTapped
}
@Dependency(\.speechSynthesizerClient) var speechSynthesizerClient
var body: some ReducerOf<Self> {
Reduce { state, action in
switch action {
case .speakButtonTapped:
return .run { send in
.send(
.speechSynthesizerDelegate(
TaskResult { try await self.speechSynthesizerClient.startSpeaking("Hello, world.") }
)
)
}
case let .speechSynthesizerDelegate(.success(action)):
print("Action ", action)
switch (action) {
case
.didCancel,
.didContinue,
.didFinish,
.didPause,
.didStart:
return .none
}
case let .speechSynthesizerDelegate(.failure(error)):
print(error.localizedDescription)
return .none
}
}
}
}
The issue you're facing might be related to the asynchronous nature of the code and the premature deallocation of the synthesizer instance before it completes speaking. The weak reference is causing the synthesizer to be deallocated even before it finishes processing.
One way to address this is to keep a strong reference to the SpeechSynthesizer instance until the entire asynchronous operation completes. You can achieve this by storing a reference to the SpeechSynthesizer instance outside the Task block.
By declaring synthesizer as a property of the struct (outside the body closure), it will be kept alive until the SampleApp itself is deallocated. This ensures that the synthesizer is not prematurely deallocated, allowing it to finish speaking.