// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/
// TODO: FXIOS-14934 - remove preconcurrency
import AVFoundation
import Speech
import Common
import CoreMedia
/// A transcription engine built on iOS 26's `SpeechAnalyzer` + `SpeechTranscriber`.
///
/// Responsibilities:
/// - Check microphone + speech permissions (via the injected `AuthorizeProvider`)
/// - Resolve a supported locale and ensure its speech model is installed
/// - Stream transcription results through an `AsyncThrowingStream` continuation
///
/// This type is an `@MainActor` class to keep audio/transcription state safe across concurrent calls.
@available(iOS 26.0, *)
@MainActor
final class SpeechAnalyzerEngine: TranscriptionEngine {
    private let audioManager: AudioManagerProtocol
    private let authorizer: AuthorizeProvider
    private let locale: Locale
    private var analyzer: SpeechAnalyzer?
    private var transcriber: SpeechTranscriber?
    /// Feeds captured audio buffers into the analyzer's input stream; finished in `stop()`.
    private var inputContinuation: AsyncStream<AnalyzerInput>.Continuation?
    /// Consumes `transcriber.results` and forwards them to the client's continuation.
    private var resultsTask: Task<Void, Error>?

    init(
        locale: Locale = Locale.current,
        audioManager: AudioManagerProtocol,
        authorizer: AuthorizeProvider
    ) {
        self.audioManager = audioManager
        self.authorizer = authorizer
        self.locale = locale
    }

    /// Verifies permissions and configures the audio session before transcription starts.
    ///
    /// - Throws: `SpeechError.permissionDenied` when either microphone or speech
    ///   permission is missing, or any error thrown while configuring the audio session.
    func prepare() async throws {
        guard await isPermissionGranted() else {
            throw SpeechError.permissionDenied
        }
        try audioManager.configureAudioSession()
    }

    /// Starts transcription and streams results through `continuation`.
    ///
    /// This method:
    /// 1) resolves a supported locale
    /// 2) creates a transcriber + analyzer
    /// 3) ensures the speech model is installed (downloads if needed)
    /// 4) prepares the analyzer with a compatible audio format
    /// 5) starts analyzer + results tasks
    /// 6) starts microphone capture and feeds audio buffers into the analyzer input stream
    ///
    /// - Parameter continuation: Receives incremental and final `SpeechResult` values.
    func start(continuation: AsyncThrowingStream<SpeechResult, any Error>.Continuation) async throws {
        // TODO: Use LocaleProvider instead
        let resolvedLocale = try await resolveLocale(with: locale)
        let transcriber = SpeechTranscriber(
            locale: resolvedLocale,
            transcriptionOptions: [],
            reportingOptions: [.volatileResults],
            attributeOptions: [.transcriptionConfidence]
        )
        self.transcriber = transcriber
        try await ensureModelAvailable(transcriber: transcriber, locale: resolvedLocale)
        let analyzer = SpeechAnalyzer(modules: [transcriber])
        self.analyzer = analyzer
        guard let targetFormat = await SpeechAnalyzer.bestAvailableAudioFormat(compatibleWith: [transcriber]) else {
            throw SpeechError.noAudioFormat
        }
        try await analyzer.prepareToAnalyze(in: targetFormat)
        // Build the analyzer input stream; mic capture yields audio buffers into `inputContinuation`.
        // `makeStream()` hands back stream + continuation directly, avoiding the
        // store-inside-builder-then-unwrap dance (whose failure path was unreachable anyway,
        // since the AsyncStream build closure runs synchronously).
        let (inputStream, inputContinuation) = AsyncStream<AnalyzerInput>.makeStream()
        self.inputContinuation = inputContinuation
        try await analyzer.start(inputSequence: inputStream)
        // Capture the local `transcriber` rather than `[weak self]`: it does not retain
        // the engine, and it guarantees the client continuation is always finished —
        // the previous weak-self guard could return early and leave the client's
        // stream hanging forever if the engine was deallocated first.
        resultsTask = Task {
            do {
                for try await result in transcriber.results {
                    let speechResult = SpeechResult(
                        text: String(result.text.characters),
                        isFinal: result.isFinal
                    )
                    continuation.yield(speechResult)
                    if result.isFinal {
                        continuation.finish()
                        // Stop consuming once the client stream is complete; the old code
                        // kept iterating and yielding into a finished continuation.
                        break
                    }
                }
            } catch {
                continuation.finish(throwing: error)
            }
        }
        // Start microphone capture and feed `AnalyzerInput(buffer:)` into the stream.
        try audioManager.startCapture(targetFormat: targetFormat, bufferSize: 4096) { buffer in
            inputContinuation.yield(AnalyzerInput(buffer: buffer))
        }
        try audioManager.prepareAndStartEngine()
    }

    /// Stops audio capture, drains the analyzer, and tears down all transcription state.
    func stop() async throws {
        audioManager.stopEngine()
        inputContinuation?.finish()
        inputContinuation = nil
        try await analyzer?.finalizeAndFinishThroughEndOfInput()
        // Dropping a Task reference does NOT cancel it — cancel explicitly so the
        // results consumer cannot outlive the engine if the stream hasn't finished.
        resultsTask?.cancel()
        resultsTask = nil
        transcriber = nil
        analyzer = nil
    }

    /// Returns `true` only when BOTH microphone and speech-recognition permissions are granted.
    private func isPermissionGranted() async -> Bool {
        let isMicAuthorized = await authorizer.isMicrophonePermissionAuthorized()
        let isSpeechAuthorized = await authorizer.isSpeechPermissionAuthorized()
        return isMicAuthorized && isSpeechAuthorized
    }

    /// Maps `currentLocale` to an equivalent locale supported by `SpeechTranscriber`.
    ///
    /// - Throws: `SpeechError.unableToSupportLocale` when no equivalent exists.
    private func resolveLocale(with currentLocale: Locale) async throws -> Locale {
        guard let supported = await SpeechTranscriber.supportedLocale(equivalentTo: currentLocale) else {
            throw SpeechError.unableToSupportLocale
        }
        return supported
    }

    /// Ensures a speech model is available for `locale`.
    ///
    /// If the locale is supported but not installed, this will download and install the model.
    ///
    /// - Throws: `SpeechError.unableToSupportLocale` for unsupported locales,
    ///   or any error from the asset download/installation.
    private func ensureModelAvailable(transcriber: SpeechTranscriber, locale: Locale) async throws {
        guard await supported(locale: locale) else {
            throw SpeechError.unableToSupportLocale
        }
        if await installed(locale: locale) {
            return
        }
        try await downloadIfNeeded(for: transcriber)
    }

    /// Whether `locale` is among the transcriber's supported locales (compared by BCP-47 identifier).
    private func supported(locale: Locale) async -> Bool {
        let target = locale.identifier(.bcp47)
        return await SpeechTranscriber.supportedLocales.contains { $0.identifier(.bcp47) == target }
    }

    /// Whether the speech model for `locale` is already installed (compared by BCP-47 identifier).
    private func installed(locale: Locale) async -> Bool {
        let target = locale.identifier(.bcp47)
        // Compare directly; the old Set -> map -> contains round trip allocated twice for nothing.
        return await SpeechTranscriber.installedLocales.contains { $0.identifier(.bcp47) == target }
    }

    /// Downloads and installs the speech model assets for `module` when the system requires it.
    private func downloadIfNeeded(for module: SpeechTranscriber) async throws {
        if let downloader = try await AssetInventory.assetInstallationRequest(supporting: [module]) {
            try await downloader.downloadAndInstall()
        }
    }
}