Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ios): nomatch event firing on iOS 18+ #43

Merged
merged 2 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,9 @@ ExpoSpeechRecognitionModule.start({
// The maximum number of alternative transcriptions to return.
maxAlternatives: 1,
// [Default: false] Continuous recognition.
// If false on iOS, recognition will run until no speech is detected for 3 seconds.
// If false:
// - on iOS 17 and earlier, recognition will run until no speech is detected for 3 seconds.
// - on iOS 18+ and Android, recognition will run until a final result is received.
// Not supported on Android 12 and below.
continuous: true,
// [Default: false] Prevent device from sending audio over the network. Only enabled if the device supports it.
Expand Down
35 changes: 30 additions & 5 deletions ios/ExpoSpeechRecognitionModule.swift
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ public class ExpoSpeechRecognitionModule: Module {
// This is a temporary workaround until the issue is fixed in a future iOS release
var hasSeenFinalResult: Bool = false

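// Workaround for iOS 18: the effective final transcription can arrive flagged as non-final
// (but carrying metadata), followed by an empty result flagged as final. The previous result
// is kept so that the trailing empty result is not reported as a "nomatch" event.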
// Example event order emitted in iOS 18:
// [
// { isFinal: false, transcripts: ["actually", "final", "results"], metadata: { duration: 1500 } },
// { isFinal: true, transcripts: [] }
// ]
var previousResult: SFSpeechRecognitionResult?

public func definition() -> ModuleDefinition {
// Sets the name of the module that JavaScript code will use to refer to the module. Takes a string as an argument.
// Can be inferred from module's class name, but it's recommended to set it explicitly for clarity.
Expand Down Expand Up @@ -130,6 +138,9 @@ public class ExpoSpeechRecognitionModule: Module {
do {
let currentLocale = await speechRecognizer?.getLocale()

// Reset the previous result
self.previousResult = nil

// Re-create the speech recognizer when locales change
if self.speechRecognizer == nil || currentLocale != options.lang {
guard let locale = resolveLocale(localeIdentifier: options.lang) else {
Expand Down Expand Up @@ -358,12 +369,14 @@ public class ExpoSpeechRecognitionModule: Module {

/// Clears the per-session recognition state, then emits an "error" event
/// followed by an "end" event.
///
/// - Parameters:
///   - error: Value sent under the "error" key of the event payload.
///   - message: Value sent under the "message" key of the event payload.
func sendErrorAndStop(error: String, message: String) {
  // Reset tracking state first so nothing from this session leaks into the next one.
  previousResult = nil
  hasSeenFinalResult = false

  // Report the failure, then signal that the session is over.
  sendEvent("error", ["error": error, "message": message])
  sendEvent("end")
}

/// Clears the per-session recognition state and emits the "end" event.
func handleEnd() {
  // Drop the remembered result and the final-result flag before notifying listeners.
  previousResult = nil
  hasSeenFinalResult = false

  sendEvent("end")
}

Expand Down Expand Up @@ -422,11 +435,21 @@ public class ExpoSpeechRecognitionModule: Module {
}

if isFinal && results.isEmpty {
// https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition/nomatch_event
// The nomatch event of the Web Speech API is fired
// when the speech recognition service returns a final result with no significant recognition.
sendEvent("nomatch")
return
// Hack for iOS 18 to avoid sending a "nomatch" event after the final-final result
var previousResultWasFinal = false
var previousResultHadTranscriptions = false
if #available(iOS 18.0, *), let previousResult = previousResult {
previousResultWasFinal = previousResult.speechRecognitionMetadata?.speechDuration ?? 0 > 0
previousResultHadTranscriptions = !previousResult.transcriptions.isEmpty
}

if !previousResultWasFinal || !previousResultHadTranscriptions {
// https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition/nomatch_event
// The nomatch event of the Web Speech API is fired
// when the speech recognition service returns a final result with no significant recognition.
sendEvent("nomatch")
return
}
}

sendEvent(
Expand All @@ -436,6 +459,8 @@ public class ExpoSpeechRecognitionModule: Module {
"results": results.map { $0.toDictionary() },
]
)

previousResult = result
}

func handleRecognitionError(_ error: Error) {
Expand Down
5 changes: 4 additions & 1 deletion src/ExpoSpeechRecognitionModule.types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,10 @@ export type ExpoSpeechRecognitionOptions = {
*
* Not supported on Android 12 and below.
*
* If false on iOS, recognition will run until no speech is detected for 3 seconds.
* If false, the behaviors are the following:
*
* - on iOS 17 and earlier, recognition will run until no speech is detected for 3 seconds.
* - on iOS 18+ and Android, recognition will run until a result with `isFinal: true` is received.
*/
continuous?: boolean;
/** [Default: false] Prevent device from sending audio over the network. Only enabled if the device supports it.
Expand Down