Skip to content

Commit c2017e1

Browse files
committed
Merge branch 'main' into feat/audio-levels
2 parents 6c041b4 + 1da7819 commit c2017e1

6 files changed

+73
-24
lines changed

README.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,9 @@ ExpoSpeechRecognitionModule.start({
237237
// The maximum number of alternative transcriptions to return.
238238
maxAlternatives: 1,
239239
// [Default: false] Continuous recognition.
240-
// If false on iOS, recognition will run until no speech is detected for 3 seconds.
240+
// If false:
241+
// - on iOS 17-, recognition will run until no speech is detected for 3 seconds.
242+
// - on iOS 18+ and Android, recognition will run until a final result is received.
241243
// Not supported on Android 12 and below.
242244
continuous: true,
243245
// [Default: false] Prevent device from sending audio over the network. Only enabled if the device supports it.

ios/ExpoSpeechRecognitionException.swift

+6-6
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ language-not-supported
2626
The user agent does not support the language specified in the value of lang attribute of the SpeechRecognition object. The set of supported languages is browser-dependent, and from frontend code there is no way to programmatically determine what languages a user's browser supports for speech recognition.
2727
*/
2828

29-
internal class NilRecognizerException: Exception {
29+
internal final class NilRecognizerException: Exception {
3030
override var code: String {
3131
"audio-capture"
3232
}
@@ -36,7 +36,7 @@ internal class NilRecognizerException: Exception {
3636
}
3737
}
3838

39-
internal class PermissionException: Exception {
39+
internal final class PermissionException: Exception {
4040
override var code: String {
4141
"not-allowed"
4242
}
@@ -45,7 +45,7 @@ internal class PermissionException: Exception {
4545
}
4646
}
4747

48-
internal class NotAuthorizedException: Exception {
48+
internal final class NotAuthorizedException: Exception {
4949
override var code: String {
5050
"not-allowed"
5151
}
@@ -55,7 +55,7 @@ internal class NotAuthorizedException: Exception {
5555
}
5656
}
5757

58-
internal class NotPermittedToRecordException: Exception {
58+
internal final class NotPermittedToRecordException: Exception {
5959
override var code: String {
6060
"not-allowed"
6161
}
@@ -64,7 +64,7 @@ internal class NotPermittedToRecordException: Exception {
6464
}
6565
}
6666

67-
internal class InvalidAudioModeException: GenericException<String> {
67+
internal final class InvalidAudioModeException: GenericException<String> {
6868
override var code: String {
6969
"audio-capture"
7070
}
@@ -73,7 +73,7 @@ internal class InvalidAudioModeException: GenericException<String> {
7373
}
7474
}
7575

76-
internal class RecognizerUnavilableException: Exception {
76+
internal final class RecognizerUnavilableException: Exception {
7777
override var code: String {
7878
"service-not-allowed"
7979
}

ios/ExpoSpeechRecognitionModule.swift

+30-5
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,14 @@ public class ExpoSpeechRecognitionModule: Module {
4141
// This is a temporary workaround until the issue is fixed in a future iOS release
4242
var hasSeenFinalResult: Bool = false
4343

44+
// Hack for iOS 18 to avoid sending a "nomatch" event after the final-final result
45+
// Example event order emitted in iOS 18:
46+
// [
47+
// { isFinal: false, transcripts: ["actually", "final", "results"], metadata: { duration: 1500 } },
48+
// { isFinal: true, transcripts: [] }
49+
// ]
50+
var previousResult: SFSpeechRecognitionResult?
51+
4452
public func definition() -> ModuleDefinition {
4553
// Sets the name of the module that JavaScript code will use to refer to the module. Takes a string as an argument.
4654
// Can be inferred from module's class name, but it's recommended to set it explicitly for clarity.
@@ -132,6 +140,9 @@ public class ExpoSpeechRecognitionModule: Module {
132140
do {
133141
let currentLocale = await speechRecognizer?.getLocale()
134142

143+
// Reset the previous result
144+
self.previousResult = nil
145+
135146
// Re-create the speech recognizer when locales change
136147
if self.speechRecognizer == nil || currentLocale != options.lang {
137148
guard let locale = resolveLocale(localeIdentifier: options.lang) else {
@@ -363,12 +374,14 @@ public class ExpoSpeechRecognitionModule: Module {
363374

364375
func sendErrorAndStop(error: String, message: String) {
365376
hasSeenFinalResult = false
377+
previousResult = nil
366378
sendEvent("error", ["error": error, "message": message])
367379
sendEvent("end")
368380
}
369381

370382
func handleEnd() {
371383
hasSeenFinalResult = false
384+
previousResult = nil
372385
sendEvent("end")
373386
}
374387

@@ -427,11 +440,21 @@ public class ExpoSpeechRecognitionModule: Module {
427440
}
428441

429442
if isFinal && results.isEmpty {
430-
// https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition/nomatch_event
431-
// The nomatch event of the Web Speech API is fired
432-
// when the speech recognition service returns a final result with no significant recognition.
433-
sendEvent("nomatch")
434-
return
443+
// Hack for iOS 18 to avoid sending a "nomatch" event after the final-final result
444+
var previousResultWasFinal = false
445+
var previousResultHadTranscriptions = false
446+
if #available(iOS 18.0, *), let previousResult = previousResult {
447+
previousResultWasFinal = previousResult.speechRecognitionMetadata?.speechDuration ?? 0 > 0
448+
previousResultHadTranscriptions = !previousResult.transcriptions.isEmpty
449+
}
450+
451+
if !previousResultWasFinal || !previousResultHadTranscriptions {
452+
// https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition/nomatch_event
453+
// The nomatch event of the Web Speech API is fired
454+
// when the speech recognition service returns a final result with no significant recognition.
455+
sendEvent("nomatch")
456+
return
457+
}
435458
}
436459

437460
sendEvent(
@@ -441,6 +464,8 @@ public class ExpoSpeechRecognitionModule: Module {
441464
"results": results.map { $0.toDictionary() },
442465
]
443466
)
467+
468+
previousResult = result
444469
}
445470

446471
func handleRecognitionError(_ error: Error) {

ios/ExpoSpeechRecognizer.swift

+29-10
Original file line numberDiff line numberDiff line change
@@ -295,8 +295,8 @@ actor ExpoSpeechRecognizer: ObservableObject {
295295
)
296296
}
297297

298-
// Don't run any timers if the audio source is from a file
299-
let continuous = options.continuous || isSourcedFromFile
298+
// Run timers in non-continuous mode, as long as the audio source is the mic
299+
let shouldRunTimers = !options.continuous && !isSourcedFromFile
300300
let audioEngine = self.audioEngine
301301

302302
self.task = recognizer.recognitionTask(
@@ -309,18 +309,20 @@ actor ExpoSpeechRecognizer: ObservableObject {
309309
}
310310
}
311311

312-
// Result handler
312+
// Handle the result
313313
self?.recognitionHandler(
314314
audioEngine: audioEngine,
315315
result: result,
316316
error: error,
317317
resultHandler: resultHandler,
318318
errorHandler: errorHandler,
319-
continuous: continuous
319+
continuous: options.continuous,
320+
shouldRunTimers: shouldRunTimers,
321+
canEmitInterimResults: options.interimResults
320322
)
321323
})
322324

323-
if !continuous {
325+
if shouldRunTimers {
324326
invalidateAndScheduleTimer()
325327
}
326328

@@ -463,7 +465,10 @@ actor ExpoSpeechRecognizer: ObservableObject {
463465
request = SFSpeechAudioBufferRecognitionRequest()
464466
}
465467

466-
request.shouldReportPartialResults = options.interimResults
468+
// We also force-enable partial results on non-continuous mode,
469+
// which will allow us to re-schedule timers when text is detected
470+
// These won't get emitted to the user, however
471+
request.shouldReportPartialResults = options.interimResults || !options.continuous
467472

468473
if recognizer.supportsOnDeviceRecognition {
469474
request.requiresOnDeviceRecognition = options.requiresOnDeviceRecognition
@@ -695,12 +700,25 @@ actor ExpoSpeechRecognizer: ObservableObject {
695700
error: Error?,
696701
resultHandler: @escaping (SFSpeechRecognitionResult) -> Void,
697702
errorHandler: @escaping (Error) -> Void,
698-
continuous: Bool
703+
continuous: Bool,
704+
shouldRunTimers: Bool,
705+
canEmitInterimResults: Bool
699706
) {
707+
// When a final result is returned, we should expect the task to be idle or stopping
700708
let receivedFinalResult = result?.isFinal ?? false
701709
let receivedError = error != nil
702710

703-
if let result: SFSpeechRecognitionResult {
711+
// Hack for iOS 18 to detect final results
712+
// See: https://forums.developer.apple.com/forums/thread/762952 for more info
713+
// This can be emitted multiple times during a continuous session, unlike `result.isFinal` which is only emitted once
714+
var receivedFinalLikeResult: Bool = receivedFinalResult
715+
if #available(iOS 18.0, *), !receivedFinalLikeResult {
716+
receivedFinalLikeResult = result?.speechRecognitionMetadata?.speechDuration ?? 0 > 0
717+
}
718+
719+
let shouldEmitResult = receivedFinalResult || canEmitInterimResults || receivedFinalLikeResult
720+
721+
if let result: SFSpeechRecognitionResult, shouldEmitResult {
704722
Task { @MainActor in
705723
let taskState = await task?.state
706724
// Make sure the task is running before emitting the result
@@ -720,15 +738,16 @@ actor ExpoSpeechRecognizer: ObservableObject {
720738
}
721739
}
722740

723-
if receivedFinalResult || receivedError {
741+
if (receivedFinalLikeResult && !continuous) || receivedError || receivedFinalResult {
724742
Task { @MainActor in
725743
await reset()
726744
}
745+
return
727746
}
728747

729748
// Non-continuous speech recognition
730749
// Stop the speech recognizer if the timer fires after not receiving a result for 3 seconds
731-
if !continuous && !receivedError {
750+
if shouldRunTimers && !receivedError {
732751
invalidateAndScheduleTimer()
733752
}
734753
}

package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "expo-speech-recognition",
3-
"version": "0.2.22",
3+
"version": "0.2.23",
44
"description": "Speech Recognition for React Native Expo projects",
55
"main": "build/index.js",
66
"types": "build/index.d.ts",

src/ExpoSpeechRecognitionModule.types.ts

+4-1
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,10 @@ export type ExpoSpeechRecognitionOptions = {
157157
*
158158
* Not supported on Android 12 and below.
159159
*
160-
* If false on iOS, recognition will run until no speech is detected for 3 seconds.
160+
 * If false, the behavior is as follows:
161+
*
162+
* - on iOS 17-, recognition will run until no speech is detected for 3 seconds.
163+
* - on iOS 18+ and Android, recognition will run until a result with `isFinal: true` is received.
161164
*/
162165
continuous?: boolean;
163166
/** [Default: false] Prevent device from sending audio over the network. Only enabled if the device supports it.

0 commit comments

Comments
 (0)