Commit 2282648

Merge branch 'main' into fix/ios-non-continuous-recognition

Committed Oct 31, 2024 · 2 parents 28a587d + 131d0ce

17 files changed: +924 -230 lines
‎README.md

(+63 -12)
@@ -19,6 +19,8 @@ expo-speech-recognition implements the iOS [`SFSpeechRecognizer`](https://develo
 - [Transcribing audio files](#transcribing-audio-files)
 - [Supported input audio formats](#supported-input-audio-formats)
 - [File transcription example](#file-transcription-example)
+- [Volume metering](#volume-metering)
+- [Volume metering example](#volume-metering-example)
 - [Polyfilling the Web SpeechRecognition API](#polyfilling-the-web-speechrecognition-api)
 - [Muting the beep sound on Android](#muting-the-beep-sound-on-android)
 - [Improving accuracy of single-word prompts](#improving-accuracy-of-single-word-prompts)
@@ -237,12 +239,15 @@
   // The maximum number of alternative transcriptions to return.
   maxAlternatives: 1,
   // [Default: false] Continuous recognition.
-  // If false on iOS, recognition will run until no speech is detected for 3 seconds.
+  // If false:
+  // - on iOS 17-, recognition will run until no speech is detected for 3 seconds.
+  // - on iOS 18+ and Android, recognition will run until a final result is received.
   // Not supported on Android 12 and below.
   continuous: true,
   // [Default: false] Prevent device from sending audio over the network. Only enabled if the device supports it.
   requiresOnDeviceRecognition: false,
   // [Default: false] Include punctuation in the recognition results. This applies to full stops and commas.
+  // Not supported on Android 12 and below. On Android 13+, only supported when on-device recognition is enabled.
   addsPunctuation: false,
   // [Default: undefined] Short custom phrases that are unique to your app.
   contextualStrings: ["Carlsen", "Nepomniachtchi", "Praggnanandhaa"],
@@ -297,6 +302,13 @@ ExpoSpeechRecognitionModule.start({
     // Default: 50ms for network-based recognition, 15ms for on-device recognition
     chunkDelayMillis: undefined,
   },
+  // Settings for volume change events.
+  volumeChangeEventOptions: {
+    // [Default: false] Whether to emit `volumechange` events when the input volume changes.
+    enabled: false,
+    // [Default: 100ms on iOS] The interval (in milliseconds) to emit `volumechange` events.
+    intervalMillis: 300,
+  },
 });

 // Stop capturing audio (and emit a final result if there is one)
@@ -310,17 +322,18 @@ ExpoSpeechRecognitionModule.abort();

 Events are largely based on the [Web Speech API](https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition). The following events are supported:

 | Event Name | Description | Notes |
 | --- | --- | --- |
 | `audiostart` | Audio capturing has started. | Includes the `uri` if `recordingOptions.persist` is enabled. |
 | `audioend` | Audio capturing has ended. | Includes the `uri` if `recordingOptions.persist` is enabled. |
 | `end` | Speech recognition service has disconnected. | This should always be the last event dispatched, including after errors. |
 | `error` | Fired when a speech recognition error occurs. | You'll also receive an `error` event (with code "aborted") when calling `.abort()`. |
 | `nomatch` | Speech recognition service returns a final result with no significant recognition. | You may have non-final results recognized. This may get emitted after cancellation. |
 | `result` | Speech recognition service returns a result: a word or phrase has been positively recognized. | On Android, continuous mode runs as a segmented session: when a final result is reached, additional partial and final results cover a new segment separate from the previous final result. On iOS, you should expect one final result before speech recognition has stopped. |
 | `speechstart` | Fired when any sound (recognizable speech or not) has been detected. | On iOS, this will fire once in the session after a result has occurred. |
 | `speechend` | Fired when speech recognized by the speech recognition service has stopped being detected. | Not supported yet on iOS. |
 | `start` | Speech recognition has started. | Use this event to indicate to the user when to speak. |
+| `volumechange` | Fired when the input volume changes. | Returns a value between -2 and 10 indicating the volume of the input audio. Consider anything below 0 to be inaudible. |

 ## Handling Errors

@@ -528,6 +541,44 @@ function TranscribeAudioFile() {
 }
 ```

+## Volume metering
+
+You can use the `volumeChangeEventOptions.enabled` option to enable volume metering. This emits a `volumechange` event with the current volume level (a value between -2 and 10). You can use this value to animate a visualization of the user's voice, or to give the user feedback about the input volume level.
+
+### Volume metering example
+
+![Volume metering example](./images/volume-metering.gif)
+
+See: [VolumeMeteringAvatar.tsx](https://github.com/jamsch/expo-speech-recognition/tree/main/example/components/VolumeMeteringAvatar.tsx) for a complete example that uses `react-native-reanimated` to animate the volume metering.
+
+```tsx
+import { Button } from "react-native";
+import {
+  ExpoSpeechRecognitionModule,
+  useSpeechRecognitionEvent,
+} from "expo-speech-recognition";
+
+function VolumeMeteringExample() {
+  useSpeechRecognitionEvent("volumechange", (event) => {
+    // A value between -2 and 10. Values <= 0 are inaudible.
+    console.log("Volume changed to:", event.value);
+  });
+
+  const handleStart = () => {
+    ExpoSpeechRecognitionModule.start({
+      lang: "en-US",
+      volumeChangeEventOptions: {
+        enabled: true,
+        // How often you want to receive the volumechange events
+        intervalMillis: 300,
+      },
+    });
+  };
+
+  return <Button title="Start" onPress={handleStart} />;
+}
+```
+
 ## Polyfilling the Web SpeechRecognition API

 > [!IMPORTANT]
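A note on consuming the `volumechange` payload documented above: the README states the value ranges from -2 to 10, with anything at or below 0 effectively inaudible. Below is a minimal TypeScript sketch of normalizing that value for a UI meter; `normalizeVolume` and `useMeterLevel` are illustrative names and not part of the package.

```ts
import { useState } from "react";
import { useSpeechRecognitionEvent } from "expo-speech-recognition";

// Illustrative helper (not part of expo-speech-recognition): maps the reported
// volume (-2 to 10, where values <= 0 are inaudible) onto a 0..1 scale.
function normalizeVolume(value: number): number {
  const clamped = Math.min(Math.max(value, 0), 10);
  return clamped / 10;
}

// Illustrative hook: keeps the latest normalized level in state so it can
// drive a progress bar or an animated style.
function useMeterLevel(): number {
  const [level, setLevel] = useState(0);
  useSpeechRecognitionEvent("volumechange", (event) => {
    setLevel(normalizeVolume(event.value));
  });
  return level;
}
```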

‎android/src/main/java/expo/modules/speechrecognition/ExpoSpeechRecognitionModule.kt

(+20 -12)
@@ -86,6 +86,8 @@ class ExpoSpeechRecognitionModule : Module() {
       "start",
       // Called when there's results (as a string array, not API compliant)
       "results",
+      // Fired when the input volume changes
+      "volumechange",
     )

     Function("getDefaultRecognitionService") {
@@ -325,26 +327,32 @@
     promise: Promise,
   ) {
     if (Build.VERSION.SDK_INT < Build.VERSION_CODES.TIRAMISU) {
-      promise.resolve(mapOf(
-        "locales" to mutableListOf<String>(),
-        "installedLocales" to mutableListOf<String>(),
-      ))
+      promise.resolve(
+        mapOf(
+          "locales" to mutableListOf<String>(),
+          "installedLocales" to mutableListOf<String>(),
+        ),
+      )
       return
     }

     if (options.androidRecognitionServicePackage == null && !SpeechRecognizer.isOnDeviceRecognitionAvailable(appContext)) {
-      promise.resolve(mapOf(
-        "locales" to mutableListOf<String>(),
-        "installedLocales" to mutableListOf<String>(),
-      ))
+      promise.resolve(
+        mapOf(
+          "locales" to mutableListOf<String>(),
+          "installedLocales" to mutableListOf<String>(),
+        ),
+      )
       return
     }

     if (options.androidRecognitionServicePackage != null && !SpeechRecognizer.isRecognitionAvailable(appContext)) {
-      promise.resolve(mapOf(
-        "locales" to mutableListOf<String>(),
-        "installedLocales" to mutableListOf<String>(),
-      ))
+      promise.resolve(
+        mapOf(
+          "locales" to mutableListOf<String>(),
+          "installedLocales" to mutableListOf<String>(),
+        ),
+      )
       return
     }

‎android/src/main/java/expo/modules/speechrecognition/ExpoSpeechRecognitionOptions.kt

(+11)
@@ -50,6 +50,17 @@ class SpeechRecognitionOptions : Record {

   @Field
   val iosCategory: Map<String, Any>? = null
+
+  @Field
+  val volumeChangeEventOptions: VolumeChangeEventOptions? = null
+}
+
+class VolumeChangeEventOptions : Record {
+  @Field
+  val enabled: Boolean? = false
+
+  @Field
+  val intervalMillis: Int? = null
 }

 class RecordingOptions : Record {
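For readers following the JavaScript side of this change, the two `@Field` properties above are what the `volumeChangeEventOptions` object passed to `ExpoSpeechRecognitionModule.start()` deserializes into. A rough TypeScript shape is sketched below for orientation only; the interface name is illustrative and may not match the package's exported types.

```ts
// Illustrative shape only; see the package's own type definitions for the real ones.
interface VolumeChangeEventOptions {
  /** Whether to emit `volumechange` events. Default: false. */
  enabled?: boolean;
  /** Minimum interval between `volumechange` events, in milliseconds. */
  intervalMillis?: number;
}

// Matches the README usage:
// ExpoSpeechRecognitionModule.start({ lang: "en-US", volumeChangeEventOptions: { enabled: true, intervalMillis: 300 } });
```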

‎android/src/main/java/expo/modules/speechrecognition/ExpoSpeechService.kt

(+23 -3)
@@ -50,6 +50,9 @@ class ExpoSpeechService(
   private var speech: SpeechRecognizer? = null
   private val mainHandler = Handler(Looper.getMainLooper())

+  private lateinit var options: SpeechRecognitionOptions
+  private var lastVolumeChangeEventTime: Long = 0L
+
   /** Audio recorder for persisting audio */
   private var audioRecorder: ExpoAudioRecorder? = null

@@ -108,6 +111,7 @@

   /** Starts speech recognition */
   fun start(options: SpeechRecognitionOptions) {
+    this.options = options
     mainHandler.post {
       log("Start recognition.")

@@ -119,6 +123,7 @@
       delayedFileStreamer = null
       recognitionState = RecognitionState.STARTING
       soundState = SoundState.INACTIVE
+      lastVolumeChangeEventTime = 0L
       try {
         val intent = createSpeechIntent(options)
         speech = createSpeechRecognizer(options)

@@ -428,11 +433,11 @@
   */
  private fun resolveSourceUri(sourceUri: String): File =
    when {
-      // Local file path without URI scheme
-      !sourceUri.startsWith("https://") && !sourceUri.startsWith("file://") -> File(sourceUri)
-
      // File URI
      sourceUri.startsWith("file://") -> File(URI(sourceUri))
+
+      // Local file path without URI scheme
+      !sourceUri.startsWith("https://") -> File(sourceUri)

      // HTTP URI - throw an error
      else -> {

@@ -454,6 +459,21 @@
   }

   override fun onRmsChanged(rmsdB: Float) {
+    if (options.volumeChangeEventOptions?.enabled != true) {
+      return
+    }
+
+    val intervalMs = options.volumeChangeEventOptions?.intervalMillis
+
+    if (intervalMs == null) {
+      sendEvent("volumechange", mapOf("value" to rmsdB))
+    } else {
+      val currentTime = System.currentTimeMillis()
+      if (currentTime - lastVolumeChangeEventTime >= intervalMs) {
+        sendEvent("volumechange", mapOf("value" to rmsdB))
+        lastVolumeChangeEventTime = currentTime
+      }
+    }
     /*
     val isSilent = rmsdB <= 0
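The handler above throttles `volumechange` events: with no `intervalMillis` configured, every RMS reading is forwarded; otherwise an event is emitted only once at least `intervalMillis` has elapsed since the last one. The same pattern in TypeScript, sketched only to illustrate the logic (the function name and signature are made up, not library API):

```ts
// Illustrative throttle mirroring the Kotlin onRmsChanged logic above.
function createVolumeThrottle(
  emit: (value: number) => void,
  intervalMillis?: number,
) {
  let lastEmitTime = 0;
  return (value: number) => {
    // No interval configured: forward every reading.
    if (intervalMillis == null) {
      emit(value);
      return;
    }
    // Otherwise emit at most once per interval.
    const now = Date.now();
    if (now - lastEmitTime >= intervalMillis) {
      emit(value);
      lastEmitTime = now;
    }
  };
}

// const onRms = createVolumeThrottle((v) => console.log("volumechange", v), 300);
```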

‎example/App.tsx

(+27 -2)
@@ -47,6 +47,7 @@ import {
   AndroidOutputFormat,
   IOSOutputFormat,
 } from "expo-av/build/Audio";
+import { VolumeMeteringAvatar } from "./components/VolumeMeteringAvatar";

 const speechRecognitionServices = getSpeechRecognitionServices();

@@ -71,7 +72,16 @@
     continuous: true,
     requiresOnDeviceRecognition: false,
     addsPunctuation: true,
-    contextualStrings: ["Carlsen", "Ian Nepomniachtchi", "Praggnanandhaa"],
+    contextualStrings: [
+      "expo-speech-recognition",
+      "Carlsen",
+      "Ian Nepomniachtchi",
+      "Praggnanandhaa",
+    ],
+    volumeChangeEventOptions: {
+      enabled: false,
+      intervalMillis: 300,
+    },
   });

   useSpeechRecognitionEvent("result", (ev) => {
@@ -140,6 +150,10 @@
     <SafeAreaView style={styles.container}>
       <StatusBar style="dark" translucent={false} />

+      {settings.volumeChangeEventOptions?.enabled ? (
+        <VolumeMeteringAvatar />
+      ) : null}
+
       <View style={styles.card}>
         <Text style={styles.text}>
           {error ? JSON.stringify(error) : "Error messages go here"}

@@ -510,6 +524,17 @@ function GeneralSettings(props: {
         checked={Boolean(settings.continuous)}
         onPress={() => handleChange("continuous", !settings.continuous)}
       />
+
+      <CheckboxButton
+        title="Volume events"
+        checked={Boolean(settings.volumeChangeEventOptions?.enabled)}
+        onPress={() =>
+          handleChange("volumeChangeEventOptions", {
+            enabled: !settings.volumeChangeEventOptions?.enabled,
+            intervalMillis: settings.volumeChangeEventOptions?.intervalMillis,
+          })
+        }
+      />
     </View>

     <View style={styles.textOptionContainer}>
@@ -714,7 +739,7 @@ function AndroidSettings(props: {
         onPress={() =>
           handleChange("androidIntentOptions", {
             ...settings.androidIntentOptions,
-            [key]: !settings.androidIntentOptions?.[key] ?? false,
+            [key]: !settings.androidIntentOptions?.[key],
           })
         }
       />
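The `VolumeMeteringAvatar` component imported above lives in `example/components/VolumeMeteringAvatar.tsx` and is not shown in this excerpt. As a rough orientation only (this is not the actual implementation), a component like it could scale an avatar image with `react-native-reanimated` based on the `volumechange` events:

```tsx
// Illustrative sketch only: the real VolumeMeteringAvatar.tsx may differ.
// Assumes react-native-reanimated v3 and the avatar image added in this commit.
import Animated, {
  useSharedValue,
  useAnimatedStyle,
  withTiming,
} from "react-native-reanimated";
import { useSpeechRecognitionEvent } from "expo-speech-recognition";

export function VolumeMeteringAvatar() {
  const scale = useSharedValue(1);

  useSpeechRecognitionEvent("volumechange", (event) => {
    // Map the -2..10 volume onto a 1..1.5 scale factor.
    const normalized = Math.min(Math.max(event.value, 0), 10) / 10;
    scale.value = withTiming(1 + normalized * 0.5, { duration: 100 });
  });

  const animatedStyle = useAnimatedStyle(() => ({
    transform: [{ scale: scale.value }],
  }));

  return (
    <Animated.Image
      source={require("../assets/avatar.png")}
      style={[{ width: 96, height: 96, borderRadius: 48 }, animatedStyle]}
    />
  );
}
```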

‎example/assets/avatar.png

18.8 KB

‎example/babel.config.js

(+2 -1)
@@ -1,9 +1,10 @@
 const path = require("path");
-module.exports = function (api) {
+module.exports = (api) => {
   api.cache(true);
   return {
     presets: ["babel-preset-expo"],
     plugins: [
+      "react-native-reanimated/plugin",
       [
         "module-resolver",
         {

0 commit comments