Skip to content

Commit 9076e15

Browse files
committed
Merge branch 'main' into feat/android-language-detection
2 parents e9690a6 + 131d0ce commit 9076e15

27 files changed

+1155
-345
lines changed

LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2024 jamsch
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

+62-5
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ expo-speech-recognition implements the iOS [`SFSpeechRecognizer`](https://develo
1919
- [Transcribing audio files](#transcribing-audio-files)
2020
- [Supported input audio formats](#supported-input-audio-formats)
2121
- [File transcription example](#file-transcription-example)
22+
- [Volume metering](#volume-metering)
23+
- [Volume metering example](#volume-metering-example)
2224
- [Polyfilling the Web SpeechRecognition API](#polyfilling-the-web-speechrecognition-api)
2325
- [Muting the beep sound on Android](#muting-the-beep-sound-on-android)
2426
- [Improving accuracy of single-word prompts](#improving-accuracy-of-single-word-prompts)
@@ -58,23 +60,29 @@ npm install expo-speech-recognition
5860

5961
> The config plugin updates the Android App Manifest to include package visibility filtering for `com.google.android.googlequicksearchbox` (Google's Speech Recognition) along with the required permissions for Android and iOS.
6062
63+
To configure `androidSpeechServicePackages`, list any additional speech service packages that aren't shown under the `forceQueryable` section of the output of `adb shell dumpsys package queries`.
64+
6165
```js
6266
// app.json
6367
{
6468
"expo": {
6569
"plugins": [
70+
"plugin-one",
71+
"plugin-two",
72+
73+
// no config (v0.2.22+)
74+
"expo-speech-recognition",
75+
76+
// or with config
6677
[
6778
"expo-speech-recognition",
6879
{
6980
"microphonePermission": "Allow $(PRODUCT_NAME) to use the microphone.",
7081
"speechRecognitionPermission": "Allow $(PRODUCT_NAME) to use speech recognition.",
71-
// Add additional speech service packages here that aren't listed
72-
// under the `forceQueryable` section when running the command:
73-
// "adb shell dumpsys package queries"
74-
// default: ["com.google.android.googlequicksearchbox"]
7582
"androidSpeechServicePackages": ["com.google.android.googlequicksearchbox"]
7683
}
7784
]
85+
// rest of your plugins
7886
]
7987
}
8088
}
@@ -231,12 +239,15 @@ ExpoSpeechRecognitionModule.start({
231239
// The maximum number of alternative transcriptions to return.
232240
maxAlternatives: 1,
233241
// [Default: false] Continuous recognition.
234-
// If false on iOS, recognition will run until no speech is detected for 3 seconds.
242+
// If false:
243+
// - on iOS 17 and below, recognition will run until no speech is detected for 3 seconds.
244+
// - on iOS 18+ and Android, recognition will run until a final result is received.
235245
// Not supported on Android 12 and below.
236246
continuous: true,
237247
// [Default: false] Prevent device from sending audio over the network. Only enabled if the device supports it.
238248
requiresOnDeviceRecognition: false,
239249
// [Default: false] Include punctuation in the recognition results. This applies to full stops and commas.
250+
// Not supported on Android 12 and below. On Android 13+, only supported when on-device recognition is enabled.
240251
addsPunctuation: false,
241252
// [Default: undefined] Short custom phrases that are unique to your app.
242253
contextualStrings: ["Carlsen", "Nepomniachtchi", "Praggnanandhaa"],
@@ -291,6 +302,13 @@ ExpoSpeechRecognitionModule.start({
291302
// Default: 50ms for network-based recognition, 15ms for on-device recognition
292303
chunkDelayMillis: undefined,
293304
},
305+
// Settings for volume change events.
306+
volumeChangeEventOptions: {
307+
// [Default: false] Whether to emit the `volumechange` events when the input volume changes.
308+
enabled: false,
309+
// [Default: 100ms on iOS] The interval (in milliseconds) at which to emit `volumechange` events.
310+
intervalMillis: 300,
311+
},
294312
});
295313

296314
// Stop capturing audio (and emit a final result if there is one)
@@ -315,6 +333,7 @@ Events are largely based on the [Web Speech API](https://developer.mozilla.org/e
315333
| `speechstart` | Fired when any sound — recognizable speech or not — has been detected | On iOS, this will fire once in the session after a result has occurred |
316334
| `speechend` | Fired when speech recognized by the speech recognition service has stopped being detected. | Not supported yet on iOS |
317335
| `start` | Speech recognition has started | Use this event to indicate to the user when to speak. |
336+
| `volumechange` | Fired when the input volume changes. | Returns a value between -2 and 10 indicating the volume of the input audio. Consider anything below 0 to be inaudible. |
318337
| `languagedetection` | Called when the language detection (and switching) results are available. | Android 14+ only. Enabled with `EXTRA_ENABLE_LANGUAGE_DETECTION` in the `androidIntent` option when starting. Also can be called multiple times by enabling `EXTRA_ENABLE_LANGUAGE_SWITCH`. |
319338

320339
## Handling Errors
@@ -523,6 +542,44 @@ function TranscribeAudioFile() {
523542
}
524543
```
525544

545+
## Volume metering
546+
547+
Set the `volumeChangeEventOptions.enabled` option to `true` to enable volume metering. This emits `volumechange` events carrying the current volume level (a value between -2 and 10). You can use this value to animate a volume meter for the user's voice, or to give the user feedback about their input volume.
548+
549+
### Volume metering example
550+
551+
![Volume metering example](./images/volume-metering.gif)
552+
553+
See [VolumeMeteringAvatar.tsx](https://github.com/jamsch/expo-speech-recognition/tree/main/example/components/VolumeMeteringAvatar.tsx) for a complete example that uses `react-native-reanimated` to animate the volume meter.
554+
555+
```tsx
556+
import { Button } from "react-native";
557+
import {
558+
ExpoSpeechRecognitionModule,
559+
useSpeechRecognitionEvent,
560+
} from "expo-speech-recognition";
561+
562+
function VolumeMeteringExample() {
563+
useSpeechRecognitionEvent("volumechange", (event) => {
564+
// a value between -2 and 10. <= 0 is inaudible
565+
console.log("Volume changed to:", event.value);
566+
});
567+
568+
const handleStart = () => {
569+
ExpoSpeechRecognitionModule.start({
570+
lang: "en-US",
571+
volumeChangeEventOptions: {
572+
enabled: true,
573+
// how often you want to receive the volumechange event
574+
intervalMillis: 300,
575+
},
576+
});
577+
};
578+
579+
return <Button title="Start" onPress={handleStart} />;
580+
}
581+
```
582+
526583
## Polyfilling the Web SpeechRecognition API
527584

528585
> [!IMPORTANT]

android/src/main/java/expo/modules/speechrecognition/ExpoSpeechRecognitionModule.kt

+2
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ class ExpoSpeechRecognitionModule : Module() {
8888
"results",
8989
// Called when the language detection (and switching) results are available.
9090
"languagedetection",
91+
// Fired when the input volume changes
92+
"volumechange",
9193
)
9294

9395
Function("getDefaultRecognitionService") {

android/src/main/java/expo/modules/speechrecognition/ExpoSpeechRecognitionOptions.kt

+11
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,17 @@ class SpeechRecognitionOptions : Record {
5050

5151
@Field
5252
val iosCategory: Map<String, Any>? = null
53+
54+
@Field
55+
val volumeChangeEventOptions: VolumeChangeEventOptions? = null
56+
}
57+
58+
class VolumeChangeEventOptions : Record {
59+
@Field
60+
val enabled: Boolean? = false
61+
62+
@Field
63+
val intervalMillis: Int? = null
5364
}
5465

5566
class RecordingOptions : Record {

android/src/main/java/expo/modules/speechrecognition/ExpoSpeechService.kt

+23-3
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ class ExpoSpeechService(
5050
private var speech: SpeechRecognizer? = null
5151
private val mainHandler = Handler(Looper.getMainLooper())
5252

53+
private lateinit var options: SpeechRecognitionOptions
54+
private var lastVolumeChangeEventTime: Long = 0L
55+
5356
/** Audio recorder for persisting audio */
5457
private var audioRecorder: ExpoAudioRecorder? = null
5558

@@ -108,6 +111,7 @@ class ExpoSpeechService(
108111

109112
/** Starts speech recognition */
110113
fun start(options: SpeechRecognitionOptions) {
114+
this.options = options
111115
mainHandler.post {
112116
log("Start recognition.")
113117

@@ -119,6 +123,7 @@ class ExpoSpeechService(
119123
delayedFileStreamer = null
120124
recognitionState = RecognitionState.STARTING
121125
soundState = SoundState.INACTIVE
126+
lastVolumeChangeEventTime = 0L
122127
try {
123128
val intent = createSpeechIntent(options)
124129
speech = createSpeechRecognizer(options)
@@ -428,11 +433,11 @@ class ExpoSpeechService(
428433
*/
429434
private fun resolveSourceUri(sourceUri: String): File =
430435
when {
431-
// Local file path without URI scheme
432-
!sourceUri.startsWith("https://") && !sourceUri.startsWith("file://") -> File(sourceUri)
433-
434436
// File URI
435437
sourceUri.startsWith("file://") -> File(URI(sourceUri))
438+
439+
// Local file path without URI scheme
440+
!sourceUri.startsWith("https://") -> File(sourceUri)
436441

437442
// HTTP URI - throw an error
438443
else -> {
@@ -454,6 +459,21 @@ class ExpoSpeechService(
454459
}
455460

456461
override fun onRmsChanged(rmsdB: Float) {
462+
if (options.volumeChangeEventOptions?.enabled != true) {
463+
return
464+
}
465+
466+
val intervalMs = options.volumeChangeEventOptions?.intervalMillis
467+
468+
if (intervalMs == null) {
469+
sendEvent("volumechange", mapOf("value" to rmsdB))
470+
} else {
471+
val currentTime = System.currentTimeMillis()
472+
if (currentTime - lastVolumeChangeEventTime >= intervalMs) {
473+
sendEvent("volumechange", mapOf("value" to rmsdB))
474+
lastVolumeChangeEventTime = currentTime
475+
}
476+
}
457477
/*
458478
val isSilent = rmsdB <= 0
459479

app.plugin.js

+18-12
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// @ts-check
12
const {
23
withAndroidManifest,
34
AndroidConfig,
@@ -6,7 +7,7 @@ const {
67
} = require("expo/config-plugins");
78

89
/**
9-
* @type {import("expo/config-plugins").ConfigPlugin<{ packages: string[] }>}
10+
* @type {import("expo/config-plugins").ConfigPlugin<{ packages?: string[] }>}
1011
*/
1112
const withAndroidPackageVisibilityFiltering = (config, { packages = [] }) => {
1213
return withAndroidManifest(config, (config) => {
@@ -35,22 +36,27 @@ const withAndroidPackageVisibilityFiltering = (config, { packages = [] }) => {
3536
</intent>
3637
</queries>
3738
*/
38-
allPackages.forEach((pkg) => {
39+
40+
for (const pkg of allPackages) {
3941
if (
4042
!config.modResults.manifest.queries.some(
4143
(query) => query.package?.[0]?.$?.["android:name"] === pkg,
4244
)
4345
) {
4446
config.modResults.manifest.queries.push({
45-
package: { $: { "android:name": pkg } },
46-
intent: {
47-
action: {
48-
$: { "android:name": "android.speech.RecognitionService" },
47+
package: [{ $: { "android:name": pkg } }],
48+
intent: [
49+
{
50+
action: [
51+
{
52+
$: { "android:name": "android.speech.RecognitionService" },
53+
},
54+
],
4955
},
50-
},
56+
],
5157
});
5258
}
53-
});
59+
}
5460

5561
return config;
5662
});
@@ -61,7 +67,7 @@ const withAndroidPackageVisibilityFiltering = (config, { packages = [] }) => {
6167
* microphonePermission?: string;
6268
* speechRecognitionPermission?: string;
6369
* androidSpeechServicePackages?: string[];
64-
* }>}
70+
* }|undefined>}
6571
*/
6672
const withExpoSpeechRecognition = (config, props) => {
6773
if (!config.ios) {
@@ -73,12 +79,12 @@ const withExpoSpeechRecognition = (config, props) => {
7379
}
7480

7581
config.ios.infoPlist.NSSpeechRecognitionUsageDescription =
76-
props.speechRecognitionPermission ||
82+
props?.speechRecognitionPermission ||
7783
config.ios.infoPlist.NSSpeechRecognitionUsageDescription ||
7884
"Allow $(PRODUCT_NAME) to use speech recognition.";
7985

8086
config.ios.infoPlist.NSMicrophoneUsageDescription =
81-
props.microphonePermission ||
87+
props?.microphonePermission ||
8288
config.ios.infoPlist.NSMicrophoneUsageDescription ||
8389
"Allow $(PRODUCT_NAME) to use the microphone.";
8490

@@ -92,7 +98,7 @@ const withExpoSpeechRecognition = (config, props) => {
9298
[
9399
withAndroidPackageVisibilityFiltering,
94100
{
95-
packages: props.androidSpeechServicePackages,
101+
packages: props?.androidSpeechServicePackages,
96102
},
97103
],
98104
]);

example/App.tsx

+27-2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ import {
4747
AndroidOutputFormat,
4848
IOSOutputFormat,
4949
} from "expo-av/build/Audio";
50+
import { VolumeMeteringAvatar } from "./components/VolumeMeteringAvatar";
5051

5152
const speechRecognitionServices = getSpeechRecognitionServices();
5253

@@ -71,7 +72,16 @@ export default function App() {
7172
continuous: true,
7273
requiresOnDeviceRecognition: false,
7374
addsPunctuation: true,
74-
contextualStrings: ["Carlsen", "Ian Nepomniachtchi", "Praggnanandhaa"],
75+
contextualStrings: [
76+
"expo-speech-recognition",
77+
"Carlsen",
78+
"Ian Nepomniachtchi",
79+
"Praggnanandhaa",
80+
],
81+
volumeChangeEventOptions: {
82+
enabled: false,
83+
intervalMillis: 300,
84+
},
7585
});
7686

7787
useSpeechRecognitionEvent("result", (ev) => {
@@ -140,6 +150,10 @@ export default function App() {
140150
<SafeAreaView style={styles.container}>
141151
<StatusBar style="dark" translucent={false} />
142152

153+
{settings.volumeChangeEventOptions?.enabled ? (
154+
<VolumeMeteringAvatar />
155+
) : null}
156+
143157
<View style={styles.card}>
144158
<Text style={styles.text}>
145159
{error ? JSON.stringify(error) : "Error messages go here"}
@@ -510,6 +524,17 @@ function GeneralSettings(props: {
510524
checked={Boolean(settings.continuous)}
511525
onPress={() => handleChange("continuous", !settings.continuous)}
512526
/>
527+
528+
<CheckboxButton
529+
title="Volume events"
530+
checked={Boolean(settings.volumeChangeEventOptions?.enabled)}
531+
onPress={() =>
532+
handleChange("volumeChangeEventOptions", {
533+
enabled: !settings.volumeChangeEventOptions?.enabled,
534+
intervalMillis: settings.volumeChangeEventOptions?.intervalMillis,
535+
})
536+
}
537+
/>
513538
</View>
514539

515540
<View style={styles.textOptionContainer}>
@@ -714,7 +739,7 @@ function AndroidSettings(props: {
714739
onPress={() =>
715740
handleChange("androidIntentOptions", {
716741
...settings.androidIntentOptions,
717-
[key]: !settings.androidIntentOptions?.[key] ?? false,
742+
[key]: !settings.androidIntentOptions?.[key],
718743
})
719744
}
720745
/>

0 commit comments

Comments
 (0)