Skip to content

feat(api): adding srt and vtt support for audio transcriptions. #472

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,17 @@ class AudioResponseFormat @JsonCreator private constructor(private val value: Js
*/
@JvmSynthetic internal fun validity(): Int = if (value() == Value._UNKNOWN) 0 else 1

/**
 * Returns whether this response format is one of the JSON formats.
 *
 * Only [JSON] and [VERBOSE_JSON] return `true`. [TEXT], [SRT], [VTT] and any other value
 * return `false`.
 */
@JvmSynthetic
internal fun isJson(): Boolean = this == JSON || this == VERBOSE_JSON

override fun equals(other: Any?): Boolean {
if (this === other) {
return true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ import com.openai.core.handlers.errorHandler
import com.openai.core.handlers.jsonHandler
import com.openai.core.handlers.mapJson
import com.openai.core.handlers.sseHandler
import com.openai.core.handlers.stringHandler
import com.openai.core.handlers.withErrorHandler
import com.openai.core.http.AsyncStreamResponse
import com.openai.core.http.HttpMethod
import com.openai.core.http.HttpRequest
import com.openai.core.http.HttpResponse
import com.openai.core.http.HttpResponse.Handler
import com.openai.core.http.HttpResponseFor
import com.openai.core.http.StreamResponse
Expand All @@ -22,10 +24,12 @@ import com.openai.core.http.parseable
import com.openai.core.http.toAsync
import com.openai.core.prepareAsync
import com.openai.models.ErrorObject
import com.openai.models.audio.transcriptions.Transcription
import com.openai.models.audio.transcriptions.TranscriptionCreateParams
import com.openai.models.audio.transcriptions.TranscriptionCreateResponse
import com.openai.models.audio.transcriptions.TranscriptionStreamEvent
import java.util.concurrent.CompletableFuture
import kotlin.jvm.optionals.getOrNull

class TranscriptionServiceAsyncImpl internal constructor(private val clientOptions: ClientOptions) :
TranscriptionServiceAsync {
Expand Down Expand Up @@ -58,9 +62,19 @@ class TranscriptionServiceAsyncImpl internal constructor(private val clientOptio

private val errorHandler: Handler<ErrorObject?> = errorHandler(clientOptions.jsonMapper)

private val createHandler: Handler<TranscriptionCreateResponse> =
/**
 * Handler that parses a response into a [TranscriptionCreateResponse] via [jsonHandler] and the
 * client's JSON mapper, combined with the shared [errorHandler].
 */
private val createJsonHandler: Handler<TranscriptionCreateResponse> =
jsonHandler<TranscriptionCreateResponse>(clientOptions.jsonMapper)
.withErrorHandler(errorHandler)
/**
 * Handler for responses that are not JSON: the value produced by the string handler becomes the
 * `text` of a [Transcription], which is then wrapped in a [TranscriptionCreateResponse].
 */
private val createStringHandler: Handler<TranscriptionCreateResponse> =
    object : Handler<TranscriptionCreateResponse> {

        // String handler combined with the shared error handler.
        private val rawTextHandler = stringHandler().withErrorHandler(errorHandler)

        override fun handle(response: HttpResponse): TranscriptionCreateResponse {
            val transcription =
                Transcription.builder().text(rawTextHandler.handle(response)).build()
            return TranscriptionCreateResponse.ofTranscription(transcription)
        }
    }

override fun create(
params: TranscriptionCreateParams,
Expand All @@ -81,9 +95,13 @@ class TranscriptionServiceAsyncImpl internal constructor(private val clientOptio
return request
.thenComposeAsync { clientOptions.httpClient.executeAsync(it, requestOptions) }
.thenApply { response ->
val handler =
if (params.responseFormat().getOrNull()?.isJson() != false)
createJsonHandler
else createStringHandler
response.parseable {
response
.use { createHandler.handle(it) }
.use { handler.handle(it) }
.also {
if (requestOptions.responseValidation!!) {
it.validate()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@ import com.openai.core.handlers.errorHandler
import com.openai.core.handlers.jsonHandler
import com.openai.core.handlers.mapJson
import com.openai.core.handlers.sseHandler
import com.openai.core.handlers.stringHandler
import com.openai.core.handlers.withErrorHandler
import com.openai.core.http.HttpMethod
import com.openai.core.http.HttpRequest
import com.openai.core.http.HttpResponse
import com.openai.core.http.HttpResponse.Handler
import com.openai.core.http.HttpResponseFor
import com.openai.core.http.StreamResponse
Expand All @@ -20,9 +22,11 @@ import com.openai.core.http.multipartFormData
import com.openai.core.http.parseable
import com.openai.core.prepare
import com.openai.models.ErrorObject
import com.openai.models.audio.transcriptions.Transcription
import com.openai.models.audio.transcriptions.TranscriptionCreateParams
import com.openai.models.audio.transcriptions.TranscriptionCreateResponse
import com.openai.models.audio.transcriptions.TranscriptionStreamEvent
import kotlin.jvm.optionals.getOrNull

class TranscriptionServiceImpl internal constructor(private val clientOptions: ClientOptions) :
TranscriptionService {
Expand Down Expand Up @@ -52,9 +56,19 @@ class TranscriptionServiceImpl internal constructor(private val clientOptions: C

private val errorHandler: Handler<ErrorObject?> = errorHandler(clientOptions.jsonMapper)

private val createHandler: Handler<TranscriptionCreateResponse> =
/**
 * Handler that parses a response into a [TranscriptionCreateResponse] via [jsonHandler] and the
 * client's JSON mapper, combined with the shared [errorHandler].
 */
private val createJsonHandler: Handler<TranscriptionCreateResponse> =
jsonHandler<TranscriptionCreateResponse>(clientOptions.jsonMapper)
.withErrorHandler(errorHandler)
/**
 * Handler for responses that are not JSON: the value produced by the string handler becomes the
 * `text` of a [Transcription], which is then wrapped in a [TranscriptionCreateResponse].
 */
private val createStringHandler: Handler<TranscriptionCreateResponse> =
    object : Handler<TranscriptionCreateResponse> {

        // String handler combined with the shared error handler.
        private val rawTextHandler = stringHandler().withErrorHandler(errorHandler)

        override fun handle(response: HttpResponse): TranscriptionCreateResponse {
            val transcription =
                Transcription.builder().text(rawTextHandler.handle(response)).build()
            return TranscriptionCreateResponse.ofTranscription(transcription)
        }
    }

override fun create(
params: TranscriptionCreateParams,
Expand All @@ -70,8 +84,11 @@ class TranscriptionServiceImpl internal constructor(private val clientOptions: C
val requestOptions = requestOptions.applyDefaults(RequestOptions.from(clientOptions))
val response = clientOptions.httpClient.execute(request, requestOptions)
return response.parseable {
val handler =
if (params.responseFormat().getOrNull()?.isJson() != false) createJsonHandler
else createStringHandler
response
.use { createHandler.handle(it) }
.use { handler.handle(it) }
.also {
if (requestOptions.responseValidation!!) {
it.validate()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package com.openai.example;

import com.openai.client.OpenAIClient;
import com.openai.client.okhttp.OpenAIOkHttpClient;
import com.openai.models.audio.AudioModel;
import com.openai.models.audio.AudioResponseFormat;
import com.openai.models.audio.transcriptions.Transcription;
import com.openai.models.audio.transcriptions.TranscriptionCreateParams;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Example that transcribes the {@code sports.wav} classpath resource with the
 * {@code WHISPER_1} model, requesting the {@code VTT} response format, and prints the
 * returned text to standard output.
 */
public final class AudioTranscriptionsVttExample {
    private AudioTranscriptionsVttExample() {}

    /**
     * Runs the example.
     *
     * @param args ignored
     * @throws IllegalStateException if {@code sports.wav} is not found on the classpath
     * @throws Exception if the resource URL cannot be converted to a path or the request fails
     */
    public static void main(String[] args) throws Exception {
        // Configures using one of:
        // - The `OPENAI_API_KEY` environment variable
        // - The `OPENAI_BASE_URL` and `AZURE_OPENAI_KEY` environment variables
        OpenAIClient client = OpenAIOkHttpClient.fromEnv();

        ClassLoader classloader = Thread.currentThread().getContextClassLoader();
        // getResource returns null when the resource is missing; fail with a clear message
        // instead of a bare NullPointerException from toURI().
        java.net.URL resource = classloader.getResource("sports.wav");
        if (resource == null) {
            throw new IllegalStateException("Resource not found on classpath: sports.wav");
        }
        Path path = Paths.get(resource.toURI());

        TranscriptionCreateParams createParams = TranscriptionCreateParams.builder()
                .file(path)
                .model(AudioModel.WHISPER_1)
                .responseFormat(AudioResponseFormat.VTT)
                .build();

        // Non-JSON formats such as VTT are surfaced through the Transcription variant's text.
        Transcription transcription =
                client.audio().transcriptions().create(createParams).asTranscription();
        System.out.println(transcription.text());
    }
}