From 533f0eabd409affbb331b7203db37750dbedf365 Mon Sep 17 00:00:00 2001
From: Yoshio Terada <yoterada@microsoft.com>
Date: Wed, 12 Mar 2025 10:44:22 +0900
Subject: [PATCH] First commit to fix Issue openai#211

First commit to fix Issue openai#211
This commit includes the fix described in Issue openai#211.

* Base64-encoded embedding responses can now be deserialized; previously only float arrays were handled.
* Embedding requests now default to the Base64 encoding format to improve performance.
---
 .../com/openai/models/embeddings/Embedding.kt | 55 +++++++++----
 .../embeddings/EmbeddingCreateParams.kt       |  4 +-
 .../models/embeddings/EmbeddingValue.kt       | 81 +++++++++++++++++++
 .../embeddings/EmbeddingValueDeserializer.kt  | 32 ++++++++
 .../embeddings/CreateEmbeddingResponseTest.kt | 25 +++++-
 .../openai/models/embeddings/EmbeddingTest.kt | 15 +++-
 6 files changed, 189 insertions(+), 23 deletions(-)
 create mode 100644 openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt
 create mode 100644 openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValueDeserializer.kt

diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt
index f3a4543f..87738dd0 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/Embedding.kt
@@ -17,6 +17,7 @@ import com.openai.core.immutableEmptyMap
 import com.openai.core.toImmutable
 import com.openai.errors.OpenAIInvalidDataException
 import java.util.Objects
+import java.util.Optional
 
 /** Represents an embedding vector returned by embedding endpoint. */
 @NoAutoDetect
@@ -25,7 +26,7 @@ class Embedding
 private constructor(
     @JsonProperty("embedding")
     @ExcludeMissing
-    private val embedding: JsonField<List<Double>> = JsonMissing.of(),
+    private val embedding: JsonField<EmbeddingValue> = JsonMissing.of(),
     @JsonProperty("index") @ExcludeMissing private val index: JsonField<Long> = JsonMissing.of(),
     @JsonProperty("object") @ExcludeMissing private val object_: JsonValue = JsonMissing.of(),
     @JsonAnySetter private val additionalProperties: Map<String, JsonValue> = immutableEmptyMap(),
@@ -35,7 +36,7 @@ private constructor(
      * The embedding vector, which is a list of floats. The length of vector depends on the model as
      * listed in the [embedding guide](https://platform.openai.com/docs/guides/embeddings).
      */
-    fun embedding(): List<Double> = embedding.getRequired("embedding")
+    fun embedding(): EmbeddingValue = embedding.getRequired("embedding")
 
     /** The index of the embedding in the list of embeddings. */
     fun index(): Long = index.getRequired("index")
@@ -47,7 +48,9 @@ private constructor(
      * The embedding vector, which is a list of floats. The length of vector depends on the model as
      * listed in the [embedding guide](https://platform.openai.com/docs/guides/embeddings).
      */
-    @JsonProperty("embedding") @ExcludeMissing fun _embedding(): JsonField<List<Double>> = embedding
+    @JsonProperty("embedding")
+    @ExcludeMissing
+    fun _embedding(): JsonField<EmbeddingValue> = embedding
 
     /** The index of the embedding in the list of embeddings. */
     @JsonProperty("index") @ExcludeMissing fun _index(): JsonField<Long> = index
@@ -92,14 +95,21 @@ private constructor(
     /** A builder for [Embedding]. */
     class Builder internal constructor() {
 
-        private var embedding: JsonField<MutableList<Double>>? = null
+        private var embedding: JsonField<EmbeddingValue>? = null
         private var index: JsonField<Long>? = null
         private var object_: JsonValue = JsonValue.from("embedding")
         private var additionalProperties: MutableMap<String, JsonValue> = mutableMapOf()
 
         @JvmSynthetic
         internal fun from(embedding: Embedding) = apply {
-            this.embedding = embedding.embedding.map { it.toMutableList() }
+            this.embedding =
+                embedding.embedding.map {
+                    EmbeddingValue(
+                        floatEmbedding =
+                            Optional.of(it.floatEmbedding.orElse(mutableListOf()).toMutableList()),
+                        base64Embedding = it.base64Embedding,
+                    )
+                }
             index = embedding.index
             object_ = embedding.object_
             additionalProperties = embedding.additionalProperties.toMutableMap()
@@ -110,27 +120,32 @@ private constructor(
          * model as listed in the
          * [embedding guide](https://platform.openai.com/docs/guides/embeddings).
          */
-        fun embedding(embedding: List<Double>) = embedding(JsonField.of(embedding))
+        fun embedding(embedding: EmbeddingValue) = embedding(JsonField.of(embedding))
 
         /**
-         * The embedding vector, which is a list of floats. The length of vector depends on the
-         * model as listed in the
+         * The embedding vector, which is a list of floats or Base64. The float length of vector
+         * depends on the model as listed in the
          * [embedding guide](https://platform.openai.com/docs/guides/embeddings).
          */
-        fun embedding(embedding: JsonField<List<Double>>) = apply {
-            this.embedding = embedding.map { it.toMutableList() }
+        fun embedding(embedding: JsonField<EmbeddingValue>) = apply {
+            this.embedding =
+                embedding.map {
+                    EmbeddingValue(
+                        floatEmbedding =
+                            Optional.of(it.floatEmbedding.orElse(mutableListOf()).toMutableList()),
+                        base64Embedding = it.base64Embedding,
+                    )
+                }
         }
 
         /**
-         * The embedding vector, which is a list of floats. The length of vector depends on the
-         * model as listed in the
+         * Sets the embedding only if none has been set; an existing embedding is kept and the
+         * given value is ignored. Vector lengths for each model are listed in the
          * [embedding guide](https://platform.openai.com/docs/guides/embeddings).
          */
-        fun addEmbedding(embedding: Double) = apply {
+        fun addEmbedding(embedding: EmbeddingValue) = apply {
             this.embedding =
-                (this.embedding ?: JsonField.of(mutableListOf())).also {
-                    checkKnown("embedding", it).add(embedding)
-                }
+                (this.embedding ?: JsonField.of(embedding)).also { checkKnown("embedding", it) }
         }
 
         /** The index of the embedding in the list of embeddings. */
@@ -163,7 +178,13 @@ private constructor(
 
         fun build(): Embedding =
             Embedding(
-                checkRequired("embedding", embedding).map { it.toImmutable() },
+                checkRequired("embedding", embedding).map {
+                    EmbeddingValue(
+                        floatEmbedding =
+                            Optional.of(it.floatEmbedding.orElse(mutableListOf()).toMutableList()),
+                        base64Embedding = it.base64Embedding,
+                    )
+                },
                 checkRequired("index", index),
                 object_,
                 additionalProperties.toImmutable(),
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt
index 7d1e87d8..8550e1c9 100644
--- a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt
+++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingCreateParams.kt
@@ -271,7 +271,9 @@ private constructor(
             private var input: JsonField<Input>? = null
             private var model: JsonField<EmbeddingModel>? = null
             private var dimensions: JsonField<Long> = JsonMissing.of()
-            private var encodingFormat: JsonField<EncodingFormat> = JsonMissing.of()
+            // Default EncodingFormat value is set to BASE64 for performance improvements.
+            private var encodingFormat: JsonField<EncodingFormat> =
+                JsonField.of(EncodingFormat.BASE64)
             private var user: JsonField<String> = JsonMissing.of()
             private var additionalProperties: MutableMap<String, JsonValue> = mutableMapOf()
 
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt
new file mode 100644
index 00000000..ab20d507
--- /dev/null
+++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValue.kt
@@ -0,0 +1,81 @@
+package com.openai.models.embeddings
+
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize
+import java.nio.ByteBuffer
+import java.nio.ByteOrder
+import java.util.Base64
+import java.util.Optional
+import kotlin.collections.MutableList
+
+/**
+ * An embedding vector returned by the embedding endpoint, held as a list of floats, as a Base64
+ * string of packed little-endian float32 values, or as both.
+ *
+ * Equality and hashing are value-based so that separately constructed instances holding the same
+ * data compare equal.
+ */
+@JsonDeserialize(using = EmbeddingValueDeserializer::class)
+class EmbeddingValue(
+    var base64Embedding: Optional<String> = Optional.empty(),
+    floatEmbedding: Optional<MutableList<Double>> = Optional.empty(),
+) {
+
+    /**
+     * The embedding vector as a list of floats. It starts out as the constructor argument; while
+     * it is absent and [base64Embedding] is present, reading it decodes the Base64 data and caches
+     * the result. See the
+     * [embedding guide](https://platform.openai.com/docs/guides/embeddings).
+     */
+    var floatEmbedding: Optional<MutableList<Double>> = floatEmbedding
+        get() {
+            if (!field.isPresent && base64Embedding.isPresent) {
+                field = convertBase64ToFloat(base64Embedding)
+            }
+            return field
+        }
+
+    /**
+     * Decodes a Base64 string of little-endian float32 values into a list of doubles.
+     *
+     * Each float is widened through its decimal string form instead of a direct float-to-double
+     * conversion, so the doubles match what parsing the same numbers from JSON text would give
+     * rather than exposing the float32 rounding error as extra digits.
+     *
+     * @throws IllegalArgumentException if the text is not valid Base64 or does not decode to a
+     *   whole number of 4-byte floats.
+     */
+    private fun convertBase64ToFloat(
+        base64Embedding: Optional<String>
+    ): Optional<MutableList<Double>> =
+        base64Embedding.map { base64String ->
+            val decoded = Base64.getDecoder().decode(base64String)
+            require(decoded.size % Float.SIZE_BYTES == 0) {
+                "Base64 embedding must decode to a whole number of float32 values"
+            }
+            val floats = ByteBuffer.wrap(decoded).order(ByteOrder.LITTLE_ENDIAN).asFloatBuffer()
+            MutableList(floats.remaining()) { index -> floats.get(index).toString().toDouble() }
+        }
+
+    /**
+     * Renders the vector as text: the Base64 form (when present) followed by the float list, or
+     * the float list alone. An absent float list is rendered as empty instead of throwing.
+     */
+    override fun toString(): String {
+        val floats = floatEmbedding.orElse(mutableListOf()).joinToString(", ")
+        return if (base64Embedding.isPresent) {
+            "base64: $base64Embedding, float:  [$floats]"
+        } else {
+            "float:  [$floats]"
+        }
+    }
+
+    /** Two values are equal when their Base64 text and their float vectors both match. */
+    override fun equals(other: Any?): Boolean =
+        this === other ||
+            (other is EmbeddingValue &&
+                base64Embedding == other.base64Embedding &&
+                floatEmbedding == other.floatEmbedding)
+
+    /** Hash consistent with [equals]. */
+    override fun hashCode(): Int = 31 * base64Embedding.hashCode() + floatEmbedding.hashCode()
+}
diff --git a/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValueDeserializer.kt b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValueDeserializer.kt
new file mode 100644
index 00000000..f20c4309
--- /dev/null
+++ b/openai-java-core/src/main/kotlin/com/openai/models/embeddings/EmbeddingValueDeserializer.kt
@@ -0,0 +1,32 @@
+package com.openai.models.embeddings
+
+import com.fasterxml.jackson.core.JsonParser
+import com.fasterxml.jackson.databind.DeserializationContext
+import com.fasterxml.jackson.databind.JsonDeserializer
+import com.fasterxml.jackson.databind.JsonNode
+import com.fasterxml.jackson.databind.node.ArrayNode
+import java.io.IOException
+import java.util.Optional
+
+/** Jackson deserializer for [EmbeddingValue]: a JSON array of numbers or a Base64 string. */
+class EmbeddingValueDeserializer : JsonDeserializer<EmbeddingValue>() {
+    /**
+     * Reads one JSON value; any shape other than an array or a string is an input mismatch.
+     */
+    @Throws(IOException::class)
+    override fun deserialize(jp: JsonParser, ctxt: DeserializationContext): EmbeddingValue {
+        val node = jp.codec.readTree<JsonNode>(jp)
+        val value = EmbeddingValue()
+        when {
+            node.isArray ->
+                value.floatEmbedding = Optional.of(node.map { it.asDouble() }.toMutableList())
+            node.isTextual -> value.base64Embedding = Optional.of(node.asText())
+            else ->
+                ctxt.reportInputMismatch<EmbeddingValue>(
+                    EmbeddingValue::class.java,
+                    "Expected a JSON array of numbers or a Base64 string for an embedding",
+                )
+        }
+        return value
+    }
+}
diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/CreateEmbeddingResponseTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/CreateEmbeddingResponseTest.kt
index 25eef802..926a18d5 100644
--- a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/CreateEmbeddingResponseTest.kt
+++ b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/CreateEmbeddingResponseTest.kt
@@ -2,6 +2,7 @@
 
 package com.openai.models.embeddings
 
+import java.util.Optional
 import org.assertj.core.api.Assertions.assertThat
 import org.junit.jupiter.api.Test
 
@@ -11,7 +12,17 @@ class CreateEmbeddingResponseTest {
     fun createCreateEmbeddingResponse() {
         val createEmbeddingResponse =
             CreateEmbeddingResponse.builder()
-                .addData(Embedding.builder().addEmbedding(0.0).index(0L).build())
+                .addData(
+                    Embedding.builder()
+                        .addEmbedding(
+                            EmbeddingValue(
+                                floatEmbedding = Optional.of(mutableListOf(0.0)),
+                                base64Embedding = Optional.empty(),
+                            )
+                        )
+                        .index(0L)
+                        .build()
+                )
                 .model("model")
                 .usage(
                     CreateEmbeddingResponse.Usage.builder().promptTokens(0L).totalTokens(0L).build()
@@ -19,7 +30,17 @@ class CreateEmbeddingResponseTest {
                 .build()
         assertThat(createEmbeddingResponse).isNotNull
         assertThat(createEmbeddingResponse.data())
-            .containsExactly(Embedding.builder().addEmbedding(0.0).index(0L).build())
+            .containsExactly(
+                Embedding.builder()
+                    .addEmbedding(
+                        EmbeddingValue(
+                            floatEmbedding = Optional.of(mutableListOf(0.0)),
+                            base64Embedding = Optional.empty(),
+                        )
+                    )
+                    .index(0L)
+                    .build()
+            )
         assertThat(createEmbeddingResponse.model()).isEqualTo("model")
         assertThat(createEmbeddingResponse.usage())
             .isEqualTo(
diff --git a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingTest.kt b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingTest.kt
index 41286a5a..bf8207f9 100644
--- a/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingTest.kt
+++ b/openai-java-core/src/test/kotlin/com/openai/models/embeddings/EmbeddingTest.kt
@@ -1,7 +1,7 @@
 // File generated from our OpenAPI spec by Stainless.
 
 package com.openai.models.embeddings
-
+import java.util.Optional
 import org.assertj.core.api.Assertions.assertThat
 import org.junit.jupiter.api.Test
 
@@ -9,9 +9,18 @@ class EmbeddingTest {
 
     @Test
     fun createEmbedding() {
-        val embedding = Embedding.builder().addEmbedding(0.0).index(0L).build()
+        // Float-only value: base64Embedding keeps its default of Optional.empty().
+        val embedding =
+            Embedding.builder()
+                .addEmbedding(EmbeddingValue(floatEmbedding = Optional.of(mutableListOf(0.0))))
+                // index goes through checkRequired in build(), so it has to be set explicitly.
+                .index(0L)
+                .build()
         assertThat(embedding).isNotNull
-        assertThat(embedding.embedding()).containsExactly(0.0)
+        // contains() compares the wrapped list by equality; containsSame() would require the
+        // identical list instance, which a freshly created expected list can never be.
+        val floats = embedding.embedding().floatEmbedding
+        assertThat(floats).contains(mutableListOf(0.0))
         assertThat(embedding.index()).isEqualTo(0L)
     }
 }