diff --git a/bot/admin/server/src/main/kotlin/BotAdminService.kt b/bot/admin/server/src/main/kotlin/BotAdminService.kt index ec8386b168..b936cf3d7c 100644 --- a/bot/admin/server/src/main/kotlin/BotAdminService.kt +++ b/bot/admin/server/src/main/kotlin/BotAdminService.kt @@ -1153,7 +1153,9 @@ object BotAdminService { // delete the RAG configuration ragConfigurationDAO.findByNamespaceAndBotId(app.namespace, app.name)?.let { config -> ragConfigurationDAO.delete(config._id) - config.llmSetting.apiKey?.let { SecurityUtils.deleteSecret(it) } + config.questionCondensingLlmSetting?.apiKey?.let { SecurityUtils.deleteSecret(it) } + config.questionAnsweringLlmSetting?.apiKey?.let { SecurityUtils.deleteSecret(it) } + config.llmSetting?.apiKey?.let { SecurityUtils.deleteSecret(it) } config.emSetting.apiKey?.let { SecurityUtils.deleteSecret(it) } } diff --git a/bot/admin/server/src/main/kotlin/model/BotRAGConfigurationDTO.kt b/bot/admin/server/src/main/kotlin/model/BotRAGConfigurationDTO.kt index 9810aa0599..13f8672d72 100644 --- a/bot/admin/server/src/main/kotlin/model/BotRAGConfigurationDTO.kt +++ b/bot/admin/server/src/main/kotlin/model/BotRAGConfigurationDTO.kt @@ -18,6 +18,7 @@ package ai.tock.bot.admin.model import ai.tock.bot.admin.bot.rag.BotRAGConfiguration import ai.tock.bot.admin.service.VectorStoreService +import ai.tock.genai.orchestratorclient.requests.PromptTemplate import ai.tock.genai.orchestratorcore.mappers.EMSettingMapper import ai.tock.genai.orchestratorcore.mappers.LLMSettingMapper import ai.tock.genai.orchestratorcore.models.Constants @@ -34,26 +35,39 @@ data class BotRAGConfigurationDTO( val namespace: String, val botId: String, val enabled: Boolean = false, - val llmSetting: LLMSettingDTO, + val questionCondensingLlmSetting: LLMSettingDTO? = null, + val questionCondensingPrompt: PromptTemplate? = null, + val questionAnsweringLlmSetting: LLMSettingDTO, + val questionAnsweringPrompt: PromptTemplate, val emSetting: EMSettingDTO, val indexSessionId: String? = null, val indexName: String? = null, val noAnswerSentence: String, val noAnswerStoryId: String? 
= null, val documentsRequired: Boolean = true, + val debugEnabled: Boolean, + val maxDocumentsRetrieved: Int, + val maxMessagesFromHistory: Int, ) { constructor(configuration: BotRAGConfiguration) : this( id = configuration._id.toString(), namespace = configuration.namespace, botId = configuration.botId, enabled = configuration.enabled, - llmSetting = configuration.llmSetting.toDTO(), + questionCondensingLlmSetting = configuration.questionCondensingLlmSetting?.toDTO(), + questionCondensingPrompt = configuration.questionCondensingPrompt, + questionAnsweringLlmSetting = configuration.getQuestionAnsweringLLMSetting().toDTO(), + questionAnsweringPrompt = configuration.questionAnsweringPrompt + ?: configuration.initQuestionAnsweringPrompt(), emSetting = configuration.emSetting.toDTO(), indexSessionId = configuration.indexSessionId, indexName = configuration.generateIndexName(), noAnswerSentence = configuration.noAnswerSentence, noAnswerStoryId = configuration.noAnswerStoryId, documentsRequired = configuration.documentsRequired, + debugEnabled = configuration.debugEnabled, + maxDocumentsRetrieved = configuration.maxDocumentsRetrieved, + maxMessagesFromHistory = configuration.maxMessagesFromHistory, ) fun toBotRAGConfiguration(): BotRAGConfiguration = @@ -62,12 +76,20 @@ data class BotRAGConfigurationDTO( namespace = namespace, botId = botId, enabled = enabled, - llmSetting = LLMSettingMapper.toEntity( + questionCondensingLlmSetting = LLMSettingMapper.toEntity( + namespace = namespace, + botId = botId, + feature = Constants.GEN_AI_RAG_QUESTION_CONDENSING, + dto = questionCondensingLlmSetting!! + ), + questionCondensingPrompt = questionCondensingPrompt, + questionAnsweringLlmSetting = LLMSettingMapper.toEntity( namespace = namespace, botId = botId, feature = Constants.GEN_AI_RAG_QUESTION_ANSWERING, - dto = llmSetting + dto = questionAnsweringLlmSetting ), + questionAnsweringPrompt = questionAnsweringPrompt, emSetting = EMSettingMapper.toEntity( namespace = namespace, botId = botId, @@ -78,6 +100,9 @@ data class BotRAGConfigurationDTO( noAnswerSentence = noAnswerSentence, noAnswerStoryId = noAnswerStoryId, documentsRequired = documentsRequired, + debugEnabled = debugEnabled, + maxDocumentsRetrieved = maxDocumentsRetrieved, + maxMessagesFromHistory = maxMessagesFromHistory, ) } @@ -87,6 +112,7 @@ private fun BotRAGConfiguration.generateIndexName(): String? 
{ namespace, botId, it, + maxDocumentsRetrieved, VectorStoreService.getVectorStoreConfiguration(namespace, botId, enabled = true) ?.setting ).second diff --git a/bot/admin/server/src/main/kotlin/model/BotSentenceGenerationConfigurationDTO.kt b/bot/admin/server/src/main/kotlin/model/BotSentenceGenerationConfigurationDTO.kt index 9e6ea41b91..ad8b7c5c46 100644 --- a/bot/admin/server/src/main/kotlin/model/BotSentenceGenerationConfigurationDTO.kt +++ b/bot/admin/server/src/main/kotlin/model/BotSentenceGenerationConfigurationDTO.kt @@ -17,6 +17,7 @@ package ai.tock.bot.admin.model import ai.tock.bot.admin.bot.sentencegeneration.BotSentenceGenerationConfiguration +import ai.tock.genai.orchestratorclient.requests.PromptTemplate import ai.tock.genai.orchestratorcore.mappers.LLMSettingMapper import ai.tock.genai.orchestratorcore.models.Constants import ai.tock.genai.orchestratorcore.models.llm.LLMSettingDTO @@ -32,6 +33,7 @@ data class BotSentenceGenerationConfigurationDTO( val enabled: Boolean = false, val nbSentences: Int, val llmSetting: LLMSettingDTO, + val prompt: PromptTemplate, ) { constructor(configuration: BotSentenceGenerationConfiguration) : this( id = configuration._id.toString(), @@ -40,6 +42,7 @@ data class BotSentenceGenerationConfigurationDTO( enabled = configuration.enabled, nbSentences = configuration.nbSentences, llmSetting = configuration.llmSetting.toDTO(), + prompt = configuration.prompt ?: configuration.initPrompt() ) fun toSentenceGenerationConfiguration(): BotSentenceGenerationConfiguration = @@ -54,7 +57,8 @@ data class BotSentenceGenerationConfigurationDTO( botId = botId, feature = Constants.GEN_AI_COMPLETION_SENTENCE_GENERATION, dto = llmSetting - ) + ), + prompt = prompt ) } diff --git a/bot/admin/server/src/main/kotlin/service/CompletionService.kt b/bot/admin/server/src/main/kotlin/service/CompletionService.kt index 93379172a5..846e0384a1 100644 --- a/bot/admin/server/src/main/kotlin/service/CompletionService.kt +++ b/bot/admin/server/src/main/kotlin/service/CompletionService.kt @@ -63,6 +63,9 @@ object CompletionService { // Get LLM Setting and override the temperature val llmSetting = sentenceGenerationConfig.llmSetting.copyWithTemperature(request.llmTemperature) + // Get prompt + val prompt = sentenceGenerationConfig.prompt ?: sentenceGenerationConfig.initPrompt() + // Create the inputs map val inputs = mapOf( "locale" to request.locale, @@ -75,18 +78,11 @@ object CompletionService { ) ) - // Create a Jinja2 prompt template - val prompt = PromptTemplate( - formatter = Formatter.JINJA2.id, - template = llmSetting.prompt, - inputs = inputs - ) - // call the completion service to generate sentences return completionService .generateSentences( SentenceGenerationQuery( - llmSetting, prompt, + llmSetting, prompt.copy(inputs = inputs), ObservabilityService.getObservabilityConfiguration(namespace, botId, enabled = true)?.setting ) ) diff --git a/bot/admin/server/src/main/kotlin/service/RAGService.kt b/bot/admin/server/src/main/kotlin/service/RAGService.kt index 16b5d6522e..cd4db7b7f4 100644 --- a/bot/admin/server/src/main/kotlin/service/RAGService.kt +++ b/bot/admin/server/src/main/kotlin/service/RAGService.kt @@ -64,8 +64,11 @@ object RAGService { logger.info { "Deleting the RAG Configuration [namespace: $namespace, botId: $botId]" } ragConfigurationDAO.delete(ragConfig._id) - logger.info { "Deleting the LLM secret ..." } - ragConfig.llmSetting.apiKey?.let { SecurityUtils.deleteSecret(it) } + logger.info { "Deleting the question condensing LLM secret ..." 
} + ragConfig.questionCondensingLlmSetting?.apiKey?.let { SecurityUtils.deleteSecret(it) } + logger.info { "Deleting the question answering LLM secret ..." } + ragConfig.questionAnsweringLlmSetting?.apiKey?.let { SecurityUtils.deleteSecret(it) } + ragConfig.llmSetting?.apiKey?.let { SecurityUtils.deleteSecret(it) } logger.info { "Deleting the Embedding secret ..." } ragConfig.emSetting.apiKey?.let { SecurityUtils.deleteSecret(it) } } diff --git a/bot/admin/server/src/main/kotlin/service/RAGValidationService.kt b/bot/admin/server/src/main/kotlin/service/RAGValidationService.kt index d2de5a0ab8..f992c344ee 100644 --- a/bot/admin/server/src/main/kotlin/service/RAGValidationService.kt +++ b/bot/admin/server/src/main/kotlin/service/RAGValidationService.kt @@ -37,15 +37,24 @@ object RAGValidationService { private val vectorStoreProviderService: VectorStoreProviderService get() = injector.provide() fun validate(ragConfig: BotRAGConfiguration): Set<ErrorMessage> { + val observabilitySetting = ObservabilityService.getObservabilityConfiguration( + ragConfig.namespace, ragConfig.botId, enabled = true + )?.setting + return mutableSetOf<ErrorMessage>().apply { - val llmErrors = llmProviderService.checkSetting( + val questionCondensingLlmErrors = llmProviderService.checkSetting( + LLMProviderSettingStatusQuery( + ragConfig.questionCondensingLlmSetting!!, + observabilitySetting + ) + ).getErrors("LLM setting check failed (for question condensing)") + + val questionAnsweringLlmErrors = llmProviderService.checkSetting( LLMProviderSettingStatusQuery( - ragConfig.llmSetting, - ObservabilityService.getObservabilityConfiguration( - ragConfig.namespace, ragConfig.botId, enabled = true - )?.setting + ragConfig.questionAnsweringLlmSetting!!, + observabilitySetting ) - ).getErrors("LLM setting check failed") + ).getErrors("LLM setting check failed (for question answering)") val embeddingErrors = emProviderService.checkSetting( EMProviderSettingStatusQuery(ragConfig.emSetting) @@ -59,7 +68,11 @@ )?.setting val (_, indexName) = VectorStoreUtils.getVectorStoreElements( - ragConfig.namespace, ragConfig.botId, ragConfig.indexSessionId!!, vectorStoreSetting + ragConfig.namespace, + ragConfig.botId, + ragConfig.indexSessionId!!, + ragConfig.maxDocumentsRetrieved, + vectorStoreSetting ) vectorStoreProviderService.checkSetting( @@ -71,7 +84,7 @@ ).getErrors("Vector store setting check failed") } ?: emptySet() - addAll(llmErrors + embeddingErrors + indexSessionIdErrors + vectorStoreErrors) + addAll(questionCondensingLlmErrors + questionAnsweringLlmErrors + embeddingErrors + indexSessionIdErrors + vectorStoreErrors) } } diff --git a/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt b/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt index eeabc764c8..ce9301c5da 100644 --- a/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt +++ b/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt @@ -30,6 +30,7 @@ import ai.tock.bot.test.TFunction import ai.tock.bot.test.TRunnable import ai.tock.bot.test.TSupplier import ai.tock.bot.test.TestCase +import ai.tock.genai.orchestratorclient.requests.PromptTemplate import ai.tock.genai.orchestratorclient.responses.ProviderSettingStatusResponse import ai.tock.genai.orchestratorclient.services.EMProviderService import ai.tock.genai.orchestratorclient.services.LLMProviderService @@ -65,17 +66,24 @@ class RAGServiceTest : AbstractTest() { const val INDEX_SESSION_ID = "1010101" private val DEFAULT_RAG_CONFIG = BotRAGConfigurationDTO( -
id = "ragId", + id = "ragId", namespace = NAMESPACE, botId = BOT_ID, enabled = false, - llmSetting = OpenAILLMSettingDTO( + questionCondensingLlmSetting = OpenAILLMSettingDTO( apiKey = "apikey", model = MODEL, - prompt = PROMPT, temperature = TEMPERATURE, baseUrl = "https://api.openai.com/v1" ), + questionCondensingPrompt = PromptTemplate(template = PROMPT), + questionAnsweringLlmSetting = OpenAILLMSettingDTO( + apiKey = "apikey", + model = MODEL, + temperature = TEMPERATURE, + baseUrl = "https://api.openai.com/v1" + ), + questionAnsweringPrompt = PromptTemplate(template = PROMPT), emSetting = AzureOpenAIEMSettingDTO( apiKey = "apiKey", apiVersion = "apiVersion", @@ -83,7 +91,11 @@ class RAGServiceTest : AbstractTest() { model = "model", apiBase = "url" ), - noAnswerSentence = "No answer sentence" + noAnswerSentence = "No answer sentence", + documentsRequired = true, + debugEnabled = false, + maxDocumentsRetrieved = 2, + maxMessagesFromHistory = 2, ) private val DEFAULT_BOT_CONFIG = aApplication.copy(namespace = NAMESPACE, botId = BOT_ID) @@ -186,9 +198,9 @@ class RAGServiceTest : AbstractTest() { Assertions.assertEquals(BOT_ID, captured.botId) Assertions.assertEquals(true, captured.enabled) Assertions.assertEquals(NAMESPACE, captured.namespace) - Assertions.assertEquals(PROVIDER, captured.llmSetting.provider.name) - Assertions.assertEquals(TEMPERATURE, captured.llmSetting.temperature) - Assertions.assertEquals(PROMPT, captured.llmSetting.prompt) + Assertions.assertEquals(PROVIDER, captured.questionAnsweringLlmSetting!!.provider.name) + Assertions.assertEquals(TEMPERATURE, captured.questionAnsweringLlmSetting!!.temperature) + Assertions.assertEquals(PROMPT, captured.questionAnsweringPrompt!!.template) Assertions.assertEquals(null, captured.noAnswerStoryId) } diff --git a/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt b/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt index 4f5a538be7..2aec481459 100644 --- a/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt +++ b/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt @@ -19,6 +19,7 @@ package ai.tock.bot.admin.service import ai.tock.bot.admin.bot.observability.BotObservabilityConfigurationDAO import ai.tock.bot.admin.bot.vectorstore.BotVectorStoreConfigurationDAO import ai.tock.bot.admin.model.BotRAGConfigurationDTO +import ai.tock.genai.orchestratorclient.requests.PromptTemplate import ai.tock.genai.orchestratorclient.responses.ErrorInfo import ai.tock.genai.orchestratorclient.responses.ErrorResponse import ai.tock.genai.orchestratorclient.responses.ProviderSettingStatusResponse @@ -63,7 +64,7 @@ class RAGValidationServiceTest { } private val openAILLMSetting = OpenAILLMSetting( - apiKey = "123-abc", model = "unavailable-model", temperature = "0.4", prompt = "How to bike in the rain", + apiKey = "123-abc", model = "unavailable-model", temperature = "0.4", baseUrl = "https://api.openai.com/v1", ) @@ -78,9 +79,16 @@ class RAGValidationServiceTest { private val ragConfiguration = BotRAGConfigurationDTO( namespace = "namespace", botId = "botId", - llmSetting = openAILLMSetting, + questionCondensingLlmSetting = openAILLMSetting, + questionCondensingPrompt = PromptTemplate(template = "test"), + questionAnsweringLlmSetting = openAILLMSetting, + questionAnsweringPrompt = PromptTemplate(template = "How to bike in the rain"), emSetting = azureOpenAIEMSetting, noAnswerSentence = " No answer sentence", + documentsRequired = true, + debugEnabled = false, + maxDocumentsRetrieved 
= 2, + maxMessagesFromHistory = 2, ) @Test @@ -163,7 +171,7 @@ fun `validation of the RAG configuration when the Orchestrator returns 2 errors for LLM and 1 for Embedding model, the RAG function has not been activated`() { // GIVEN - // - 3 errors returned by Generative AI Orchestrator for LLM (2) and EM (1) + // - 5 errors returned by Generative AI Orchestrator for LLM (4 = 2 for condensing + 2 for answering) and EM (1) // - RAG is not enabled every { llmProviderService.checkSetting(any()) @@ -187,11 +195,13 @@ ) // THEN : - // Check that 3 errors have been found - assertEquals(2, errors.size) + // Check that 3 groups of errors have been found + assertEquals(3, errors.size) assertEquals("10", (((errors.elementAt(0).params) as List<*>)[0] as ErrorResponse).code) assertEquals("20", (((errors.elementAt(0).params) as List<*>)[1] as ErrorResponse).code) - assertEquals("30", (((errors.elementAt(1).params) as List<*>)[0] as ErrorResponse).code) + assertEquals("10", (((errors.elementAt(1).params) as List<*>)[0] as ErrorResponse).code) + assertEquals("20", (((errors.elementAt(1).params) as List<*>)[1] as ErrorResponse).code) + assertEquals("30", (((errors.elementAt(2).params) as List<*>)[0] as ErrorResponse).code) } private fun createFakeErrorResponse(code: String) = ErrorResponse( diff --git a/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt b/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt index 279c94be8a..5c38398e8d 100644 --- a/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt +++ b/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt @@ -16,8 +16,11 @@ package ai.tock.bot.admin.bot.rag +import ai.tock.genai.orchestratorclient.requests.Formatter +import ai.tock.genai.orchestratorclient.requests.PromptTemplate import ai.tock.genai.orchestratorcore.models.em.EMSetting import ai.tock.genai.orchestratorcore.models.llm.LLMSetting +import ai.tock.shared.intProperty import org.litote.kmongo.Id data class BotRAGConfiguration( @@ -25,10 +28,31 @@ val namespace: String, val botId: String, val enabled: Boolean, - val llmSetting: LLMSetting, + val questionCondensingLlmSetting: LLMSetting? = null, + val questionCondensingPrompt: PromptTemplate? = null, + val questionAnsweringLlmSetting: LLMSetting? = null, + val questionAnsweringPrompt: PromptTemplate? = null, + @Deprecated("use BotRAGConfiguration#questionAnsweringLlmSetting") + val llmSetting: LLMSetting? = null, val emSetting: EMSetting, val indexSessionId: String? = null, val noAnswerSentence: String, val noAnswerStoryId: String? = null, val documentsRequired: Boolean = true, -) + val debugEnabled: Boolean = false, + val maxDocumentsRetrieved: Int = 4, + val maxMessagesFromHistory: Int = 5, +) { + @Deprecated("use BotRAGConfiguration#questionAnsweringPrompt") + fun initQuestionAnsweringPrompt(): PromptTemplate { + // Temporary stopgap until the next version of Tock, + // which will remove the prompt at LLMSetting level and use the promptTemplate + return PromptTemplate( + formatter = Formatter.F_STRING.id, + template = getQuestionAnsweringLLMSetting().prompt!! + ) + } + + @Deprecated("use BotRAGConfiguration#questionAnsweringLlmSetting") + fun getQuestionAnsweringLLMSetting(): LLMSetting = (questionAnsweringLlmSetting ?: llmSetting)!!
+} diff --git a/bot/engine/src/main/kotlin/admin/bot/sentencegeneration/BotSentenceGenerationConfiguration.kt b/bot/engine/src/main/kotlin/admin/bot/sentencegeneration/BotSentenceGenerationConfiguration.kt index e0eb535a61..7509abc32f 100644 --- a/bot/engine/src/main/kotlin/admin/bot/sentencegeneration/BotSentenceGenerationConfiguration.kt +++ b/bot/engine/src/main/kotlin/admin/bot/sentencegeneration/BotSentenceGenerationConfiguration.kt @@ -17,6 +17,8 @@ package ai.tock.bot.admin.bot.sentencegeneration +import ai.tock.genai.orchestratorclient.requests.Formatter +import ai.tock.genai.orchestratorclient.requests.PromptTemplate import ai.tock.genai.orchestratorcore.models.llm.LLMSetting import org.litote.kmongo.Id @@ -27,4 +29,15 @@ data class BotSentenceGenerationConfiguration( val enabled: Boolean = false, val nbSentences: Int, val llmSetting: LLMSetting, -) \ No newline at end of file + val prompt: PromptTemplate? = null, +) { + @Deprecated("use BotSentenceGenerationConfiguration#prompt") + fun initPrompt(): PromptTemplate { + // Temporary stopgap until the next version of Tock, + // which will remove the prompt at LLMSetting level and use the promptTemplate + return PromptTemplate( + formatter = Formatter.JINJA2.id, + template = llmSetting.prompt!! + ) + } +} \ No newline at end of file diff --git a/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt b/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt index 1c73b4c785..bb869fc226 100644 --- a/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt +++ b/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt @@ -42,15 +42,9 @@ import ai.tock.shared.* import engine.config.AbstractProactiveAnswerHandler import mu.KotlinLogging -private val nLastMessages = intProperty( - name = "tock_gen_ai_orchestrator_dialog_number_messages", - defaultValue = 5) private val technicalErrorMessage = property( name = "tock_gen_ai_orchestrator_technical_error", defaultValue = "Technical error :( sorry!") -private val ragDebugEnabled = booleanProperty( - name = "tock_gen_ai_orchestrator_rag_debug_enabled", - defaultValue = false) object RAGAnswerHandler : AbstractProactiveAnswerHandler { @@ -68,7 +62,7 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler { val (answer, debug, noAnswerStory, observabilityInfo) = rag(this) // Add debug data if available and if debugging is enabled - if (debug != null && (action.metadata.debugEnabled || ragDebugEnabled)) { + if (debug != null) { logger.info { "Send RAG debug data." 
} sendDebugData("RAG", debug) } @@ -171,24 +165,28 @@ ragConfiguration.botId, // The indexSessionId is mandatory to enable RAG Story ragConfiguration.indexSessionId!!, + ragConfiguration.maxDocumentsRetrieved, vectorStoreSetting ) + val questionAnsweringPrompt = ragConfiguration.questionAnsweringPrompt + ?: ragConfiguration.initQuestionAnsweringPrompt() + try { val response = ragService.rag( query = RAGQuery( dialog = DialogDetails( dialogId = dialog.id.toString(), userId = dialog.playerIds.firstOrNull { PlayerType.user == it.type }?.id, - history = getDialogHistory(dialog), + history = getDialogHistory(dialog, ragConfiguration.maxMessagesFromHistory), tags = listOf( "connector:${underlyingConnector.connectorType.id}" ) ), - questionAnsweringLlmSetting = ragConfiguration.llmSetting, - questionAnsweringPrompt = PromptTemplate( - formatter = Formatter.F_STRING.id, - template = ragConfiguration.llmSetting.prompt, + questionCondensingLlmSetting = ragConfiguration.questionCondensingLlmSetting, + questionCondensingPrompt = ragConfiguration.questionCondensingPrompt, + questionAnsweringLlmSetting = ragConfiguration.getQuestionAnsweringLLMSetting(), + questionAnsweringPrompt = questionAnsweringPrompt.copy( inputs = mapOf( "question" to action.toString(), "locale" to userPreferences.locale.displayLanguage, @@ -202,7 +200,7 @@ vectorStoreSetting = vectorStoreSetting, observabilitySetting = botDefinition.observabilityConfiguration?.setting, documentsRequired = ragConfiguration.documentsRequired, - ), debug = action.metadata.debugEnabled || ragDebugEnabled + ), debug = action.metadata.debugEnabled || ragConfiguration.debugEnabled ) // Handle RAG response @@ -232,7 +230,7 @@ * Create a dialog history (Human and Bot message) * @param dialog */ - private fun getDialogHistory(dialog: Dialog): List<ChatMessage> = dialog.stories.flatMap { it.actions }.mapNotNull { + private fun getDialogHistory(dialog: Dialog, nLastMessages: Int): List<ChatMessage> = dialog.stories.flatMap { it.actions }.mapNotNull { when (it) { is SendSentence -> if (it.text == null) null else ChatMessage( diff --git a/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt b/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt index 392e5b6c88..2f42119a04 100644 --- a/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt +++ b/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt @@ -17,6 +17,7 @@ package ai.tock.bot.mongo import ai.tock.bot.admin.bot.rag.BotRAGConfiguration +import ai.tock.genai.orchestratorclient.requests.PromptTemplate import ai.tock.genai.orchestratorcore.models.em.OpenAIEMSetting import ai.tock.genai.orchestratorcore.models.llm.OpenAILLMSetting import ai.tock.shared.security.key.RawSecretKey @@ -44,13 +45,13 @@ internal class BotRAGConfigurationMongoDAOTest : AbstractTest() { "namespace1", "botId1", false, - llmSetting = OpenAILLMSetting( + questionAnsweringLlmSetting = OpenAILLMSetting( apiKey = RawSecretKey("apiKey1"), model = "modelName1", temperature = "1F", - prompt = "prompt1", baseUrl = "https://api.openai.com/v1" ), + questionAnsweringPrompt = PromptTemplate(template = "prompt template"), emSetting = OpenAIEMSetting( apiKey = RawSecretKey("apiKey1"), model = "modelName1", @@ -72,13 +73,13 @@ "namespace1", "botId1", false, - llmSetting =
OpenAILLMSetting( + questionAnsweringLlmSetting = OpenAILLMSetting( apiKey = RawSecretKey("apiKey1"), model = "modelName1", baseUrl = "https://api.openai.com/v1", temperature = "1F", - prompt = "prompt1" ), + questionAnsweringPrompt = PromptTemplate(template = "prompt template"), emSetting = OpenAIEMSetting( apiKey = RawSecretKey("apiKey1"), baseUrl = "https://api.openai.com/v1", @@ -92,13 +93,13 @@ "namespace1", "botId2", false, - llmSetting = OpenAILLMSetting( + questionAnsweringLlmSetting = OpenAILLMSetting( apiKey = RawSecretKey("apiKey1"), model = "modelName1", temperature = "1F", baseUrl = "https://api.openai.com/v1", - prompt = "prompt1" ), + questionAnsweringPrompt = PromptTemplate(template = "prompt template"), emSetting = OpenAIEMSetting( apiKey = RawSecretKey("apiKey1"), baseUrl = "https://api.openai.com/v1", @@ -126,13 +127,13 @@ "namespace1", "botId1", false, - llmSetting = OpenAILLMSetting( + questionAnsweringLlmSetting = OpenAILLMSetting( apiKey = RawSecretKey("apiKey1"), model = "modelName1", temperature = "1F", baseUrl = "https://api.openai.com/v1", - prompt = "prompt1" ), + questionAnsweringPrompt = PromptTemplate(template = "prompt template"), emSetting = OpenAIEMSetting( apiKey = RawSecretKey("apiKey1"), baseUrl = "https://api.openai.com/v1", diff --git a/docs/docs/en/dev/gen_ai_orchestrator/api.md b/docs/docs/en/dev/gen_ai_orchestrator/api.md new file mode 100644 index 0000000000..5f726a1a49 --- /dev/null +++ b/docs/docs/en/dev/gen_ai_orchestrator/api.md @@ -0,0 +1,442 @@ + +# Tock LLM Orchestrator APIs +| # | Resource | Description | HTTP Method | Query params | Body - Response | +|----|-------------------------------------------------|---------------------------------------------------------------|---------------|--------------|---------------------------------------------| +| 01 | **/llm-providers** | Get all supported Large Language Model providers | `GET` | | [See](#api-llm-providers) | +| 02 | **/llm-providers/{provider-id}** | Get a specific supported Large Language Model provider | `GET` | | [See](#api-llm-providers-get-one) | +| 03 | **/llm-providers/{provider-id}/setting/example** | Get an example for a specific Large Language Model setting | `GET` | | [See](#api-llm-providers-get-setting) | +| 04 | **/llm-providers/{provider-id}/setting/status** | Check the setting for a specific Large Language Model provider | `POST` | | [See](#api-llm-providers-post-setting) | +| 05 | **/em-providers** | Get all supported Embedding Model providers | `GET` | | [See](#api-em-providers) | +| 06 | **/em-providers/{provider-id}** | Get a specific supported Embedding Model provider | `GET` | | [See](#api-em-providers-get-one) | +| 07 | **/em-providers/{provider-id}/setting/example** | Get an example for a specific Embedding Model setting | `GET` | | [See](#api-em-providers-get-setting) | +| 08 | **/em-providers/{provider-id}/setting/status** | Check the setting for a specific Embedding Model provider | `POST` | | [See](#api-em-providers-post-setting) | +| 09 | **/rag** | Ask a question using a knowledge base (documents) as context | `POST` | debug=true | [See](#api-rag) | +| 10 | **/completion/sentence-generation** | Generate sentences | `POST` | debug=true | [See](#api-completion-sentence-generation) | +--- +
+ +#### API-01 : `[GET]` /llm-providers + +#### Response + +```python +class LLMProvidersResponse(BaseModel): + providers: list[LLMProvider] +``` + +--- +
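As a quick sanity check, the listing endpoint can be exercised with any HTTP client. The sketch below is illustrative only: it assumes an orchestrator reachable at `http://localhost:8000` (the base URL used in the error examples further down); the route and response shape come from this page.

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local orchestrator instance

# List the supported LLM providers.
response = requests.get(f"{BASE_URL}/llm-providers", timeout=10)
response.raise_for_status()

# LLMProvidersResponse shape: {"providers": ["OpenAI", "AzureOpenAIService"]}
print(response.json()["providers"])
```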
+ +#### API-02 : `[GET]` /llm-providers/{provider-id} + +#### Response + +```python +class LLMProviderResponse(BaseModel): + provider: LLMProvider +``` + +--- +
+ +#### API-03 : `[GET]` /llm-providers/{provider-id}/setting/example + +#### Response + +```python +class LLMProviderSettingExampleResponse(BaseModel): + setting: LLMSetting +``` + +--- +
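The example endpoint returns a pre-filled setting skeleton, a convenient starting point before substituting a real API key. A minimal sketch, under the same local-instance assumption as above:

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local orchestrator instance

# Fetch an example LLMSetting for the OpenAI provider.
example = requests.get(
    f"{BASE_URL}/llm-providers/OpenAI/setting/example", timeout=10
).json()
print(example["setting"])
```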
+ +#### API-04 : `[POST]` /llm-providers/{provider-id}/setting/status + +#### Body + +```python +class LLMProviderSettingStatusQuery(BaseModel): + setting: LLMSetting +``` +#### Response + +```python +class LLMProviderSettingStatusResponse(BaseModel): + valid: bool + errors: list[Error] +``` + +--- +
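For instance, a status check for an `OpenAILLMSetting` could look like the hedged sketch below; the field names follow the Schemas section at the bottom of this page, and the key value is a placeholder.

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local orchestrator instance

# Body per LLMProviderSettingStatusQuery, wrapping an OpenAILLMSetting.
query = {
    "setting": {
        "provider": "OpenAI",
        "api_key": "sk-...",  # placeholder
        "temperature": "0.7",
        "model": "gpt-3.5-turbo",
    }
}

response = requests.post(
    f"{BASE_URL}/llm-providers/OpenAI/setting/status", json=query, timeout=30
)

# LLMProviderSettingStatusResponse: a valid flag plus a list of Error objects.
status = response.json()
print(status["valid"], status["errors"])
```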
+ +#### API-05 : `[GET]` /em-providers + +#### Response + +```python +class EMProvidersResponse(BaseModel): + providers: list[LLMProvider] +``` + +--- +
+ +#### API-06 : `[GET]` /em-providers/{provider-id} + +#### Response + +```python +class EMProviderResponse(BaseModel): + provider: LLMProvider +``` + +--- +
+ +#### API-07 : `[GET]` /em-providers/{provider-id}/setting/example + +#### Response + +```python +class EMProviderSettingExampleResponse(BaseModel): + setting: EMSetting +``` + +--- +
+ +#### API-08 : `[POST]` /em-providers/{provider-id}/setting/status + +#### Body + +```python +class EMProviderSettingStatusQuery(BaseModel): + setting: EMSetting +``` +#### Response + +```python +class EMProviderSettingStatusResponse(BaseModel): + valid: bool + errors: list[Error] +``` + +--- +
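The embedding-side check mirrors API-04. A hedged sketch for an `AzureOpenAIEMSetting`, with placeholder values modeled on the provider configuration examples elsewhere in these docs:

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local orchestrator instance

# AzureOpenAIEMSetting shape from the Schemas section; values are placeholders.
query = {
    "setting": {
        "provider": "AzureOpenAIService",
        "api_key": "azure-key",
        "deployment_name": "custom-deployment-name",
        "model": "text-embedding-ada-002",
        "api_base": "https://custom-api-name.azure-api.net",
        "api_version": "2023-03-15-preview",
    }
}

response = requests.post(
    f"{BASE_URL}/em-providers/AzureOpenAIService/setting/status", json=query, timeout=30
)
status = response.json()
print(status["valid"], status["errors"])
```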
+ +#### API-09 : `[POST]` /rag + +#### Body : + +```python +class RagQuery(BaseModel): + dialog: Optional[DialogDetails] + question_condensing_llm_setting: LLMSetting + question_condensing_prompt: Optional[PromptTemplate] + question_answering_llm_setting: LLMSetting + question_answering_prompt: Optional[PromptTemplate] + embedding_question_em_setting: EMSetting + guardrail_setting: Optional[GuardrailSetting] + observability_setting: Optional[ObservabilitySetting] + compressor_setting: Optional[DocumentCompressorSetting] + document_index_name: str + document_search_params: DocumentSearchParams +``` + +#### Response : + +```python +class RagResponse(BaseModel): + answer: TextWithFootnotes + debug: Optional[Any] = None +``` + +--- +
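To make the RagQuery model concrete, here is an illustrative request body assembled strictly from the models documented on this page. The index name, question, history, and filter values are placeholders; optional settings (guardrail, observability, compressor) are omitted.

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local orchestrator instance

rag_query = {
    "dialog": {
        "dialog_id": "dialog-1",
        "user_id": "user-1",
        "history": [
            {"text": "Hello", "type": "HUMAN"},
            {"text": "Hi, how can I help you?", "type": "AI"},
        ],
        "tags": ["connector:web"],
    },
    "question_condensing_llm_setting": {
        "provider": "OpenAI",
        "api_key": "sk-...",  # placeholder
        "temperature": "0.0",
        "model": "gpt-3.5-turbo",
    },
    "question_answering_llm_setting": {
        "provider": "OpenAI",
        "api_key": "sk-...",  # placeholder
        "temperature": "0.7",
        "model": "gpt-3.5-turbo",
    },
    "question_answering_prompt": {
        "formatter": "f-string",
        "template": "Answer the question using the context. Question: {question}",
        "inputs": {"question": "How do I reset my password?"},
    },
    "embedding_question_em_setting": {
        "provider": "OpenAI",
        "api_key": "sk-...",  # placeholder
        "model": "text-embedding-ada-002",
    },
    "document_index_name": "my-index",  # placeholder
    "document_search_params": {
        "provider": "OpenSearch",
        "k": 4,
        # Hypothetical term filter; adjust to your index layout.
        "filter": [{"term": {"metadata.index_session_id.keyword": "session-1"}}],
    },
}

# Per the endpoints table, debug=true also returns the debug payload.
response = requests.post(
    f"{BASE_URL}/rag", params={"debug": "true"}, json=rag_query, timeout=60
)
answer = response.json()["answer"]
print(answer["text"], answer["footnotes"])
```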
+ +#### API-10 : `[POST]` /completion/sentence-generation + +#### Body : + +```python +class SentenceGenerationQuery(BaseModel): + llm_setting: LLMSetting +``` + +#### Response : + +```python +class SentenceGenerationResponse(BaseModel): + sentences: list[str] +``` + +--- +
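A hedged sketch of a sentence-generation call. Only `llm_setting` is confirmed by the body model above; the `prompt` field is an assumption mirroring the Kotlin `SentenceGenerationQuery` in this change, which now sends the PromptTemplate alongside the LLM setting.

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local orchestrator instance

sentence_generation_query = {
    "llm_setting": {
        "provider": "OpenAI",
        "api_key": "sk-...",  # placeholder
        "temperature": "1.0",
        "model": "gpt-3.5-turbo",
    },
    # Assumed field name (see lead-in): a Jinja2 PromptTemplate with inputs.
    "prompt": {
        "formatter": "jinja2",
        "template": "Generate {{ nb_sentences }} sentences like: {{ sentences }}",
        "inputs": {"nb_sentences": 5, "sentences": ["I want to open an account"]},
    },
}

response = requests.post(
    f"{BASE_URL}/completion/sentence-generation", json=sentence_generation_query
)
print(response.json()["sentences"])
```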
+ +## Schemas +```python +class LLMProvider(str, Enum): + OPEN_AI = 'OpenAI' + AZURE_OPEN_AI_SERVICE = 'AzureOpenAIService' + +class BaseLLMSetting(BaseModel): + provider: LLMProvider + api_key: str + temperature: str + +class BaseEMSetting(BaseModel): + provider: LLMProvider + api_key: str + +class OpenAILLMSetting(BaseLLMSetting): + provider: Literal[LLMProvider.OPEN_AI] + model: str + +class AzureOpenAILLMSetting(BaseLLMSetting): + provider: Literal[LLMProvider.AZURE_OPEN_AI_SERVICE] + deployment_name: str + model: Optional[str] + api_base: str + api_version: str + +LLMSetting = Annotated[ + Union[OpenAILLMSetting, AzureOpenAILLMSetting], + Body(discriminator='provider') +] + +class OpenAIEMSetting(BaseEMSetting): + provider: Literal[LLMProvider.OPEN_AI] + model: str + +class AzureOpenAIEMSetting(BaseEMSetting): + provider: Literal[LLMProvider.AZURE_OPEN_AI_SERVICE] + deployment_name: str + model: Optional[str] + api_base: str + api_version: str + +EMSetting = Annotated[ + Union[OpenAIEMSetting, AzureOpenAIEMSetting], + Body(discriminator='provider') +] + +class VectorStoreProvider(str, Enum): + OPEN_SEARCH = 'OpenSearch' + +class BaseVectorStoreSearchParams(ABC, BaseModel): + provider: VectorStoreProvider + + +class OpenSearchParams(BaseVectorStoreSearchParams): + provider: Literal[VectorStoreProvider.OPEN_SEARCH] + k: int + filter: List[OpenSearchTermParams] + +class OpenSearchTermParams(BaseModel): + term: dict + +DocumentSearchParams = Annotated[ + Union[OpenSearchParams], Body(discriminator='provider') +] + +class Footnote(BaseModel): + identifier: str + title: str + url: Optional[str] = None + +class TextWithFootnotes(BaseModel): + text: str + footnotes: list[Footnote] + +class DialogDetails(BaseModel): + dialog_id: Optional[str] + user_id: Optional[str] + history: list[ChatMessage] + tags: list[str] + +class ChatMessageType(str, Enum): + USER = 'HUMAN' + AI = 'AI' + +class ChatMessage(BaseModel): + text: str + type: ChatMessageType + +class PromptTemplate(BaseModel): + formatter: PromptFormatter + template: str + inputs: dict + +class PromptFormatter(str, Enum): + F_STRING = 'f-string' + JINJA2 = 'jinja2' + +class Error(BaseModel): + code: str + message: str +``` + +--- +
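Since this change moves prompts off the LLM settings and onto `PromptTemplate`, it is worth seeing how the two `PromptFormatter` values render. The snippet below mirrors standard f-string and Jinja2 semantics; it is an illustration, not the orchestrator's internal code.

```python
from jinja2 import Template

# f-string formatter: placeholders use {name} syntax.
f_string_prompt = {
    "formatter": "f-string",
    "template": "Answer in {locale}: {question}",
    "inputs": {"locale": "French", "question": "What is Tock?"},
}
print(f_string_prompt["template"].format(**f_string_prompt["inputs"]))

# jinja2 formatter: placeholders use {{ name }} syntax.
jinja2_prompt = {
    "formatter": "jinja2",
    "template": "Answer in {{ locale }}: {{ question }}",
    "inputs": {"locale": "French", "question": "What is Tock?"},
}
print(Template(jinja2_prompt["template"]).render(**jinja2_prompt["inputs"]))
```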
+ +## Errors +#### General : +```json + [ + { + "code": 1000, + "message": "Unknown AI provider.", + "detail": null, + "info": { + "provider": "TOTO", + "error": "NotFoundError", + "cause": "'TOTO' is not accepted. Accepted values are : ['OpenAI', 'AzureOpenAIService']", + "request": "[POST] http://localhost:8000/llm-providers/TOTO/setting/status" + } + }, + { + "code": 1001, + "message": "Bad query.", + "detail": "The request seems to be invalid.", + "info": { + "provider": "AzureOpenAIService", + "error": "BadRequestError", + "cause": "'AzureOpenAIService' is different from the provider ID 'OpenAI' given for setting.", + "request": "[POST] http://localhost:8000/llm-providers/AzureOpenAIService/setting/status" + } + } +] +``` +#### LLMProvider OpenAI : +```json + [ + { + "code": 2001, + "message": "Connection error to the AI provider API.", + "detail": "Check the requested URL, your network settings, proxy configuration, SSL certificates, or firewall rules.", + "info": { + "provider": "OpenAI", + "error": "APIConnectionError", + "cause": "Connection error.", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2002, + "message": "Authentication error to the AI provider API.", + "detail": "Check your API key or token and make sure it is correct and active.", + "info": { + "provider": "OpenAI", + "error": "AuthenticationError", + "cause": "Error code: 401 - {'error': {'message': 'Incorrect API key provided: ab7*****************************IV4B. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2003, + "message": "An AI provider resource was not found.", + "detail": "The request URL base is correct, but the path or a query parameter is not.", + "info": { + "provider": "OpenAI", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'message': 'This is not a chat model and thus not supported in the v1/chat/completions endpoint. Did you mean to use v1/completions?', 'type': 'invalid_request_error', 'param': 'model', 'code': None}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2004, + "message": "Unknown AI provider model.", + "detail": "Consult the official documentation for accepted values.", + "info": { + "provider": "OpenAI", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'message': 'The model `gpt-3.5-TOTO` does not exist', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2007, + "message": "The model's context length has been exceeded.", + "detail": "Reduce the length of the prompt message.", + "info": { + "provider": "OpenAI", + "error": "BadRequestError", + "cause": "Error code: 400 - {'error': {'message': \"This model's maximum context length is 4097 tokens. However, your messages resulted in 29167 tokens. 
Please reduce the length of the messages.\", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + } +] +``` + +#### LLMProvider AzureOpenAIService : +```json + [ + { + "code": 2001, + "message": "Connection error to the AI provider API.", + "detail": "Check the requested URL, your network settings, proxy configuration, SSL certificates, or firewall rules.", + "info": { + "provider": "AzureOpenAIService", + "error": "APIConnectionError", + "cause": "Connection error.", + "request": "[POST] https://conversationnel-api-arkea.azure-oapi.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-preview" + } + }, + { + "code": 2002, + "message": "Authentication error to the AI provider API.", + "detail": "Check your API key or token and make sure it is correct and active.", + "info": { + "provider": "AzureOpenAIService", + "error": "AuthenticationError", + "cause": "Error code: 401 - {'statusCode': 401, 'message': 'Access denied due to invalid subscription key. Make sure to provide a valid key for an active subscription.'}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-preview" + } + }, + { + "code": 2003, + "message": "An AI provider resource was not found.", + "detail": "The request URL base is correct, but the path or a query parameter is not.", + "info": { + "provider": "AzureOpenAIService", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'code': '404', 'message': 'Resource not found'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-toto" + } + }, + { + "code": 2005, + "message": "Unknown AI provider deployment.", + "detail": "Consult the official documentation for accepted values.", + "info": { + "provider": "AzureOpenAIService", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'code': 'DeploymentNotFound', 'message': 'The API deployment for this resource does not exist. If you created the deployment within the last 5 minutes, please wait a moment and try again.'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4f/chat/completions?api-version=2023-03-15-preview" + } + }, + { + "code": 2006, + "message": "AI provider API error.", + "detail": "Bad request.", + "info": { + "provider": "AzureOpenAIService", + "error": "BadRequestError", + "cause": "Error code: 400 - {'error': {'code': 'OperationNotSupported', 'message': 'The embeddings operation does not work with the specified model, gpt-4. Please choose different model and try again. You can learn more about which models can be used with each operation here: https://go.microsoft.com/fwlink/?linkid=2197993.'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net//openai/deployments/squadconv-gpt4/embeddings?api-version=2023-03-15-preview" + } + }, + { + "code": 2007, + "message": "The model's context length has been exceeded.", + "detail": "Reduce the length of the prompt message.", + "info": { + "provider": "AzureOpenAIService", + "error": "BadRequestError", + "cause": "Error code: 400 - {'error': {'message': \"This model's maximum context length is 8192 tokens. However, your messages resulted in 29167 tokens. 
Please reduce the length of the messages.\", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-preview" + } + } +] +``` diff --git a/docs/docs/en/user/studio/gen-ai/gen-ai.md b/docs/docs/en/user/studio/gen-ai/gen-ai.md index 09f8a44c46..f8a9959061 100644 --- a/docs/docs/en/user/studio/gen-ai/gen-ai.md +++ b/docs/docs/en/user/studio/gen-ai/gen-ai.md @@ -4,14 +4,14 @@ title: Gen AI # Gen AI -Découvrez l'IA générative sur Tock : +Discover generative AI on Tock: -- [Le menu _Gen AI / RAG Settings_](../../../user/studio/gen-ai/features/gen-ai-feature-rag.md) -- [Le menu _Gen AI / RAG Exclusions_](../gen-ai/features/gen-ai-feature-rag-exclusion.md) -- [Le menu _Gen AI / Sentence Generation_](../../../user/studio/gen-ai/features/gen-ai-feature-sentence-generation.md) -- [Le menu _Gen AI / Observability Settings_](../../../user/studio/gen-ai/features/gen-ai-feature-observability.md) -- [Le menu _Gen AI / Vector Store Settings_](../../../user/studio/gen-ai/features/gen-ai-feature-vector-store.md) +- [The _Gen AI / RAG Settings_ menu](../../../user/studio/gen-ai/features/gen-ai-feature-rag.md) +- [The _Gen AI / RAG Exclusions_ menu](../gen-ai/features/gen-ai-feature-rag-exclusion.md) +- [The _Gen AI / Sentence Generation_ menu](../../../user/studio/gen-ai/features/gen-ai-feature-sentence-generation.md) +- [The _Gen AI / Observability Settings_ menu](../../../user/studio/gen-ai/features/gen-ai-feature-observability.md) +- [The _Gen AI / Vector Store Settings_ menu](../../../user/studio/gen-ai/features/gen-ai-feature-vector-store.md) -Démo : +Demo: -[![RAG et LLM Open Source ? Vers des IA Génératives plus responsables](https://img.youtube.com/vi/lYmUeYoVkwc/0.jpg)](https://youtu.be/lYmUeYoVkwc) +[![Open Source LLM and RAG - Towards more responsible Generative AI](https://img.youtube.com/vi/lYmUeYoVkwc/0.jpg)](https://youtu.be/lYmUeYoVkwc) diff --git a/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md b/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md index 0163f75eed..88cd4d346c 100644 --- a/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md +++ b/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md @@ -2,16 +2,15 @@ title: Fournisseurs des modèles LLM et d'Embedding --- -# Fournisseurs des modèles LLM et d'Embedding - -Voici la liste des fournisseurs des modèles LLM et d'Embedding pris en compte par Tock : +# LLM and Embedding model providers +Here is the list of LLM and Embedding model providers supported by Tock: -<th>Fournisseur de l'IA</th> -<th>Configuration du LLM</th> -<th>Configuration de l'Embedding</th> +<th>AI provider</th> +<th>LLM configuration</th> +<th>Embedding configuration</th> @@ -29,7 +28,6 @@ OpenAI
"value": "aebb4b****************7b25e3371" }, "temperature": "1.0", - "prompt": "Customized prompt for the use case", "model": "gpt-3.5-turbo" } @@ -61,7 +59,6 @@ AzureOpenAIService
"value": "aebb4b****************7b25e3371" }, "temperature": "1.0", - "prompt": "Customized prompt for the use case", "api_base": "https://custom-api-name.azure-api.net", "deployment_name": "custom-deployment-name", "model": "gpt-4o", diff --git a/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-observability.md b/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-observability.md index 059fc1d0ee..8462a0b298 100644 --- a/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-observability.md +++ b/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-observability.md @@ -2,14 +2,14 @@ title: Fournisseurs d'observabilité des LLMs --- -# Fournisseurs d'observabilité des LLMs. +# LLM observability providers. -Voici la liste des fournisseurs d'observabilité des LLM pris en compte par Tock : +Here is the list of LLM observability providers supported by Tock:
-<th>Fournisseur d'observabilité</th> +<th>LLM observability providers</th> <th>Configuration</th> diff --git a/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-vector-store.md b/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-vector-store.md index dbefd650e8..3225650bad 100644 --- a/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-vector-store.md +++ b/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-vector-store.md @@ -2,14 +2,14 @@ title: Fournisseurs de base vectorielle --- -# Fournisseurs de base vectorielle. +# Vector store providers. -Voici la liste des fournisseurs de base vectorielle pris en compte par Tock : +Here is the list of vector store providers supported by Tock:
diff --git a/docs/docs/fr/dev/gen_ai_orchestrator/api.md b/docs/docs/fr/dev/gen_ai_orchestrator/api.md new file mode 100644 index 0000000000..5f726a1a49 --- /dev/null +++ b/docs/docs/fr/dev/gen_ai_orchestrator/api.md @@ -0,0 +1,442 @@ + +# Tock LLM Orchestrator APIs +| # | Resource | Description | HTTP Method | Query params | Body - Response | +|----|-------------------------------------------------|---------------------------------------------------------------|---------------|--------------|---------------------------------------------| +| 01 | **/llm-providers** | Get all supported Large Language Model providers | `GET` | | [Voir](#api-llm-providers) | +| 02 | **/llm-providers/{provider-id}** | Get a specific supported Large Language Model provider | `GET` | | [Voir](#api-llm-providers-get-one) | +| 03 | **/llm-providers/{provider-id}/setting/example** | Get an example for a specific Large Language Model setting | `GET` | | [Voir](#api-llm-providers-get-setting) | +| 04 | **/llm-providers/{provider-id}/setting/status** | Check the setting for a specific Large Language Model provider | `POST` | | [Voir](#api-llm-providers-post-setting) | +| 05 | **/em-providers** | Get all supported Embedding Model providers | `GET` | | [Voir](#api-em-providers) | +| 06 | **/em-providers/{provider-id}** | Get a specific supported Embedding Model provider | `GET` | | [Voir](#api-em-providers-get-one) | +| 07 | **/em-providers/{provider-id}/setting/example** | Get an example for a specific Embedding Model setting | `GET` | | [Voir](#api-em-providers-get-setting) | +| 08 | **/em-providers/{provider-id}/setting/status** | Check the setting for a specific Embedding Model provider | `POST` | | [Voir](#api-em-providers-post-setting) | +| 09 | **/rag** | Ask a question using a knowledge base (documents) as context | `POST` | debug=true | [Voir](#api-rag) | +| 10 | **/completion/sentence-generation** | Generate sentences | `POST` | debug=true | [Voir](#api-completion-sentence-generation) | +--- +
+ +#### API-01 : `[GET]` /llm-providers + +#### Response + +```python +class LLMProvidersResponse(BaseModel): + providers: list[LLMProvider] +``` + +--- +
+ +#### API-02 : `[GET]` /llm-providers/{provider-id} + +#### Response + +```python +class LLMProviderResponse(BaseModel): + provider: LLMProvider +``` + +--- +
+ +#### API-03 : `[GET]` /llm-providers/{provider-id}/setting/example + +#### Response + +```python +class LLMProviderSettingExampleResponse(BaseModel): + setting: LLMSetting +``` + +--- +
+ +#### API-04 : `[POST]` /llm-providers/{provider-id}/setting/status + +#### Body + +```python +class LLMProviderSettingStatusQuery(BaseModel): + setting: LLMSetting +``` +#### Response + +```python +class LLMProviderSettingStatusResponse(BaseModel): + valid: bool + errors: list[Error] +``` + +--- +
+ +#### API-05 : `[GET]` /em-providers + +#### Response + +```python +class EMProvidersResponse(BaseModel): + providers: list[LLMProvider] +``` + +--- +
+ +#### API-06 : `[GET]` /em-providers/{provider-id} + +#### Response + +```python +class EMProviderResponse(BaseModel): + provider: LLMProvider +``` + +--- +
+ +#### API-07 : `[GET]` /em-providers/{provider-id}/setting/example + +#### Response + +```python +class EMProviderSettingExampleResponse(BaseModel): + setting: EMSetting +``` + +--- +
+ +#### API-08 : `[POST]` /em-providers/{provider-id}/setting/status + +#### Body + +```python +class EMProviderSettingStatusQuery(BaseModel): + setting: EMSetting +``` +#### Response + +```python +class EMProviderSettingStatusResponse(BaseModel): + valid: bool + errors: list[Error] +``` + +--- +
+ +#### API-09 : `[POST]` /rag + +#### Body : + +```python +class RagQuery(BaseModel): + dialog: Optional[DialogDetails] + question_condensing_llm_setting: LLMSetting + question_condensing_prompt: Optional[PromptTemplate] + question_answering_llm_setting: LLMSetting + question_answering_prompt: Optional[PromptTemplate] + embedding_question_em_setting: EMSetting + guardrail_setting: Optional[GuardrailSetting] + observability_setting: Optional[ObservabilitySetting] + compressor_setting: Optional[DocumentCompressorSetting] + document_index_name: str + document_search_params: DocumentSearchParams +``` + +#### Response : + +```python +class RagResponse(BaseModel): + answer: TextWithFootnotes + debug: Optional[Any] = None +``` + +--- +
+ +#### API-10 : `[POST]` /completion/sentence-generation + +#### Body : + +```python +class SentenceGenerationQuery(BaseModel): + llm_setting: LLMSetting +``` + +#### Response : + +```python +class SentenceGenerationResponse(BaseModel): + sentences: list[str] +``` + +--- +
+ +## Schemas +```python +class LLMProvider(str, Enum): + OPEN_AI = 'OpenAI' + AZURE_OPEN_AI_SERVICE = 'AzureOpenAIService' + +class BaseLLMSetting(BaseModel): + provider: LLMProvider + api_key: str + temperature: str + +class BaseEMSetting(BaseModel): + provider: LLMProvider + api_key: str + +class OpenAILLMSetting(BaseLLMSetting): + provider: Literal[LLMProvider.OPEN_AI] + model: str + +class AzureOpenAILLMSetting(BaseLLMSetting): + provider: Literal[LLMProvider.AZURE_OPEN_AI_SERVICE] + deployment_name: str + model: Optional[str] + api_base: str + api_version: str + +LLMSetting = Annotated[ + Union[OpenAILLMSetting, AzureOpenAILLMSetting], + Body(discriminator='provider') +] + +class OpenAIEMSetting(BaseEMSetting): + provider: Literal[LLMProvider.OPEN_AI] + model: str + +class AzureOpenAIEMSetting(BaseEMSetting): + provider: Literal[LLMProvider.AZURE_OPEN_AI_SERVICE] + deployment_name: str + model: Optional[str] + api_base: str + api_version: str + +EMSetting = Annotated[ + Union[OpenAIEMSetting, AzureOpenAIEMSetting], + Body(discriminator='provider') +] + +class VectorStoreProvider(str, Enum): + OPEN_SEARCH = 'OpenSearch' + +class BaseVectorStoreSearchParams(ABC, BaseModel): + provider: VectorStoreProvider + + +class OpenSearchParams(BaseVectorStoreSearchParams): + provider: Literal[VectorStoreProvider.OPEN_SEARCH] + k: int + filter: List[OpenSearchTermParams] + +class OpenSearchTermParams(BaseModel): + term: dict + +DocumentSearchParams = Annotated[ + Union[OpenSearchParams], Body(discriminator='provider') +] + +class Footnote(BaseModel): + identifier: str + title: str + url: Optional[str] = None + +class TextWithFootnotes(BaseModel): + text: str + footnotes: list[Footnote] + +class DialogDetails(BaseModel): + dialog_id: Optional[str] + user_id: Optional[str] + history: list[ChatMessage] + tags: list[str] + +class ChatMessageType(str, Enum): + USER = 'HUMAN' + AI = 'AI' + +class ChatMessage(BaseModel): + text: str + type: ChatMessageType + +class PromptTemplate(BaseModel): + formatter: PromptFormatter + template: str + inputs: dict + +class PromptFormatter(str, Enum): + F_STRING = 'f-string' + JINJA2 = 'jinja2' + +class Error(BaseModel): + code: str + message: str +``` + +--- +
+ +## Errors +#### General : +```json + [ + { + "code": 1000, + "message": "Unknown AI provider.", + "detail": null, + "info": { + "provider": "TOTO", + "error": "NotFoundError", + "cause": "'TOTO' is not accepted. Accepted values are : ['OpenAI', 'AzureOpenAIService']", + "request": "[POST] http://localhost:8000/llm-providers/TOTO/setting/status" + } + }, + { + "code": 1001, + "message": "Bad query.", + "detail": "The request seems to be invalid.", + "info": { + "provider": "AzureOpenAIService", + "error": "BadRequestError", + "cause": "'AzureOpenAIService' is different from the provider ID 'OpenAI' given for setting.", + "request": "[POST] http://localhost:8000/llm-providers/AzureOpenAIService/setting/status" + } + } +] +``` +#### LLMProvider OpenAI : +```json + [ + { + "code": 2001, + "message": "Connection error to the AI provider API.", + "detail": "Check the requested URL, your network settings, proxy configuration, SSL certificates, or firewall rules.", + "info": { + "provider": "OpenAI", + "error": "APIConnectionError", + "cause": "Connection error.", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2002, + "message": "Authentication error to the AI provider API.", + "detail": "Check your API key or token and make sure it is correct and active.", + "info": { + "provider": "OpenAI", + "error": "AuthenticationError", + "cause": "Error code: 401 - {'error': {'message': 'Incorrect API key provided: ab7*****************************IV4B. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2003, + "message": "An AI provider resource was not found.", + "detail": "The request URL base is correct, but the path or a query parameter is not.", + "info": { + "provider": "OpenAI", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'message': 'This is not a chat model and thus not supported in the v1/chat/completions endpoint. Did you mean to use v1/completions?', 'type': 'invalid_request_error', 'param': 'model', 'code': None}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2004, + "message": "Unknown AI provider model.", + "detail": "Consult the official documentation for accepted values.", + "info": { + "provider": "OpenAI", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'message': 'The model `gpt-3.5-TOTO` does not exist', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2007, + "message": "The model's context length has been exceeded.", + "detail": "Reduce the length of the prompt message.", + "info": { + "provider": "OpenAI", + "error": "BadRequestError", + "cause": "Error code: 400 - {'error': {'message': \"This model's maximum context length is 4097 tokens. However, your messages resulted in 29167 tokens. 
Please reduce the length of the messages.\", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + } +] +``` + +#### LLMProvider AzureOpenAIService : +```json + [ + { + "code": 2001, + "message": "Connection error to the AI provider API.", + "detail": "Check the requested URL, your network settings, proxy configuration, SSL certificates, or firewall rules.", + "info": { + "provider": "AzureOpenAIService", + "error": "APIConnectionError", + "cause": "Connection error.", + "request": "[POST] https://conversationnel-api-arkea.azure-oapi.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-preview" + } + }, + { + "code": 2002, + "message": "Authentication error to the AI provider API.", + "detail": "Check your API key or token and make sure it is correct and active.", + "info": { + "provider": "AzureOpenAIService", + "error": "AuthenticationError", + "cause": "Error code: 401 - {'statusCode': 401, 'message': 'Access denied due to invalid subscription key. Make sure to provide a valid key for an active subscription.'}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-preview" + } + }, + { + "code": 2003, + "message": "An AI provider resource was not found.", + "detail": "The request URL base is correct, but the path or a query parameter is not.", + "info": { + "provider": "AzureOpenAIService", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'code': '404', 'message': 'Resource not found'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-toto" + } + }, + { + "code": 2005, + "message": "Unknown AI provider deployment.", + "detail": "Consult the official documentation for accepted values.", + "info": { + "provider": "AzureOpenAIService", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'code': 'DeploymentNotFound', 'message': 'The API deployment for this resource does not exist. If you created the deployment within the last 5 minutes, please wait a moment and try again.'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4f/chat/completions?api-version=2023-03-15-preview" + } + }, + { + "code": 2006, + "message": "AI provider API error.", + "detail": "Bad request.", + "info": { + "provider": "AzureOpenAIService", + "error": "BadRequestError", + "cause": "Error code: 400 - {'error': {'code': 'OperationNotSupported', 'message': 'The embeddings operation does not work with the specified model, gpt-4. Please choose different model and try again. You can learn more about which models can be used with each operation here: https://go.microsoft.com/fwlink/?linkid=2197993.'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net//openai/deployments/squadconv-gpt4/embeddings?api-version=2023-03-15-preview" + } + }, + { + "code": 2007, + "message": "The model's context length has been exceeded.", + "detail": "Reduce the length of the prompt message.", + "info": { + "provider": "AzureOpenAIService", + "error": "BadRequestError", + "cause": "Error code: 400 - {'error': {'message': \"This model's maximum context length is 8192 tokens. However, your messages resulted in 29167 tokens. 
Please reduce the length of the messages.\", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-preview" + } + } +] +``` diff --git a/docs/docs/fr/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md b/docs/docs/fr/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md index 0163f75eed..1a2e9301b9 100644 --- a/docs/docs/fr/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md +++ b/docs/docs/fr/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md @@ -29,7 +29,6 @@ OpenAI
"value": "aebb4b****************7b25e3371" }, "temperature": "1.0", - "prompt": "Customized prompt for the use case", "model": "gpt-3.5-turbo" } @@ -61,7 +60,6 @@ AzureOpenAIService
"value": "aebb4b****************7b25e3371" }, "temperature": "1.0", - "prompt": "Customized prompt for the use case", "api_base": "https://custom-api-name.azure-api.net", "deployment_name": "custom-deployment-name", "model": "gpt-4o", diff --git a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/PromptTemplate.kt b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/PromptTemplate.kt index 42560a41a0..eb85c4cb4f 100644 --- a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/PromptTemplate.kt +++ b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/PromptTemplate.kt @@ -21,8 +21,8 @@ enum class Formatter(val id: String) { JINJA2(id = "jinja2"); } -class PromptTemplate ( - val formatter: String, +data class PromptTemplate ( + val formatter: String = Formatter.JINJA2.id, val template: String, - val inputs: Map + val inputs: Map = emptyMap() ) \ No newline at end of file diff --git a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/RAGQuery.kt b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/RAGQuery.kt index dfb0bafddb..2e6e4564ba 100644 --- a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/RAGQuery.kt +++ b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/RAGQuery.kt @@ -23,9 +23,9 @@ import ai.tock.genai.orchestratorcore.models.vectorstore.DocumentSearchParamsBas import ai.tock.genai.orchestratorcore.models.vectorstore.VectorStoreSetting data class RAGQuery( - // val condenseQuestionLlmSetting: LLMSetting, - // val condenseQuestionPrompt: PromptTemplate, val dialog: DialogDetails?, + val questionCondensingLlmSetting: LLMSetting?, + val questionCondensingPrompt: PromptTemplate?, val questionAnsweringLlmSetting: LLMSetting, val questionAnsweringPrompt: PromptTemplate, val embeddingQuestionEmSetting: EMSetting, diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/LLMSettingMapper.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/LLMSettingMapper.kt index dfd2e0baef..ce6ef81f52 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/LLMSettingMapper.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/LLMSettingMapper.kt @@ -37,7 +37,6 @@ object LLMSettingMapper { OpenAILLMSetting( apiKey = SecurityUtils.fetchSecretKeyValue(apiKey), temperature = temperature, - prompt = prompt, model = model, baseUrl = baseUrl ) @@ -45,7 +44,6 @@ object LLMSettingMapper { AzureOpenAILLMSetting( apiKey = SecurityUtils.fetchSecretKeyValue(apiKey), temperature = temperature, - prompt = prompt, apiBase = apiBase, deploymentName = deploymentName, model = model, @@ -54,7 +52,6 @@ object LLMSettingMapper { is OllamaLLMSetting -> OllamaLLMSetting( temperature = temperature, - prompt = prompt, model = model, baseUrl = baseUrl ) @@ -78,7 +75,6 @@ object LLMSettingMapper { OpenAILLMSetting( apiKey = SecurityUtils.createSecretKey(namespace, botId, feature, apiKey), temperature = temperature, - prompt = prompt, model = model, baseUrl = baseUrl ) @@ -86,14 +82,17 @@ object LLMSettingMapper { AzureOpenAILLMSetting( SecurityUtils.createSecretKey(namespace, botId, feature, apiKey), temperature = temperature, - prompt = prompt, apiBase = apiBase, deploymentName = deploymentName, apiVersion = apiVersion, 
model = model ) is OllamaLLMSetting -> - OllamaLLMSetting(temperature, prompt, model, baseUrl) + OllamaLLMSetting( + temperature = temperature, + model = model, + baseUrl = baseUrl + ) else -> throw IllegalArgumentException("Unsupported LLM Setting") } diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/VectorStoreSettingMapper.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/VectorStoreSettingMapper.kt index b3b97e25f8..8735486df1 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/VectorStoreSettingMapper.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/VectorStoreSettingMapper.kt @@ -37,11 +37,11 @@ object VectorStoreSettingMapper { when(this){ is OpenSearchVectorStoreSetting -> { val fetchedPassword = SecurityUtils.fetchSecretKeyValue(password) - return OpenSearchVectorStoreSetting(host, port, username, fetchedPassword, k) + return OpenSearchVectorStoreSetting(host, port, username, fetchedPassword) } is PGVectorStoreSetting -> { val fetchedPassword = SecurityUtils.fetchSecretKeyValue(password) - return PGVectorStoreSetting(host, port, username, fetchedPassword, k, database) + return PGVectorStoreSetting(host, port, username, fetchedPassword, database) } else -> throw IllegalArgumentException("Unsupported VectorStore Setting") @@ -61,11 +61,11 @@ object VectorStoreSettingMapper { when(this){ is OpenSearchVectorStoreSetting -> { val secretPassword = SecurityUtils.createSecretKey(namespace, botId, feature, password) - return OpenSearchVectorStoreSetting(host, port, username, secretPassword, k) + return OpenSearchVectorStoreSetting(host, port, username, secretPassword) } is PGVectorStoreSetting -> { val secretPassword = SecurityUtils.createSecretKey(namespace, botId, feature, password) - return PGVectorStoreSetting(host, port, username, secretPassword, k, database) + return PGVectorStoreSetting(host, port, username, secretPassword, database) } else -> throw IllegalArgumentException("Unsupported VectorStore Setting") diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/Constants.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/Constants.kt index 45b2e69c87..333015c2d0 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/Constants.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/Constants.kt @@ -32,6 +32,7 @@ object Constants { private const val GEN_AI_RAG="$GEN_AI/RAG" private const val GEN_AI_COMPLETION="$GEN_AI/COMPLETION" + const val GEN_AI_RAG_QUESTION_CONDENSING="$GEN_AI_RAG/questionCondensing" const val GEN_AI_RAG_QUESTION_ANSWERING="$GEN_AI_RAG/questionAnswering" const val GEN_AI_RAG_EMBEDDING_QUESTION="$GEN_AI_RAG/embeddingQuestion" diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/AzureOpenAILLMSetting.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/AzureOpenAILLMSetting.kt index aa1d33a32f..cb05ba484b 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/AzureOpenAILLMSetting.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/AzureOpenAILLMSetting.kt @@ -19,7 +19,8 @@ package ai.tock.genai.orchestratorcore.models.llm data class AzureOpenAILLMSetting( override val apiKey: T, override val temperature: String, - 
override val prompt: String, + @Deprecated("use PromptTemplate#prompt") + override val prompt: String? = null, val apiBase: String, val deploymentName: String, val apiVersion: String, diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/LLMSettingBase.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/LLMSettingBase.kt index d2ca82f105..ec13ba14fb 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/LLMSettingBase.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/LLMSettingBase.kt @@ -38,7 +38,8 @@ abstract class LLMSettingBase( val provider: LLMProvider, open val apiKey: T? = null, open val temperature: String, - open val prompt: String + @Deprecated("use PromptTemplate#prompt") + open val prompt: String? = null ) { abstract fun copyWithTemperature(temperature: String): LLMSettingBase } diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OllamaLLMSetting.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OllamaLLMSetting.kt index caa577996b..7c03229a03 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OllamaLLMSetting.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OllamaLLMSetting.kt @@ -18,7 +18,8 @@ package ai.tock.genai.orchestratorcore.models.llm data class OllamaLLMSetting( override val temperature: String, - override val prompt: String, + @Deprecated("use PromptTemplate#prompt") + override val prompt: String? = null, val model: String, val baseUrl: String, ) : LLMSettingBase(provider = LLMProvider.Ollama, temperature = temperature, prompt = prompt) { @@ -26,3 +27,5 @@ data class OllamaLLMSetting( return this.copy(temperature=temperature) } } + +// TODO MASS: check compilation + unit tests (this is the last commit) \ No newline at end of file diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OpenAILLMSetting.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OpenAILLMSetting.kt index da8445ff25..4e1805c1df 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OpenAILLMSetting.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OpenAILLMSetting.kt @@ -19,7 +19,8 @@ package ai.tock.genai.orchestratorcore.models.llm data class OpenAILLMSetting( override val apiKey: T, override val temperature: String, - override val prompt: String, + @Deprecated("use PromptTemplate#prompt") + override val prompt: String? 
= null, val model: String, val baseUrl: String, ) : LLMSettingBase(LLMProvider.OpenAI, apiKey, temperature, prompt) { diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/OpenSearchVectorStoreSetting.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/OpenSearchVectorStoreSetting.kt index 4117229cd2..741ea6fb75 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/OpenSearchVectorStoreSetting.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/OpenSearchVectorStoreSetting.kt @@ -23,25 +23,23 @@ data class OpenSearchVectorStoreSetting( override val port: Int, override val username: String, override val password: T, - override val k: Int, ) : VectorStoreSettingBase( provider = VectorStoreProvider.OpenSearch, host = host, port = port, username = username, - password = password, - k = k + password = password ) { override fun normalizeDocumentIndexName(namespace: String, botId: String, indexSessionId: String): String = OpenSearchUtils.normalizeDocumentIndexName(namespace, botId, indexSessionId) - override fun getDocumentSearchParams(): OpenSearchParams = - OpenSearchParams(k = k, filter = null) + override fun getDocumentSearchParams(kNeighborsDocuments: Int): OpenSearchParams = + OpenSearchParams(k = kNeighborsDocuments, filter = null) } data class OpenSearchParams( - val k: Int = 4, + val k: Int, val filter: List? = null ) : DocumentSearchParamsBase(VectorStoreProvider.OpenSearch) diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/PGVectorStoreSetting.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/PGVectorStoreSetting.kt index 3b14335b95..567c7e949f 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/PGVectorStoreSetting.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/PGVectorStoreSetting.kt @@ -23,25 +23,23 @@ data class PGVectorStoreSetting( override val port: Int, override val username: String, override val password: T, - override val k: Int, val database: String, ) : VectorStoreSettingBase( provider = VectorStoreProvider.PGVector, host = host, port = port, username = username, - password = password, - k = k + password = password ) { override fun normalizeDocumentIndexName(namespace: String, botId: String, indexSessionId: String): String = PGVectorUtils.normalizeDocumentIndexName(namespace, botId, indexSessionId) - override fun getDocumentSearchParams(): PGVectorParams = - PGVectorParams(k = k, filter = null) + override fun getDocumentSearchParams(kNeighborsDocuments: Int): PGVectorParams = + PGVectorParams(k = kNeighborsDocuments, filter = null) } data class PGVectorParams( - val k: Int = 4, + val k: Int, val filter: Map? 
= null ) : DocumentSearchParamsBase(VectorStoreProvider.PGVector) \ No newline at end of file diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/VectorStoreSettingBase.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/VectorStoreSettingBase.kt index bdb54322bb..af31a360a4 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/VectorStoreSettingBase.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/VectorStoreSettingBase.kt @@ -37,9 +37,7 @@ abstract class VectorStoreSettingBase( open val host: String, open val port: Int, open val username: String, - open val password: T, - // The number of documents (neighbors) to return for each vector search - open val k: Int, + open val password: T ){ /** * Normalize the document index name @@ -51,7 +49,7 @@ abstract class VectorStoreSettingBase( /** * Get search params (filter) params */ - abstract fun getDocumentSearchParams(): DocumentSearchParamsBase + abstract fun getDocumentSearchParams(kNeighborsDocuments: Int): DocumentSearchParamsBase } typealias VectorStoreSettingDTO = VectorStoreSettingBase diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/utils/VectorStoreUtils.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/utils/VectorStoreUtils.kt index 5c276daac8..b86e78794b 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/utils/VectorStoreUtils.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/utils/VectorStoreUtils.kt @@ -23,9 +23,6 @@ import ai.tock.shared.property private val vectorStore = property( name = "tock_gen_ai_orchestrator_vector_store_provider", defaultValue = VectorStoreProvider.OpenSearch.name) -private val kNeighborsDocuments = intProperty( - name = "tock_gen_ai_orchestrator_document_number_neighbors", - defaultValue = 1) typealias DocumentIndexName = String @@ -35,11 +32,12 @@ object VectorStoreUtils { namespace: String, botId: String, indexSessionId: String, + kNeighborsDocuments: Int, vectorStoreSetting: VectorStoreSetting?, ): Pair { vectorStoreSetting?.let { - val searchParams = it.getDocumentSearchParams() + val searchParams = it.getDocumentSearchParams(kNeighborsDocuments) val indexName = it.normalizeDocumentIndexName(namespace, botId, indexSessionId) return Pair(searchParams, indexName) } diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/configurations/environment/settings.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/configurations/environment/settings.py index 49f05c236e..d75b5f06cd 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/configurations/environment/settings.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/configurations/environment/settings.py @@ -67,11 +67,11 @@ class _Settings(BaseSettings): vector_store_database: Optional[str] = None # Only if necessary. 
Example: PGVector vector_store_secret_manager_provider: Optional[SecretManagerProvider] = None vector_store_credentials_secret_name: Optional[str] = None - """Number of document to retrieve from the Vector Store""" - vector_store_k: int = 4 """Request timeout: set the maximum time (in seconds) for the request to be completed.""" vector_store_timeout: int = 4 - vector_store_test_query: str = 'What knowledge do you have?' + """Maximum number of documents to be retrieved from the Vector Store""" + vector_store_test_max_docs_retrieved: int = 4 + vector_store_test_query: str = 'Any definition' """Observability Setting""" observability_provider_max_retries: int = 0 diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py index 8bd809b477..b74aeef107 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py @@ -138,19 +138,13 @@ class QADebugData(BaseModel): class RagDebugData(QADebugData): """A RAG debug data""" - condense_question_prompt: Optional[str] = Field( + question_condensing_prompt: Optional[str] = Field( description='The prompt of the question rephrased with the history of the conversation.', - examples=[ - """Given the following conversation and a follow up question, - rephrase the follow up question to be a standalone question, in its original language. - Chat History: - Human: What travel offers are you proposing? - Assistant: We offer trips to all of Europe and North Africa. - Follow Up Input: I'm interested in going to Morocco - Standalone question:""" - ], + examples=['Given the following conversation, rephrase the follow up question to be a standalone question.'], ) - condense_question: Optional[str] = Field( + question_condensing_history: list[ChatMessage] = Field( + description="Conversation history, used to reformulate the user's question.") + condensed_question: Optional[str] = Field( description='The question rephrased with the history of the conversation.', examples=['Hello, how to plan a trip to Morocco ?'], ) diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/requests/requests.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/requests/requests.py index e6bcd14a06..5951b49464 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/requests/requests.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/requests/requests.py @@ -163,11 +163,12 @@ class RagQuery(BaseQuery): """The RAG query model""" dialog: Optional[DialogDetails] = Field(description='The user dialog details.') - # condense_question_llm_setting: LLMSetting = - # Field(description="LLM setting, used to condense the user's question.") - # condense_question_prompt: PromptTemplate = Field( - # description='Prompt template, used to create a prompt with inputs for jinja and fstring format' - # ) + question_condensing_llm_setting: Optional[LLMSetting] = Field( + description="LLM setting, used to condense the user's question.", default=None) + question_condensing_prompt: Optional[PromptTemplate] = Field( + description='Prompt template, used to create a prompt with inputs for jinja and fstring format', + default = None + ) question_answering_llm_setting: 
LLMSetting = Field( description='LLM setting, used to perform a QA Prompt.' ) diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/completion/completion_service.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/completion/completion_service.py index b65d5351f2..c3474cb400 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/completion/completion_service.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/completion/completion_service.py @@ -53,7 +53,7 @@ async def generate_and_split_sentences( start_time = time.time() logger.info('Prompt completion - template validation') - validate_prompt_template(query.prompt) + validate_prompt_template(query.prompt, 'Sentence generation prompt') parser = NumberedListOutputParser() prompt = LangChainPromptTemplate.from_template( diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/callbacks/rag_callback_handler.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/callbacks/rag_callback_handler.py index e85003d9ac..77343c691b 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/callbacks/rag_callback_handler.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/callbacks/rag_callback_handler.py @@ -27,13 +27,14 @@ class RAGCallbackHandler(BaseCallbackHandler): """Customized RAG callback handler that retrieves data from the chain execution.""" - records: Dict[str, Any] = { - 'chat_prompt': None, - 'chat_chain_output': None, - 'rag_prompt': None, - 'rag_chain_output': None, - 'documents': None, - } + def __init__(self): + self.records: Dict[str, Any] = { + 'chat_prompt': None, + 'chat_chain_output': None, + 'rag_prompt': None, + 'rag_chain_output': None, + 'documents': None, + } def on_chain_start( self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/pgvector_factory.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/pgvector_factory.py index a2dd491326..d8ebe5138a 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/pgvector_factory.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/pgvector_factory.py @@ -66,5 +66,5 @@ async def check_vector_store_connection(self) -> bool: VectorStore to check the connection independently. 
""" await self.get_vector_store().asimilarity_search( - query=application_settings.vector_store_test_query, k=application_settings.vector_store_k) + query=application_settings.vector_store_test_query, k=application_settings.vector_store_test_max_docs_retrieved) return True diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/vector_store_factory.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/vector_store_factory.py index f57f838944..51b3a262a5 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/vector_store_factory.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/vector_store_factory.py @@ -75,7 +75,7 @@ async def check_vector_store_setting(self) -> bool: """ logger.info('Invoke vector store provider to check setting') documents: List[Document] = await self.get_vector_store().asimilarity_search( - query=application_settings.vector_store_test_query, k=application_settings.vector_store_k + query=application_settings.vector_store_test_query, k=application_settings.vector_store_test_max_docs_retrieved ) logger.debug('Invocation successful') logger.debug('[index: %s], [query: %s], [document count: %s]', self.index_name, application_settings.vector_store_test_query, len(documents)) diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py index 259de72878..1be6050e3f 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py @@ -52,6 +52,8 @@ from gen_ai_orchestrator.models.observability.observability_trace import ( ObservabilityTrace, ) +from gen_ai_orchestrator.models.prompt.prompt_formatter import PromptFormatter +from gen_ai_orchestrator.models.prompt.prompt_template import PromptTemplate from gen_ai_orchestrator.models.rag.rag_models import ( ChatMessageType, Footnote, @@ -116,7 +118,7 @@ async def execute_rag_chain(query: RagQuery, debug: bool) -> RagResponse: inputs = { **query.question_answering_prompt.inputs, - 'chat_history': message_history.messages, + 'chat_history': message_history.messages } logger.debug( @@ -224,7 +226,16 @@ def create_rag_chain(query: RagQuery, vector_db_async_mode: Optional[bool] = Tru The RAG chain. 
""" - llm_factory = get_llm_factory(setting=query.question_answering_llm_setting) + # Log progress and validate prompt template + logger.info('RAG chain - Validating LLM prompt template') + validate_prompt_template(query.question_answering_prompt, 'Question answering prompt') + if query.question_condensing_prompt is not None: + validate_prompt_template(query.question_condensing_prompt, 'Question condensing prompt') + + question_condensing_llm_factory = None + if query.question_condensing_llm_setting is not None: + question_condensing_llm_factory = get_llm_factory(setting=query.question_condensing_llm_setting) + question_answering_llm_factory = get_llm_factory(setting=query.question_answering_llm_setting) em_factory = get_em_factory(setting=query.embedding_question_em_setting) vector_store_factory = get_vector_store_factory( setting=query.vector_store_setting, @@ -239,22 +250,24 @@ def create_rag_chain(query: RagQuery, vector_db_async_mode: Optional[bool] = Tru if query.compressor_setting: retriever = add_document_compressor(retriever, query.compressor_setting) - # Log progress and validate prompt template - logger.info('RAG chain - Validating LLM prompt template') - validate_prompt_template(query.question_answering_prompt) - logger.debug('RAG chain - Document index name: %s', query.document_index_name) # Build LLM and prompt templates - llm = llm_factory.get_language_model() + question_condensing_llm = None + if question_condensing_llm_factory is not None: + question_condensing_llm = question_condensing_llm_factory.get_language_model() + question_answering_llm = question_answering_llm_factory.get_language_model() rag_prompt = build_rag_prompt(query) # Construct the RAG chain using the prompt and LLM, # This chain will consume the documents retrieved by the retriever as input. - rag_chain = construct_rag_chain(llm, rag_prompt) + rag_chain = construct_rag_chain(question_answering_llm, rag_prompt) # Build the chat chain for question contextualization - chat_chain = build_question_condensation_chain(llm) + chat_chain = build_question_condensation_chain( + question_condensing_llm if question_condensing_llm is not None else question_answering_llm, + query.question_condensing_prompt + ) # Function to contextualize the question based on chat history contextualize_question_fn = partial(contextualize_question, chat_chain=chat_chain) @@ -288,17 +301,24 @@ def construct_rag_chain(llm, rag_prompt): "question": lambda inputs: inputs["question"] # Override the user's original question with the condensed one } | rag_prompt | llm | StrOutputParser(name="rag_chain_output") -def build_question_condensation_chain(llm) -> ChatPromptTemplate: +def build_question_condensation_chain(llm, prompt: Optional[PromptTemplate]) -> ChatPromptTemplate: """ Build the chat chain for contextualizing questions. """ + if prompt is None: + # Default prompt + prompt = PromptTemplate( + formatter = PromptFormatter.F_STRING, inputs = {}, + template = "Given a chat history and the latest user question which might reference context in \ +the chat history, formulate a standalone question which can be understood without the chat history. \ +Do NOT answer the question, just reformulate it if needed and otherwise return it as is.", + ) + return ChatPromptTemplate.from_messages([ - ("system", """Given a chat history and the latest user question which might reference context in \ - the chat history, formulate a standalone question which can be understood without the chat history. 
\ - Do NOT answer the question, just reformulate it if needed and otherwise return it as is."""), + ("system", prompt.template), MessagesPlaceholder(variable_name="chat_history"), ("human", "{question}"), - ]) | llm | StrOutputParser(name="chat_chain_output") + ]).partial(**prompt.inputs) | llm | StrOutputParser(name="chat_chain_output") def contextualize_question(inputs: dict, chat_chain) -> str: """ @@ -328,14 +348,21 @@ def rag_guard(inputs, response, documents_required): chain_reply_no_answer = response['answer'] == inputs['no_answer'] if no_docs_but_required: - if chain_can_give_no_answer_reply and chain_reply_no_answer: # We expect the chain to use it's no answer value and it did, it's the expected behavior + if chain_can_give_no_answer_reply and chain_reply_no_answer: + # We expect the chain to use its non-response value, and it has done so, which is the expected behavior. return # Everything else isn't expected message = 'The RAG system cannot provide an answer when no documents are found and documents are required' rag_log(level=ERROR, message=message, inputs=inputs, response=response) raise GenAIGuardCheckException(ErrorInfo(cause=message)) - return + if chain_reply_no_answer and not no_docs_retrieved: + # If the chain responds with its non-response value even though documents were retrieved, + # we remove those documents from the RAG response. + message = 'The RAG gives no answer to the user question, but some documents have been found!' + rag_log(level=WARNING, message=message, inputs=inputs, response=response) + response['documents'] = [] + def rag_log(level, message, inputs, response): """ @@ -366,7 +393,7 @@ def get_rag_documents(handler: RAGCallbackHandler) -> List[RagDocument]: Get documents used on RAG context Args: - response: the rag answer + handler: the RAG Callback Handler """ return [ @@ -384,10 +411,15 @@ def get_rag_debug_data( ) -> RagDebugData: """RAG debug data assembly""" + history = [] + if query.dialog: + history = query.dialog.history + return RagDebugData( user_question=query.question_answering_prompt.inputs['question'], - condense_question_prompt=records_callback_handler.records['chat_prompt'], - condense_question=records_callback_handler.records['chat_chain_output'], + question_condensing_prompt=records_callback_handler.records['chat_prompt'], + question_condensing_history=history, + condensed_question=records_callback_handler.records['chat_chain_output'], question_answering_prompt=records_callback_handler.records['rag_prompt'], documents=get_rag_documents(records_callback_handler), document_index_name=query.document_index_name, diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/utils/prompt_utility.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/utils/prompt_utility.py index 7ec3af49fb..9374d4a547 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/utils/prompt_utility.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/utils/prompt_utility.py @@ -11,12 +11,13 @@ logger = logging.getLogger(__name__) -def validate_prompt_template(prompt: PromptTemplate): +def validate_prompt_template(prompt: PromptTemplate, name: str): """ Prompt template validation Args: prompt: The prompt template + name: The prompt name Returns: Nothing. 
@@ -27,7 +28,7 @@ def validate_prompt_template(prompt: PromptTemplate): try: Template(prompt.template).render(prompt.inputs) except TemplateError as exc: - logger.error('Prompt completion - template validation failed!') + logger.error(f'Validation of the prompt template has failed! ({name})') logger.error(exc) raise GenAIPromptTemplateException( ErrorInfo( diff --git a/gen-ai/orchestrator-server/src/main/python/server/tests/routers/test_completion_router.py b/gen-ai/orchestrator-server/src/main/python/server/tests/routers/test_completion_router.py index 6ee2a52776..0e57fa2b7b 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/tests/routers/test_completion_router.py +++ b/gen-ai/orchestrator-server/src/main/python/server/tests/routers/test_completion_router.py @@ -32,7 +32,6 @@ def test_generate_sentences(): }, 'model': 'dddddd', 'temperature': '0.0', - 'prompt': 'List 3 ice cream flavors.', 'responses': ['1. vanilla\n2. chocolate\n3. strawberry'], }, 'prompt': { @@ -64,7 +63,6 @@ def test_generate_sentences_template_error(): }, 'model': 'dddddd', 'temperature': '0.0', - 'prompt': 'List 3 ice cream flavors.', 'responses': ['1. vanilla\n2. chocolate\n3. strawberry'], }, 'prompt': { diff --git a/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_completion_service.py b/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_completion_service.py index 58c1a00aa2..11580d80eb 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_completion_service.py +++ b/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_completion_service.py @@ -28,4 +28,4 @@ def test_validate_prompt_template(): }, } template = PromptTemplate(**json) - validate_prompt_template(template) + validate_prompt_template(template, "prompt_name") diff --git a/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_langchain_factory.py b/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_langchain_factory.py index 326e4ed08f..2b4b3b0f4f 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_langchain_factory.py +++ b/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_langchain_factory.py @@ -126,7 +126,6 @@ def test_get_open_ai_llm_factory(): }, 'model': 'model', 'temperature': '0', - 'prompt': 'List 3 ice cream flavors.', } ) ) @@ -148,7 +147,6 @@ def test_get_azure_open_ai_llm_factory(): 'api_base': 'https://doc.tock.ai/tock', 'api_version': 'version', 'temperature': '0', - 'prompt': 'List 3 ice cream flavors.', } ) ) @@ -166,7 +164,6 @@ def test_get_fake_llm_factory(): 'secret': 'ab7***************************A1IV4B', }, 'temperature': '0', - 'prompt': 'List 3 ice cream flavors.', 'responses': ['1. vanilla\n2. chocolate\n3. 
strawberry'], } ) @@ -190,7 +187,6 @@ def test_get_open_ai_em_factory(): 'secret': 'ab7***************************A1IV4B', }, 'model': 'model', - 'prompt': 'List 3 ice cream flavors.', } ) ) @@ -211,7 +207,6 @@ def test_get_azure_open_ai_em_factory(): 'model': 'text-embedding-ada-002', 'api_base': 'https://doc.tock.ai/tock', 'api_version': 'version', - 'prompt': 'List 3 ice cream flavors.', } ) ) @@ -243,7 +238,6 @@ def test_get_open_search_vector_store_factory(): 'secret': 'ab7***************************A1IV4B', }, 'model': 'model', - 'prompt': 'List 3 ice cream flavors.', } ) ) diff --git a/gen-ai/orchestrator-server/src/main/python/tock-llm-indexing-tools/index_documents.py b/gen-ai/orchestrator-server/src/main/python/tock-llm-indexing-tools/index_documents.py index a2355c7365..ec625930a5 100644 --- a/gen-ai/orchestrator-server/src/main/python/tock-llm-indexing-tools/index_documents.py +++ b/gen-ai/orchestrator-server/src/main/python/tock-llm-indexing-tools/index_documents.py @@ -87,7 +87,7 @@ from langchain_community.document_loaders.dataframe import DataFrameLoader from langchain_core.documents import Document -from indexing_details import IndexingDetails +from models import IndexingDetails # Define the size of the csv field -> Set to maximum to process large csvs csv.field_size_limit(sys.maxsize)
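
A note on the `RAGCallbackHandler` change above: in Python, a mutable dict declared at class level is one object shared by every instance of the class, so two concurrent RAG requests using separate handlers could read and overwrite each other's records; moving the dict into `__init__` gives each handler its own state. A minimal standalone sketch of the pitfall (illustrative only, not Tock code):

```python
class SharedRecords:
    # Class attribute: a single dict object shared by ALL instances.
    records = {'rag_prompt': None}

class PerInstanceRecords:
    def __init__(self):
        # Instance attribute: a fresh dict per handler.
        self.records = {'rag_prompt': None}

a, b = SharedRecords(), SharedRecords()
a.records['rag_prompt'] = 'from request A'
assert b.records['rag_prompt'] == 'from request A'  # leak: B observes A's data

c, d = PerInstanceRecords(), PerInstanceRecords()
c.records['rag_prompt'] = 'from request C'
assert d.records['rag_prompt'] is None  # isolated: D keeps its own records
```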
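For reference on `validate_prompt_template` above: it renders the Jinja2 template with its declared inputs and raises on a malformed template; the new `name` argument only enriches the error log. A rough standalone equivalent (the helper and error type here are illustrative, not the orchestrator API):

```python
from jinja2 import Template, TemplateError

def check_template(template_text: str, inputs: dict, name: str) -> None:
    """Render the template with its inputs and fail fast if it is malformed."""
    try:
        Template(template_text).render(inputs)
    except TemplateError as exc:
        raise ValueError(f'Validation of the prompt template has failed! ({name})') from exc

# A well-formed template passes silently.
check_template('Rephrase: {{ question }}', {'question': 'Hi'}, 'Question condensing prompt')

# An unclosed block raises a TemplateError, reported with the prompt name.
try:
    check_template('{% if x %}never closed', {}, 'Broken prompt')
except ValueError as err:
    print(err)
```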