diff --git a/bot/admin/server/src/main/kotlin/BotAdminService.kt b/bot/admin/server/src/main/kotlin/BotAdminService.kt index ec8386b168..b936cf3d7c 100644 --- a/bot/admin/server/src/main/kotlin/BotAdminService.kt +++ b/bot/admin/server/src/main/kotlin/BotAdminService.kt @@ -1153,7 +1153,9 @@ object BotAdminService { // delete the RAG configuration ragConfigurationDAO.findByNamespaceAndBotId(app.namespace, app.name)?.let { config -> ragConfigurationDAO.delete(config._id) - config.llmSetting.apiKey?.let { SecurityUtils.deleteSecret(it) } + config.questionCondensingLlmSetting?.apiKey?.let { SecurityUtils.deleteSecret(it) } + config.questionAnsweringLlmSetting?.apiKey?.let { SecurityUtils.deleteSecret(it) } + config.llmSetting?.apiKey?.let { SecurityUtils.deleteSecret(it) } config.emSetting.apiKey?.let { SecurityUtils.deleteSecret(it) } } diff --git a/bot/admin/server/src/main/kotlin/model/BotRAGConfigurationDTO.kt b/bot/admin/server/src/main/kotlin/model/BotRAGConfigurationDTO.kt index 9810aa0599..13f8672d72 100644 --- a/bot/admin/server/src/main/kotlin/model/BotRAGConfigurationDTO.kt +++ b/bot/admin/server/src/main/kotlin/model/BotRAGConfigurationDTO.kt @@ -18,6 +18,7 @@ package ai.tock.bot.admin.model import ai.tock.bot.admin.bot.rag.BotRAGConfiguration import ai.tock.bot.admin.service.VectorStoreService +import ai.tock.genai.orchestratorclient.requests.PromptTemplate import ai.tock.genai.orchestratorcore.mappers.EMSettingMapper import ai.tock.genai.orchestratorcore.mappers.LLMSettingMapper import ai.tock.genai.orchestratorcore.models.Constants @@ -34,26 +35,39 @@ data class BotRAGConfigurationDTO( val namespace: String, val botId: String, val enabled: Boolean = false, - val llmSetting: LLMSettingDTO, + val questionCondensingLlmSetting: LLMSettingDTO? = null, + val questionCondensingPrompt: PromptTemplate? = null, + val questionAnsweringLlmSetting: LLMSettingDTO, + val questionAnsweringPrompt: PromptTemplate, val emSetting: EMSettingDTO, val indexSessionId: String? = null, val indexName: String? = null, val noAnswerSentence: String, val noAnswerStoryId: String? 
= null, val documentsRequired: Boolean = true, + val debugEnabled: Boolean, + val maxDocumentsRetrieved: Int, + val maxMessagesFromHistory: Int, ) { constructor(configuration: BotRAGConfiguration) : this( id = configuration._id.toString(), namespace = configuration.namespace, botId = configuration.botId, enabled = configuration.enabled, - llmSetting = configuration.llmSetting.toDTO(), + questionCondensingLlmSetting = configuration.questionCondensingLlmSetting?.toDTO(), + questionCondensingPrompt = configuration.questionCondensingPrompt, + questionAnsweringLlmSetting = configuration.getQuestionAnsweringLLMSetting().toDTO(), + questionAnsweringPrompt = configuration.questionAnsweringPrompt + ?: configuration.initQuestionAnsweringPrompt(), emSetting = configuration.emSetting.toDTO(), indexSessionId = configuration.indexSessionId, indexName = configuration.generateIndexName(), noAnswerSentence = configuration.noAnswerSentence, noAnswerStoryId = configuration.noAnswerStoryId, documentsRequired = configuration.documentsRequired, + debugEnabled = configuration.debugEnabled, + maxDocumentsRetrieved = configuration.maxDocumentsRetrieved, + maxMessagesFromHistory = configuration.maxMessagesFromHistory, ) fun toBotRAGConfiguration(): BotRAGConfiguration = @@ -62,12 +76,20 @@ data class BotRAGConfigurationDTO( namespace = namespace, botId = botId, enabled = enabled, - llmSetting = LLMSettingMapper.toEntity( + questionCondensingLlmSetting = LLMSettingMapper.toEntity( + namespace = namespace, + botId = botId, + feature = Constants.GEN_AI_RAG_QUESTION_CONDENSING, + dto = questionCondensingLlmSetting!! + ), + questionCondensingPrompt = questionCondensingPrompt, + questionAnsweringLlmSetting = LLMSettingMapper.toEntity( namespace = namespace, botId = botId, feature = Constants.GEN_AI_RAG_QUESTION_ANSWERING, - dto = llmSetting + dto = questionAnsweringLlmSetting ), + questionAnsweringPrompt = questionAnsweringPrompt, emSetting = EMSettingMapper.toEntity( namespace = namespace, botId = botId, @@ -78,6 +100,9 @@ data class BotRAGConfigurationDTO( noAnswerSentence = noAnswerSentence, noAnswerStoryId = noAnswerStoryId, documentsRequired = documentsRequired, + debugEnabled = debugEnabled, + maxDocumentsRetrieved = maxDocumentsRetrieved, + maxMessagesFromHistory = maxMessagesFromHistory, ) } @@ -87,6 +112,7 @@ private fun BotRAGConfiguration.generateIndexName(): String? 
{ namespace, botId, it, + maxDocumentsRetrieved, VectorStoreService.getVectorStoreConfiguration(namespace, botId, enabled = true) ?.setting ).second diff --git a/bot/admin/server/src/main/kotlin/model/BotSentenceGenerationConfigurationDTO.kt b/bot/admin/server/src/main/kotlin/model/BotSentenceGenerationConfigurationDTO.kt index 9e6ea41b91..ad8b7c5c46 100644 --- a/bot/admin/server/src/main/kotlin/model/BotSentenceGenerationConfigurationDTO.kt +++ b/bot/admin/server/src/main/kotlin/model/BotSentenceGenerationConfigurationDTO.kt @@ -17,6 +17,7 @@ package ai.tock.bot.admin.model import ai.tock.bot.admin.bot.sentencegeneration.BotSentenceGenerationConfiguration +import ai.tock.genai.orchestratorclient.requests.PromptTemplate import ai.tock.genai.orchestratorcore.mappers.LLMSettingMapper import ai.tock.genai.orchestratorcore.models.Constants import ai.tock.genai.orchestratorcore.models.llm.LLMSettingDTO @@ -32,6 +33,7 @@ data class BotSentenceGenerationConfigurationDTO( val enabled: Boolean = false, val nbSentences: Int, val llmSetting: LLMSettingDTO, + val prompt: PromptTemplate, ) { constructor(configuration: BotSentenceGenerationConfiguration) : this( id = configuration._id.toString(), @@ -40,6 +42,7 @@ data class BotSentenceGenerationConfigurationDTO( enabled = configuration.enabled, nbSentences = configuration.nbSentences, llmSetting = configuration.llmSetting.toDTO(), + prompt = configuration.prompt ?: configuration.initPrompt() ) fun toSentenceGenerationConfiguration(): BotSentenceGenerationConfiguration = @@ -54,7 +57,8 @@ data class BotSentenceGenerationConfigurationDTO( botId = botId, feature = Constants.GEN_AI_COMPLETION_SENTENCE_GENERATION, dto = llmSetting - ) + ), + prompt = prompt ) } diff --git a/bot/admin/server/src/main/kotlin/service/CompletionService.kt b/bot/admin/server/src/main/kotlin/service/CompletionService.kt index 93379172a5..846e0384a1 100644 --- a/bot/admin/server/src/main/kotlin/service/CompletionService.kt +++ b/bot/admin/server/src/main/kotlin/service/CompletionService.kt @@ -63,6 +63,9 @@ object CompletionService { // Get LLM Setting and override the temperature val llmSetting = sentenceGenerationConfig.llmSetting.copyWithTemperature(request.llmTemperature) + // Get prompt + val prompt = sentenceGenerationConfig.prompt ?: sentenceGenerationConfig.initPrompt() + // Create the inputs map val inputs = mapOf( "locale" to request.locale, @@ -75,18 +78,11 @@ object CompletionService { ) ) - // Create a Jinja2 prompt template - val prompt = PromptTemplate( - formatter = Formatter.JINJA2.id, - template = llmSetting.prompt, - inputs = inputs - ) - // call the completion service to generate sentences return completionService .generateSentences( SentenceGenerationQuery( - llmSetting, prompt, + llmSetting, prompt.copy(inputs = inputs), ObservabilityService.getObservabilityConfiguration(namespace, botId, enabled = true)?.setting ) ) diff --git a/bot/admin/server/src/main/kotlin/service/RAGService.kt b/bot/admin/server/src/main/kotlin/service/RAGService.kt index 16b5d6522e..cd4db7b7f4 100644 --- a/bot/admin/server/src/main/kotlin/service/RAGService.kt +++ b/bot/admin/server/src/main/kotlin/service/RAGService.kt @@ -64,8 +64,11 @@ object RAGService { logger.info { "Deleting the RAG Configuration [namespace: $namespace, botId: $botId]" } ragConfigurationDAO.delete(ragConfig._id) - logger.info { "Deleting the LLM secret ..." } - ragConfig.llmSetting.apiKey?.let { SecurityUtils.deleteSecret(it) } + logger.info { "Deleting the question condensing LLM secret ..." 
} + ragConfig.questionCondensingLlmSetting?.apiKey?.let { SecurityUtils.deleteSecret(it) } + logger.info { "Deleting the question answering LLM secret ..." } + ragConfig.questionAnsweringLlmSetting?.apiKey?.let { SecurityUtils.deleteSecret(it) } + ragConfig.llmSetting?.apiKey?.let { SecurityUtils.deleteSecret(it) } logger.info { "Deleting the Embedding secret ..." } ragConfig.emSetting.apiKey?.let { SecurityUtils.deleteSecret(it) } } diff --git a/bot/admin/server/src/main/kotlin/service/RAGValidationService.kt b/bot/admin/server/src/main/kotlin/service/RAGValidationService.kt index d2de5a0ab8..f992c344ee 100644 --- a/bot/admin/server/src/main/kotlin/service/RAGValidationService.kt +++ b/bot/admin/server/src/main/kotlin/service/RAGValidationService.kt @@ -37,15 +37,24 @@ object RAGValidationService { private val vectorStoreProviderService: VectorStoreProviderService get() = injector.provide() fun validate(ragConfig: BotRAGConfiguration): Set<ErrorMessage> { + val observabilitySetting = ObservabilityService.getObservabilityConfiguration( + ragConfig.namespace, ragConfig.botId, enabled = true + )?.setting + return mutableSetOf<ErrorMessage>().apply { - val llmErrors = llmProviderService.checkSetting( + val questionCondensingLlmErrors = llmProviderService.checkSetting( + LLMProviderSettingStatusQuery( + ragConfig.questionCondensingLlmSetting!!, + observabilitySetting + ) + ).getErrors("LLM setting check failed (for question condensing)") + + val questionAnsweringLlmErrors = llmProviderService.checkSetting( LLMProviderSettingStatusQuery( - ragConfig.llmSetting, - ObservabilityService.getObservabilityConfiguration( - ragConfig.namespace, ragConfig.botId, enabled = true - )?.setting + ragConfig.questionAnsweringLlmSetting!!, + observabilitySetting ) - ).getErrors("LLM setting check failed") + ).getErrors("LLM setting check failed (for question answering)") val embeddingErrors = emProviderService.checkSetting( EMProviderSettingStatusQuery(ragConfig.emSetting) @@ -59,7 +68,11 @@ )?.setting val (_, indexName) = VectorStoreUtils.getVectorStoreElements( - ragConfig.namespace, ragConfig.botId, ragConfig.indexSessionId!!, vectorStoreSetting + ragConfig.namespace, + ragConfig.botId, + ragConfig.indexSessionId!!, + ragConfig.maxDocumentsRetrieved, + vectorStoreSetting ) vectorStoreProviderService.checkSetting( @@ -71,7 +84,7 @@ ).getErrors("Vector store setting check failed") } ?: emptySet() - addAll(llmErrors + embeddingErrors + indexSessionIdErrors + vectorStoreErrors) + addAll(questionCondensingLlmErrors + questionAnsweringLlmErrors + embeddingErrors + indexSessionIdErrors + vectorStoreErrors) } } diff --git a/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt b/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt index eeabc764c8..ce9301c5da 100644 --- a/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt +++ b/bot/admin/server/src/test/kotlin/service/RAGServiceTest.kt @@ -30,6 +30,7 @@ import ai.tock.bot.test.TFunction import ai.tock.bot.test.TRunnable import ai.tock.bot.test.TSupplier import ai.tock.bot.test.TestCase +import ai.tock.genai.orchestratorclient.requests.PromptTemplate import ai.tock.genai.orchestratorclient.responses.ProviderSettingStatusResponse import ai.tock.genai.orchestratorclient.services.EMProviderService import ai.tock.genai.orchestratorclient.services.LLMProviderService @@ -65,17 +66,24 @@ class RAGServiceTest : AbstractTest() { const val INDEX_SESSION_ID = "1010101" private val DEFAULT_RAG_CONFIG = BotRAGConfigurationDTO( -
id = "ragId", + id = "ragId", namespace = NAMESPACE, botId = BOT_ID, enabled = false, - llmSetting = OpenAILLMSettingDTO( + questionCondensingLlmSetting = OpenAILLMSettingDTO( apiKey = "apikey", model = MODEL, - prompt = PROMPT, temperature = TEMPERATURE, baseUrl = "https://api.openai.com/v1" ), + questionCondensingPrompt = PromptTemplate(template = PROMPT), + questionAnsweringLlmSetting = OpenAILLMSettingDTO( + apiKey = "apikey", + model = MODEL, + temperature = TEMPERATURE, + baseUrl = "https://api.openai.com/v1" + ), + questionAnsweringPrompt = PromptTemplate(template = PROMPT), emSetting = AzureOpenAIEMSettingDTO( apiKey = "apiKey", apiVersion = "apiVersion", @@ -83,7 +91,11 @@ class RAGServiceTest : AbstractTest() { model = "model", apiBase = "url" ), - noAnswerSentence = "No answer sentence" + noAnswerSentence = "No answer sentence", + documentsRequired = true, + debugEnabled = false, + maxDocumentsRetrieved = 2, + maxMessagesFromHistory = 2, ) private val DEFAULT_BOT_CONFIG = aApplication.copy(namespace = NAMESPACE, botId = BOT_ID) @@ -186,9 +198,9 @@ class RAGServiceTest : AbstractTest() { Assertions.assertEquals(BOT_ID, captured.botId) Assertions.assertEquals(true, captured.enabled) Assertions.assertEquals(NAMESPACE, captured.namespace) - Assertions.assertEquals(PROVIDER, captured.llmSetting.provider.name) - Assertions.assertEquals(TEMPERATURE, captured.llmSetting.temperature) - Assertions.assertEquals(PROMPT, captured.llmSetting.prompt) + Assertions.assertEquals(PROVIDER, captured.questionAnsweringLlmSetting!!.provider.name) + Assertions.assertEquals(TEMPERATURE, captured.questionAnsweringLlmSetting!!.temperature) + Assertions.assertEquals(PROMPT, captured.questionAnsweringPrompt!!.template) Assertions.assertEquals(null, captured.noAnswerStoryId) } diff --git a/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt b/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt index 4f5a538be7..2aec481459 100644 --- a/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt +++ b/bot/admin/server/src/test/kotlin/service/RAGValidationServiceTest.kt @@ -19,6 +19,7 @@ package ai.tock.bot.admin.service import ai.tock.bot.admin.bot.observability.BotObservabilityConfigurationDAO import ai.tock.bot.admin.bot.vectorstore.BotVectorStoreConfigurationDAO import ai.tock.bot.admin.model.BotRAGConfigurationDTO +import ai.tock.genai.orchestratorclient.requests.PromptTemplate import ai.tock.genai.orchestratorclient.responses.ErrorInfo import ai.tock.genai.orchestratorclient.responses.ErrorResponse import ai.tock.genai.orchestratorclient.responses.ProviderSettingStatusResponse @@ -63,7 +64,7 @@ class RAGValidationServiceTest { } private val openAILLMSetting = OpenAILLMSetting( - apiKey = "123-abc", model = "unavailable-model", temperature = "0.4", prompt = "How to bike in the rain", + apiKey = "123-abc", model = "unavailable-model", temperature = "0.4", baseUrl = "https://api.openai.com/v1", ) @@ -78,9 +79,16 @@ class RAGValidationServiceTest { private val ragConfiguration = BotRAGConfigurationDTO( namespace = "namespace", botId = "botId", - llmSetting = openAILLMSetting, + questionCondensingLlmSetting = openAILLMSetting, + questionCondensingPrompt = PromptTemplate(template = "test"), + questionAnsweringLlmSetting = openAILLMSetting, + questionAnsweringPrompt = PromptTemplate(template = "How to bike in the rain"), emSetting = azureOpenAIEMSetting, noAnswerSentence = " No answer sentence", + documentsRequired = true, + debugEnabled = false, + maxDocumentsRetrieved 
= 2, + maxMessagesFromHistory = 2, ) @Test @@ -163,7 +171,7 @@ fun `validation of the RAG configuration when the Orchestrator returns 2 errors for LLM and 1 for Embedding model, the RAG function has not been activated`() { // GIVEN - // - 3 errors returned by Generative AI Orchestrator for LLM (2) and EM (1) + // - 5 errors returned by Generative AI Orchestrator for LLM (4 = 2 for condensing + 2 for answering) and EM (1) // - RAG is not enabled every { llmProviderService.checkSetting(any()) @@ -187,11 +195,13 @@ ) // THEN : - // Check that 3 errors have been found - assertEquals(2, errors.size) + // Check that 3 groups of errors have been found + assertEquals(3, errors.size) assertEquals("10", (((errors.elementAt(0).params) as List<*>)[0] as ErrorResponse).code) assertEquals("20", (((errors.elementAt(0).params) as List<*>)[1] as ErrorResponse).code) - assertEquals("30", (((errors.elementAt(1).params) as List<*>)[0] as ErrorResponse).code) + assertEquals("10", (((errors.elementAt(1).params) as List<*>)[0] as ErrorResponse).code) + assertEquals("20", (((errors.elementAt(1).params) as List<*>)[1] as ErrorResponse).code) + assertEquals("30", (((errors.elementAt(2).params) as List<*>)[0] as ErrorResponse).code) } private fun createFakeErrorResponse(code: String) = ErrorResponse( diff --git a/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt b/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt index 279c94be8a..5c38398e8d 100644 --- a/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt +++ b/bot/engine/src/main/kotlin/admin/bot/rag/BotRAGConfiguration.kt @@ -16,8 +16,11 @@ package ai.tock.bot.admin.bot.rag +import ai.tock.genai.orchestratorclient.requests.Formatter +import ai.tock.genai.orchestratorclient.requests.PromptTemplate import ai.tock.genai.orchestratorcore.models.em.EMSetting import ai.tock.genai.orchestratorcore.models.llm.LLMSetting +import ai.tock.shared.intProperty import org.litote.kmongo.Id data class BotRAGConfiguration( @@ -25,10 +28,31 @@ val namespace: String, val botId: String, val enabled: Boolean, - val llmSetting: LLMSetting, + val questionCondensingLlmSetting: LLMSetting? = null, + val questionCondensingPrompt: PromptTemplate? = null, + val questionAnsweringLlmSetting: LLMSetting? = null, + val questionAnsweringPrompt: PromptTemplate? = null, + @Deprecated("use BotRAGConfiguration#questionAnsweringLlmSetting") + val llmSetting: LLMSetting? = null, val emSetting: EMSetting, val indexSessionId: String? = null, val noAnswerSentence: String, val noAnswerStoryId: String? = null, val documentsRequired: Boolean = true, -) + val debugEnabled: Boolean = false, + val maxDocumentsRetrieved: Int = 4, + val maxMessagesFromHistory: Int = 5, +) { + @Deprecated("use BotRAGConfiguration#questionAnsweringPrompt") + fun initQuestionAnsweringPrompt(): PromptTemplate { + // Temporary stopgap until the next version of Tock, + // which will remove the prompt at LLMSetting level and use the promptTemplate + return PromptTemplate( + formatter = Formatter.F_STRING.id, + template = getQuestionAnsweringLLMSetting().prompt!! + ) + } + + @Deprecated("use BotRAGConfiguration#questionAnsweringLlmSetting") + fun getQuestionAnsweringLLMSetting(): LLMSetting = (questionAnsweringLlmSetting ?: llmSetting)!!
+} diff --git a/bot/engine/src/main/kotlin/admin/bot/sentencegeneration/BotSentenceGenerationConfiguration.kt b/bot/engine/src/main/kotlin/admin/bot/sentencegeneration/BotSentenceGenerationConfiguration.kt index e0eb535a61..7509abc32f 100644 --- a/bot/engine/src/main/kotlin/admin/bot/sentencegeneration/BotSentenceGenerationConfiguration.kt +++ b/bot/engine/src/main/kotlin/admin/bot/sentencegeneration/BotSentenceGenerationConfiguration.kt @@ -17,6 +17,8 @@ package ai.tock.bot.admin.bot.sentencegeneration +import ai.tock.genai.orchestratorclient.requests.Formatter +import ai.tock.genai.orchestratorclient.requests.PromptTemplate import ai.tock.genai.orchestratorcore.models.llm.LLMSetting import org.litote.kmongo.Id @@ -27,4 +29,15 @@ data class BotSentenceGenerationConfiguration( val enabled: Boolean = false, val nbSentences: Int, val llmSetting: LLMSetting, -) \ No newline at end of file + val prompt: PromptTemplate? = null, +) { + @Deprecated("use BotSentenceGenerationConfiguration#prompt") + fun initPrompt(): PromptTemplate { + // Temporary stopgap until the next version of Tock, + // which will remove the prompt at LLMSetting level and use the promptTemplate + return PromptTemplate( + formatter = Formatter.JINJA2.id, + template = llmSetting.prompt!! + ) + } +} \ No newline at end of file diff --git a/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt b/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt index 1c73b4c785..bb869fc226 100644 --- a/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt +++ b/bot/engine/src/main/kotlin/engine/config/RAGAnswerHandler.kt @@ -42,15 +42,9 @@ import ai.tock.shared.* import engine.config.AbstractProactiveAnswerHandler import mu.KotlinLogging -private val nLastMessages = intProperty( - name = "tock_gen_ai_orchestrator_dialog_number_messages", - defaultValue = 5) private val technicalErrorMessage = property( name = "tock_gen_ai_orchestrator_technical_error", defaultValue = "Technical error :( sorry!") -private val ragDebugEnabled = booleanProperty( - name = "tock_gen_ai_orchestrator_rag_debug_enabled", - defaultValue = false) object RAGAnswerHandler : AbstractProactiveAnswerHandler { @@ -68,7 +62,7 @@ object RAGAnswerHandler : AbstractProactiveAnswerHandler { val (answer, debug, noAnswerStory, observabilityInfo) = rag(this) // Add debug data if available and if debugging is enabled - if (debug != null && (action.metadata.debugEnabled || ragDebugEnabled)) { + if (debug != null) { logger.info { "Send RAG debug data." 
} sendDebugData("RAG", debug) } @@ -171,24 +165,28 @@ ragConfiguration.botId, // The indexSessionId is mandatory to enable RAG Story ragConfiguration.indexSessionId!!, + ragConfiguration.maxDocumentsRetrieved, vectorStoreSetting ) + val questionAnsweringPrompt = ragConfiguration.questionAnsweringPrompt + ?: ragConfiguration.initQuestionAnsweringPrompt() + try { val response = ragService.rag( query = RAGQuery( dialog = DialogDetails( dialogId = dialog.id.toString(), userId = dialog.playerIds.firstOrNull { PlayerType.user == it.type }?.id, - history = getDialogHistory(dialog), + history = getDialogHistory(dialog, ragConfiguration.maxMessagesFromHistory), tags = listOf( "connector:${underlyingConnector.connectorType.id}" ) ), - questionAnsweringLlmSetting = ragConfiguration.llmSetting, - questionAnsweringPrompt = PromptTemplate( - formatter = Formatter.F_STRING.id, - template = ragConfiguration.llmSetting.prompt, + questionCondensingLlmSetting = ragConfiguration.questionCondensingLlmSetting, + questionCondensingPrompt = ragConfiguration.questionCondensingPrompt, + questionAnsweringLlmSetting = ragConfiguration.getQuestionAnsweringLLMSetting(), + questionAnsweringPrompt = questionAnsweringPrompt.copy( inputs = mapOf( "question" to action.toString(), "locale" to userPreferences.locale.displayLanguage, @@ -202,7 +200,7 @@ vectorStoreSetting = vectorStoreSetting, observabilitySetting = botDefinition.observabilityConfiguration?.setting, documentsRequired = ragConfiguration.documentsRequired, - ), debug = action.metadata.debugEnabled || ragDebugEnabled + ), debug = action.metadata.debugEnabled || ragConfiguration.debugEnabled ) // Handle RAG response @@ -232,7 +230,7 @@ * Create a dialog history (Human and Bot message) * @param dialog */ - private fun getDialogHistory(dialog: Dialog): List<ChatMessage> = dialog.stories.flatMap { it.actions }.mapNotNull { + private fun getDialogHistory(dialog: Dialog, nLastMessages: Int): List<ChatMessage> = dialog.stories.flatMap { it.actions }.mapNotNull { when (it) { is SendSentence -> if (it.text == null) null else ChatMessage( diff --git a/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt b/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt index 392e5b6c88..2f42119a04 100644 --- a/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt +++ b/bot/storage-mongo/src/test/kotlin/BotRAGConfigurationMongoDAOTest.kt @@ -17,6 +17,7 @@ package ai.tock.bot.mongo import ai.tock.bot.admin.bot.rag.BotRAGConfiguration +import ai.tock.genai.orchestratorclient.requests.PromptTemplate import ai.tock.genai.orchestratorcore.models.em.OpenAIEMSetting import ai.tock.genai.orchestratorcore.models.llm.OpenAILLMSetting import ai.tock.shared.security.key.RawSecretKey @@ -44,13 +45,13 @@ internal class BotRAGConfigurationMongoDAOTest : AbstractTest() { "namespace1", "botId1", false, - llmSetting = OpenAILLMSetting( + questionAnsweringLlmSetting = OpenAILLMSetting( apiKey = RawSecretKey("apiKey1"), model = "modelName1", temperature = "1F", - prompt = "prompt1", baseUrl = "https://api.openai.com/v1" ), + questionAnsweringPrompt = PromptTemplate(template = "prompt template"), emSetting = OpenAIEMSetting( apiKey = RawSecretKey("apiKey1"), model = "modelName1", @@ -72,13 +73,13 @@ "namespace1", "botId1", false, - llmSetting =
OpenAILLMSetting( + questionAnsweringLlmSetting = OpenAILLMSetting( apiKey = RawSecretKey("apiKey1"), model = "modelName1", baseUrl = "https://api.openai.com/v1", temperature = "1F", - prompt = "prompt1" ), + questionAnsweringPrompt = PromptTemplate(template = "prompt template"), emSetting = OpenAIEMSetting( apiKey = RawSecretKey("apiKey1"), baseUrl = "https://api.openai.com/v1", @@ -92,13 +93,13 @@ "namespace1", "botId2", false, - llmSetting = OpenAILLMSetting( + questionAnsweringLlmSetting = OpenAILLMSetting( apiKey = RawSecretKey("apiKey1"), model = "modelName1", temperature = "1F", baseUrl = "https://api.openai.com/v1", - prompt = "prompt1" ), + questionAnsweringPrompt = PromptTemplate(template = "prompt template"), emSetting = OpenAIEMSetting( apiKey = RawSecretKey("apiKey1"), baseUrl = "https://api.openai.com/v1", @@ -126,13 +127,13 @@ "namespace1", "botId1", false, - llmSetting = OpenAILLMSetting( + questionAnsweringLlmSetting = OpenAILLMSetting( apiKey = RawSecretKey("apiKey1"), model = "modelName1", temperature = "1F", baseUrl = "https://api.openai.com/v1", - prompt = "prompt1" ), + questionAnsweringPrompt = PromptTemplate(template = "prompt template"), emSetting = OpenAIEMSetting( apiKey = RawSecretKey("apiKey1"), baseUrl = "https://api.openai.com/v1", diff --git a/docs/docs/en/dev/gen_ai_orchestrator/api.md b/docs/docs/en/dev/gen_ai_orchestrator/api.md new file mode 100644 index 0000000000..5f726a1a49 --- /dev/null +++ b/docs/docs/en/dev/gen_ai_orchestrator/api.md @@ -0,0 +1,442 @@ + +# Tock LLM Orchestrator APIs +| # | Resource | Description | HTTP Method | Query params | Body - Response | +|----|-------------------------------------------------|---------------------------------------------------------------|---------------|--------------|---------------------------------------------| +| 01 | **/llm-providers** | Get all supported Large Language Model providers | `GET` | | [See](#api-llm-providers) | +| 02 | **/llm-providers/{provider-id}** | Get a specific supported Large Language Model provider | `GET` | | [See](#api-llm-providers-get-one) | +| 03 | **/llm-providers/{provider-id}/setting/example** | Get an example for a specific Large Language Model setting | `GET` | | [See](#api-llm-providers-get-setting) | +| 04 | **/llm-providers/{provider-id}/setting/status** | Check the setting for a specific Large Language Model provider | `POST` | | [See](#api-llm-providers-post-setting) | +| 05 | **/em-providers** | Get all supported Embedding Model providers | `GET` | | [See](#api-em-providers) | +| 06 | **/em-providers/{provider-id}** | Get a specific supported Embedding Model provider | `GET` | | [See](#api-em-providers-get-one) | +| 07 | **/em-providers/{provider-id}/setting/example** | Get an example for a specific Embedding Model setting | `GET` | | [See](#api-em-providers-get-setting) | +| 08 | **/em-providers/{provider-id}/setting/status** | Check the setting for a specific Embedding Model provider | `POST` | | [See](#api-em-providers-post-setting) | +| 09 | **/rag** | Ask a question using a knowledge base (documents) as context | `POST` | debug=true | [See](#api-rag) | +| 10 | **/completion/sentence-generation** | Generate sentences | `POST` | debug=true | [See](#api-completion-sentence-generation) | +--- +
+ +#### API-01 : `[GET]` /llm-providers + +#### Response + +```python +class LLMProvidersResponse(BaseModel): + providers: list[LLMProvider] +``` + +--- +
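As a quick sanity check, the listing endpoint can be exercised with any HTTP client. The sketch below is illustrative only: it assumes an orchestrator reachable at `http://localhost:8000` (the base URL used in the error examples further down); the route and response shape come from this page.

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local orchestrator instance

# List the supported LLM providers.
response = requests.get(f"{BASE_URL}/llm-providers", timeout=10)
response.raise_for_status()

# LLMProvidersResponse shape: {"providers": ["OpenAI", "AzureOpenAIService"]}
print(response.json()["providers"])
```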
+ +#### API-02 : `[GET]` /llm-providers/{provider-id} + +#### Response + +```python +class LLMProviderResponse(BaseModel): + provider: LLMProvider +``` + +--- +
+ +#### API-03 : `[GET]` /llm-providers/{provider-id}/setting/example + +#### Response + +```python +class LLMProviderSettingExampleResponse(BaseModel): + setting: LLMSetting +``` + +--- +
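The example endpoint returns a pre-filled setting skeleton, a convenient starting point before substituting a real API key. A minimal sketch, under the same local-instance assumption as above:

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local orchestrator instance

# Fetch an example LLMSetting for the OpenAI provider.
example = requests.get(
    f"{BASE_URL}/llm-providers/OpenAI/setting/example", timeout=10
).json()
print(example["setting"])
```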
+ +#### API-04 : `[POST]` /llm-providers/{provider-id}/setting/status + +#### Body + +```python +class LLMProviderSettingStatusQuery(BaseModel): + setting: LLMSetting +``` +#### Response + +```python +class LLMProviderSettingStatusResponse(BaseModel): + valid: bool + errors: list[Error] +``` + +--- +
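For instance, a status check for an `OpenAILLMSetting` could look like the hedged sketch below; the field names follow the Schemas section at the bottom of this page, and the key value is a placeholder.

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local orchestrator instance

# Body per LLMProviderSettingStatusQuery, wrapping an OpenAILLMSetting.
query = {
    "setting": {
        "provider": "OpenAI",
        "api_key": "sk-...",  # placeholder
        "temperature": "0.7",
        "model": "gpt-3.5-turbo",
    }
}

response = requests.post(
    f"{BASE_URL}/llm-providers/OpenAI/setting/status", json=query, timeout=30
)

# LLMProviderSettingStatusResponse: a valid flag plus a list of Error objects.
status = response.json()
print(status["valid"], status["errors"])
```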
+ +#### API-05 : `[GET]` /em-providers + +#### Response + +```python +class EMProvidersResponse(BaseModel): + providers: list[LLMProvider] +``` + +--- +
+ +#### API-06 : `[GET]` /em-providers/{provider-id} + +#### Response + +```python +class EMProviderResponse(BaseModel): + provider: LLMProvider +``` + +--- +
+ +#### API-07 : `[GET]` /em-providers/{provider-id}/setting/example + +#### Response + +```python +class EMProviderSettingExampleResponse(BaseModel): + setting: EMSetting +``` + +--- +
+ +#### API-08 : `[POST]` /em-providers/{provider-id}/setting/status + +#### Body + +```python +class EMProviderSettingStatusQuery(BaseModel): + setting: EMSetting +``` +#### Response + +```python +class EMProviderSettingStatusResponse(BaseModel): + valid: bool + errors: list[Error] +``` + +--- +
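The embedding-side check mirrors API-04. A hedged sketch for an `AzureOpenAIEMSetting`, with placeholder values modeled on the provider configuration examples elsewhere in these docs:

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local orchestrator instance

# AzureOpenAIEMSetting shape from the Schemas section; values are placeholders.
query = {
    "setting": {
        "provider": "AzureOpenAIService",
        "api_key": "azure-key",
        "deployment_name": "custom-deployment-name",
        "model": "text-embedding-ada-002",
        "api_base": "https://custom-api-name.azure-api.net",
        "api_version": "2023-03-15-preview",
    }
}

response = requests.post(
    f"{BASE_URL}/em-providers/AzureOpenAIService/setting/status", json=query, timeout=30
)
status = response.json()
print(status["valid"], status["errors"])
```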
+ +#### API-09 : `[POST]` /rag + +#### Body : + +```python +class RagQuery(BaseModel): + dialog: Optional[DialogDetails] + question_condensing_llm_setting: LLMSetting + question_condensing_prompt: Optional[PromptTemplate] + question_answering_llm_setting: LLMSetting + question_answering_prompt: Optional[PromptTemplate] + embedding_question_em_setting: EMSetting + guardrail_setting: Optional[GuardrailSetting] + observability_setting: Optional[ObservabilitySetting] + compressor_setting: Optional[DocumentCompressorSetting] + document_index_name: str + document_search_params: DocumentSearchParams +``` + +#### Response : + +```python +class RagResponse(BaseModel): + answer: TextWithFootnotes + debug: Optional[Any] = None +``` + +--- +
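To make the RagQuery model concrete, here is an illustrative request body assembled strictly from the models documented on this page. The index name, question, history, and filter values are placeholders; optional settings (guardrail, observability, compressor) are omitted.

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local orchestrator instance

rag_query = {
    "dialog": {
        "dialog_id": "dialog-1",
        "user_id": "user-1",
        "history": [
            {"text": "Hello", "type": "HUMAN"},
            {"text": "Hi, how can I help you?", "type": "AI"},
        ],
        "tags": ["connector:web"],
    },
    "question_condensing_llm_setting": {
        "provider": "OpenAI",
        "api_key": "sk-...",  # placeholder
        "temperature": "0.0",
        "model": "gpt-3.5-turbo",
    },
    "question_answering_llm_setting": {
        "provider": "OpenAI",
        "api_key": "sk-...",  # placeholder
        "temperature": "0.7",
        "model": "gpt-3.5-turbo",
    },
    "question_answering_prompt": {
        "formatter": "f-string",
        "template": "Answer the question using the context. Question: {question}",
        "inputs": {"question": "How do I reset my password?"},
    },
    "embedding_question_em_setting": {
        "provider": "OpenAI",
        "api_key": "sk-...",  # placeholder
        "model": "text-embedding-ada-002",
    },
    "document_index_name": "my-index",  # placeholder
    "document_search_params": {
        "provider": "OpenSearch",
        "k": 4,
        # Hypothetical term filter; adjust to your index layout.
        "filter": [{"term": {"metadata.index_session_id.keyword": "session-1"}}],
    },
}

# Per the endpoints table, debug=true also returns the debug payload.
response = requests.post(
    f"{BASE_URL}/rag", params={"debug": "true"}, json=rag_query, timeout=60
)
answer = response.json()["answer"]
print(answer["text"], answer["footnotes"])
```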
+ +#### API-10 : `[POST]` /completion/sentence-generation + +#### Body : + +```python +class SentenceGenerationQuery(BaseModel): + llm_setting: LLMSetting +``` + +#### Response : + +```python +class SentenceGenerationResponse(BaseModel): + sentences: list[str] +``` + +--- +
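A hedged sketch of a sentence-generation call. Only `llm_setting` is confirmed by the body model above; the `prompt` field is an assumption mirroring the Kotlin `SentenceGenerationQuery` in this change, which now sends the PromptTemplate alongside the LLM setting.

```python
import requests

BASE_URL = "http://localhost:8000"  # assumed local orchestrator instance

sentence_generation_query = {
    "llm_setting": {
        "provider": "OpenAI",
        "api_key": "sk-...",  # placeholder
        "temperature": "1.0",
        "model": "gpt-3.5-turbo",
    },
    # Assumed field name (see lead-in): a Jinja2 PromptTemplate with inputs.
    "prompt": {
        "formatter": "jinja2",
        "template": "Generate {{ nb_sentences }} sentences like: {{ sentences }}",
        "inputs": {"nb_sentences": 5, "sentences": ["I want to open an account"]},
    },
}

response = requests.post(
    f"{BASE_URL}/completion/sentence-generation", json=sentence_generation_query
)
print(response.json()["sentences"])
```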
+ +## Schemas +```python +class LLMProvider(str, Enum): + OPEN_AI = 'OpenAI' + AZURE_OPEN_AI_SERVICE = 'AzureOpenAIService' + +class BaseLLMSetting(BaseModel): + provider: LLMProvider + api_key: str + temperature: str + +class BaseEMSetting(BaseModel): + provider: LLMProvider + api_key: str + +class OpenAILLMSetting(BaseLLMSetting): + provider: Literal[LLMProvider.OPEN_AI] + model: str + +class AzureOpenAILLMSetting(BaseLLMSetting): + provider: Literal[LLMProvider.AZURE_OPEN_AI_SERVICE] + deployment_name: str + model: Optional[str] + api_base: str + api_version: str + +LLMSetting = Annotated[ + Union[OpenAILLMSetting, AzureOpenAILLMSetting], + Body(discriminator='provider') +] + +class OpenAIEMSetting(BaseEMSetting): + provider: Literal[LLMProvider.OPEN_AI] + model: str + +class AzureOpenAIEMSetting(BaseEMSetting): + provider: Literal[LLMProvider.AZURE_OPEN_AI_SERVICE] + deployment_name: str + model: Optional[str] + api_base: str + api_version: str + +EMSetting = Annotated[ + Union[OpenAIEMSetting, AzureOpenAIEMSetting], + Body(discriminator='provider') +] + +class VectorStoreProvider(str, Enum): + OPEN_SEARCH = 'OpenSearch' + +class BaseVectorStoreSearchParams(ABC, BaseModel): + provider: VectorStoreProvider + + +class OpenSearchParams(BaseVectorStoreSearchParams): + provider: Literal[VectorStoreProvider.OPEN_SEARCH] + k: int + filter: List[OpenSearchTermParams] + +class OpenSearchTermParams(BaseModel): + term: dict + +DocumentSearchParams = Annotated[ + Union[OpenSearchParams], Body(discriminator='provider') +] + +class Footnote(BaseModel): + identifier: str + title: str + url: Optional[str] = None + +class TextWithFootnotes(BaseModel): + text: str + footnotes: list[Footnote] + +class DialogDetails(BaseModel): + dialog_id: Optional[str] + user_id: Optional[str] + history: list[ChatMessage] + tags: list[str] + +class ChatMessageType(str, Enum): + USER = 'HUMAN' + AI = 'AI' + +class ChatMessage(BaseModel): + text: str + type: ChatMessageType + +class PromptTemplate(BaseModel): + formatter: PromptFormatter + template: str + inputs: dict + +class PromptFormatter(str, Enum): + F_STRING = 'f-string' + JINJA2 = 'jinja2' + +class Error(BaseModel): + code: str + message: str +``` + +--- +
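Since this change moves prompts off the LLM settings and onto `PromptTemplate`, it is worth seeing how the two `PromptFormatter` values render. The snippet below mirrors standard f-string and Jinja2 semantics; it is an illustration, not the orchestrator's internal code.

```python
from jinja2 import Template

# f-string formatter: placeholders use {name} syntax.
f_string_prompt = {
    "formatter": "f-string",
    "template": "Answer in {locale}: {question}",
    "inputs": {"locale": "French", "question": "What is Tock?"},
}
print(f_string_prompt["template"].format(**f_string_prompt["inputs"]))

# jinja2 formatter: placeholders use {{ name }} syntax.
jinja2_prompt = {
    "formatter": "jinja2",
    "template": "Answer in {{ locale }}: {{ question }}",
    "inputs": {"locale": "French", "question": "What is Tock?"},
}
print(Template(jinja2_prompt["template"]).render(**jinja2_prompt["inputs"]))
```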
+ +## Errors +#### General : +```json + [ + { + "code": 1000, + "message": "Unknown AI provider.", + "detail": null, + "info": { + "provider": "TOTO", + "error": "NotFoundError", + "cause": "'TOTO' is not accepted. Accepted values are : ['OpenAI', 'AzureOpenAIService']", + "request": "[POST] http://localhost:8000/llm-providers/TOTO/setting/status" + } + }, + { + "code": 1001, + "message": "Bad query.", + "detail": "The request seems to be invalid.", + "info": { + "provider": "AzureOpenAIService", + "error": "BadRequestError", + "cause": "'AzureOpenAIService' is different from the provider ID 'OpenAI' given for setting.", + "request": "[POST] http://localhost:8000/llm-providers/AzureOpenAIService/setting/status" + } + } +] +``` +#### LLMProvider OpenAI : +```json + [ + { + "code": 2001, + "message": "Connection error to the AI provider API.", + "detail": "Check the requested URL, your network settings, proxy configuration, SSL certificates, or firewall rules.", + "info": { + "provider": "OpenAI", + "error": "APIConnectionError", + "cause": "Connection error.", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2002, + "message": "Authentication error to the AI provider API.", + "detail": "Check your API key or token and make sure it is correct and active.", + "info": { + "provider": "OpenAI", + "error": "AuthenticationError", + "cause": "Error code: 401 - {'error': {'message': 'Incorrect API key provided: ab7*****************************IV4B. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2003, + "message": "An AI provider resource was not found.", + "detail": "The request URL base is correct, but the path or a query parameter is not.", + "info": { + "provider": "OpenAI", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'message': 'This is not a chat model and thus not supported in the v1/chat/completions endpoint. Did you mean to use v1/completions?', 'type': 'invalid_request_error', 'param': 'model', 'code': None}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2004, + "message": "Unknown AI provider model.", + "detail": "Consult the official documentation for accepted values.", + "info": { + "provider": "OpenAI", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'message': 'The model `gpt-3.5-TOTO` does not exist', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2007, + "message": "The model's context length has been exceeded.", + "detail": "Reduce the length of the prompt message.", + "info": { + "provider": "OpenAI", + "error": "BadRequestError", + "cause": "Error code: 400 - {'error': {'message': \"This model's maximum context length is 4097 tokens. However, your messages resulted in 29167 tokens. 
Please reduce the length of the messages.\", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + } +] +``` + +#### LLMProvider AzureOpenAIService : +```json + [ + { + "code": 2001, + "message": "Connection error to the AI provider API.", + "detail": "Check the requested URL, your network settings, proxy configuration, SSL certificates, or firewall rules.", + "info": { + "provider": "AzureOpenAIService", + "error": "APIConnectionError", + "cause": "Connection error.", + "request": "[POST] https://conversationnel-api-arkea.azure-oapi.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-preview" + } + }, + { + "code": 2002, + "message": "Authentication error to the AI provider API.", + "detail": "Check your API key or token and make sure it is correct and active.", + "info": { + "provider": "AzureOpenAIService", + "error": "AuthenticationError", + "cause": "Error code: 401 - {'statusCode': 401, 'message': 'Access denied due to invalid subscription key. Make sure to provide a valid key for an active subscription.'}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-preview" + } + }, + { + "code": 2003, + "message": "An AI provider resource was not found.", + "detail": "The request URL base is correct, but the path or a query parameter is not.", + "info": { + "provider": "AzureOpenAIService", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'code': '404', 'message': 'Resource not found'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-toto" + } + }, + { + "code": 2005, + "message": "Unknown AI provider deployment.", + "detail": "Consult the official documentation for accepted values.", + "info": { + "provider": "AzureOpenAIService", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'code': 'DeploymentNotFound', 'message': 'The API deployment for this resource does not exist. If you created the deployment within the last 5 minutes, please wait a moment and try again.'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4f/chat/completions?api-version=2023-03-15-preview" + } + }, + { + "code": 2006, + "message": "AI provider API error.", + "detail": "Bad request.", + "info": { + "provider": "AzureOpenAIService", + "error": "BadRequestError", + "cause": "Error code: 400 - {'error': {'code': 'OperationNotSupported', 'message': 'The embeddings operation does not work with the specified model, gpt-4. Please choose different model and try again. You can learn more about which models can be used with each operation here: https://go.microsoft.com/fwlink/?linkid=2197993.'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net//openai/deployments/squadconv-gpt4/embeddings?api-version=2023-03-15-preview" + } + }, + { + "code": 2007, + "message": "The model's context length has been exceeded.", + "detail": "Reduce the length of the prompt message.", + "info": { + "provider": "AzureOpenAIService", + "error": "BadRequestError", + "cause": "Error code: 400 - {'error': {'message': \"This model's maximum context length is 8192 tokens. However, your messages resulted in 29167 tokens. 
Please reduce the length of the messages.\", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-preview" + } + } +] +``` diff --git a/docs/docs/en/user/studio/gen-ai/gen-ai.md b/docs/docs/en/user/studio/gen-ai/gen-ai.md index 09f8a44c46..f8a9959061 100644 --- a/docs/docs/en/user/studio/gen-ai/gen-ai.md +++ b/docs/docs/en/user/studio/gen-ai/gen-ai.md @@ -4,14 +4,14 @@ title: Gen AI # Gen AI -Découvrez l'IA générative sur Tock : +Discover generative AI on Tock: -- [Le menu _Gen AI / RAG Settings_](../../../user/studio/gen-ai/features/gen-ai-feature-rag.md) -- [Le menu _Gen AI / RAG Exclusions_](../gen-ai/features/gen-ai-feature-rag-exclusion.md) -- [Le menu _Gen AI / Sentence Generation_](../../../user/studio/gen-ai/features/gen-ai-feature-sentence-generation.md) -- [Le menu _Gen AI / Observability Settings_](../../../user/studio/gen-ai/features/gen-ai-feature-observability.md) -- [Le menu _Gen AI / Vector Store Settings_](../../../user/studio/gen-ai/features/gen-ai-feature-vector-store.md) +- [The _Gen AI / RAG Settings_ menu](../../../user/studio/gen-ai/features/gen-ai-feature-rag.md) +- [The _Gen AI / RAG Exclusions_ menu](../gen-ai/features/gen-ai-feature-rag-exclusion.md) +- [The _Gen AI / Sentence Generation_ menu](../../../user/studio/gen-ai/features/gen-ai-feature-sentence-generation.md) +- [The _Gen AI / Observability Settings_ menu](../../../user/studio/gen-ai/features/gen-ai-feature-observability.md) +- [The _Gen AI / Vector Store Settings_ menu](../../../user/studio/gen-ai/features/gen-ai-feature-vector-store.md) -Démo : +Demo: -[![RAG et LLM Open Source ? Vers des IA Génératives plus responsables](https://img.youtube.com/vi/lYmUeYoVkwc/0.jpg)](https://youtu.be/lYmUeYoVkwc) +[![Open Source LLM and RAG - Towards more responsible Generative AI](https://img.youtube.com/vi/lYmUeYoVkwc/0.jpg)](https://youtu.be/lYmUeYoVkwc) diff --git a/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md b/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md index 0163f75eed..88cd4d346c 100644 --- a/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md +++ b/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md @@ -2,16 +2,15 @@ title: Fournisseurs des modèles LLM et d'Embedding --- -# Fournisseurs des modèles LLM et d'Embedding - -Voici la liste des fournisseurs des modèles LLM et d'Embedding pris en compte par Tock : +# LLM and Embedding model providers +Here is the list of LLM and Embedding model providers supported by Tock: -<th>Fournisseur de l'IA</th> -<th>Configuration du LLM</th> -<th>Configuration de l'Embedding</th> +<th>AI provider</th> +<th>LLM configuration</th> +<th>Embedding configuration</th> @@ -29,7 +28,6 @@ OpenAI
"value": "aebb4b****************7b25e3371" }, "temperature": "1.0", - "prompt": "Customized prompt for the use case", "model": "gpt-3.5-turbo" } @@ -61,7 +59,6 @@ AzureOpenAIService
"value": "aebb4b****************7b25e3371" }, "temperature": "1.0", - "prompt": "Customized prompt for the use case", "api_base": "https://custom-api-name.azure-api.net", "deployment_name": "custom-deployment-name", "model": "gpt-4o", diff --git a/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-observability.md b/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-observability.md index 059fc1d0ee..8462a0b298 100644 --- a/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-observability.md +++ b/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-observability.md @@ -2,14 +2,14 @@ title: Fournisseurs d'observabilité des LLMs --- -# Fournisseurs d'observabilité des LLMs. +# LLM observability providers. -Voici la liste des fournisseurs d'observabilité des LLM pris en compte par Tock : +Here is the list of LLM observability providers supported by Tock:
-<th>Fournisseur d'observabilité</th> +<th>LLM observability providers</th> <th>Configuration</th> diff --git a/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-vector-store.md b/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-vector-store.md index dbefd650e8..3225650bad 100644 --- a/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-vector-store.md +++ b/docs/docs/en/user/studio/gen-ai/providers/gen-ai-provider-vector-store.md @@ -2,14 +2,14 @@ title: Fournisseurs de base vectorielle --- -# Fournisseurs de base vectorielle. +# Vector store providers. -Voici la liste des fournisseurs de base vectorielle pris en compte par Tock : +Here is the list of vector store providers supported by Tock:
diff --git a/docs/docs/fr/dev/gen_ai_orchestrator/api.md b/docs/docs/fr/dev/gen_ai_orchestrator/api.md new file mode 100644 index 0000000000..5f726a1a49 --- /dev/null +++ b/docs/docs/fr/dev/gen_ai_orchestrator/api.md @@ -0,0 +1,442 @@ + +# Tock LLM Orchestrator APIs +| # | Resource | Description | HTTP Method | Query params | Body - Response | +|----|-------------------------------------------------|---------------------------------------------------------------|---------------|--------------|---------------------------------------------| +| 01 | **/llm-providers** | Get all supported Large Language Model providers | `GET` | | [Voir](#api-llm-providers) | +| 02 | **/llm-providers/{provider-id}** | Get a specific supported Large Language Model provider | `GET` | | [Voir](#api-llm-providers-get-one) | +| 03 | **/llm-providers/{provider-id}/setting/example** | Get an example for a specific Large Language Model setting | `GET` | | [Voir](#api-llm-providers-get-setting) | +| 04 | **/llm-providers/{provider-id}/setting/status** | Check the setting for a specific Large Language Model provider | `POST` | | [Voir](#api-llm-providers-post-setting) | +| 05 | **/em-providers** | Get all supported Embedding Model providers | `GET` | | [Voir](#api-em-providers) | +| 06 | **/em-providers/{provider-id}** | Get a specific supported Embedding Model provider | `GET` | | [Voir](#api-em-providers-get-one) | +| 07 | **/em-providers/{provider-id}/setting/example** | Get an example for a specific Embedding Model setting | `GET` | | [Voir](#api-em-providers-get-setting) | +| 08 | **/em-providers/{provider-id}/setting/status** | Check the setting for a specific Embedding Model provider | `POST` | | [Voir](#api-em-providers-post-setting) | +| 09 | **/rag** | Ask a question using a knowledge base (documents) as context | `POST` | debug=true | [Voir](#api-rag) | +| 10 | **/completion/sentence-generation** | Generate sentences | `POST` | debug=true | [Voir](#api-completion-sentence-generation) | +--- +
+ +#### API-01 : `[GET]` /llm-providers + +#### Response + +```python +class LLMProvidersResponse(BaseModel): + providers: list[LLMProvider] +``` + +--- +
+ +#### API-02 : `[GET]` /llm-providers/{provider-id} + +#### Response + +```python +class LLMProviderResponse(BaseModel): + provider: LLMProvider +``` + +--- +
+ +#### API-03 : `[GET]` /llm-providers/{provider-id}/setting/example + +#### Response + +```python +class LLMProviderSettingExampleResponse(BaseModel): + setting: LLMSetting +``` + +--- +
+ +#### API-04 : `[POST]` /llm-providers/{provider-id}/setting/status + +#### Body + +```python +class LLMProviderSettingStatusQuery(BaseModel): + setting: LLMSetting +``` +#### Response + +```python +class LLMProviderSettingStatusResponse(BaseModel): + valid: bool + errors: list[Error] +``` + +--- +
+ +#### API-05 : `[GET]` /em-providers + +#### Response + +```python +class EMProvidersResponse(BaseModel): + providers: list[LLMProvider] +``` + +--- +
+ +#### API-06 : `[GET]` /em-providers/{provider-id} + +#### Response + +```python +class EMProviderResponse(BaseModel): + provider: LLMProvider +``` + +--- +
+ +#### API-07 : `[GET]` /em-providers/{provider-id}/setting/example + +#### Response + +```python +class EMProviderSettingExampleResponse(BaseModel): + setting: EMSetting +``` + +--- +
+ +#### API-08 : `[POST]` /em-providers/{provider-id}/setting/status + +#### Body + +```python +class EMProviderSettingStatusQuery(BaseModel): + setting: EMSetting +``` +#### Response + +```python +class EMProviderSettingStatusResponse(BaseModel): + valid: bool + errors: list[Error] +``` + +--- +
+ +#### API-09 : `[POST]` /rag + +#### Body : + +```python +class RagQuery(BaseModel): + dialog: Optional[DialogDetails] + question_condensing_llm_setting: LLMSetting + question_condensing_prompt: Optional[PromptTemplate] + question_answering_llm_setting: LLMSetting + question_answering_prompt: Optional[PromptTemplate] + embedding_question_em_setting: EMSetting + guardrail_setting: Optional[GuardrailSetting] + observability_setting: Optional[ObservabilitySetting] + compressor_setting: Optional[DocumentCompressorSetting] + document_index_name: str + document_search_params: DocumentSearchParams +``` + +#### Response : + +```python +class RagResponse(BaseModel): + answer: TextWithFootnotes + debug: Optional[Any] = None +``` + +--- +
+ +#### API-10 : `[POST]` /completion/sentence-generation + +#### Body : + +```python +class SentenceGenerationQuery(BaseModel): + llm_setting: LLMSetting +``` + +#### Response : + +```python +class SentenceGenerationResponse(BaseModel): + sentences: list[str] +``` + +--- +
+ +## Schemas +```python +class LLMProvider(str, Enum): + OPEN_AI = 'OpenAI' + AZURE_OPEN_AI_SERVICE = 'AzureOpenAIService' + +class BaseLLMSetting(BaseModel): + provider: LLMProvider + api_key: str + temperature: str + +class BaseEMSetting(BaseModel): + provider: LLMProvider + api_key: str + +class OpenAILLMSetting(BaseLLMSetting): + provider: Literal[LLMProvider.OPEN_AI] + model: str + +class AzureOpenAILLMSetting(BaseLLMSetting): + provider: Literal[LLMProvider.AZURE_OPEN_AI_SERVICE] + deployment_name: str + model: Optional[str] + api_base: str + api_version: str + +LLMSetting = Annotated[ + Union[OpenAILLMSetting, AzureOpenAILLMSetting], + Body(discriminator='provider') +] + +class OpenAIEMSetting(BaseEMSetting): + provider: Literal[LLMProvider.OPEN_AI] + model: str + +class AzureOpenAIEMSetting(BaseEMSetting): + provider: Literal[LLMProvider.AZURE_OPEN_AI_SERVICE] + deployment_name: str + model: Optional[str] + api_base: str + api_version: str + +EMSetting = Annotated[ + Union[OpenAIEMSetting, AzureOpenAIEMSetting], + Body(discriminator='provider') +] + +class VectorStoreProvider(str, Enum): + OPEN_SEARCH = 'OpenSearch' + +class BaseVectorStoreSearchParams(ABC, BaseModel): + provider: VectorStoreProvider + + +class OpenSearchParams(BaseVectorStoreSearchParams): + provider: Literal[VectorStoreProvider.OPEN_SEARCH] + k: int + filter: List[OpenSearchTermParams] + +class OpenSearchTermParams(BaseModel): + term: dict + +DocumentSearchParams = Annotated[ + Union[OpenSearchParams], Body(discriminator='provider') +] + +class Footnote(BaseModel): + identifier: str + title: str + url: Optional[str] = None + +class TextWithFootnotes(BaseModel): + text: str + footnotes: list[Footnote] + +class DialogDetails(BaseModel): + dialog_id: Optional[str] + user_id: Optional[str] + history: list[ChatMessage] + tags: list[str] + +class ChatMessageType(str, Enum): + USER = 'HUMAN' + AI = 'AI' + +class ChatMessage(BaseModel): + text: str + type: ChatMessageType + +class PromptTemplate(BaseModel): + formatter: PromptFormatter + template: str + inputs: dict + +class PromptFormatter(str, Enum): + F_STRING = 'f-string' + JINJA2 = 'jinja2' + +class Error(BaseModel): + code: str + message: str +``` + +--- +
+ +## Errors +#### General : +```json + [ + { + "code": 1000, + "message": "Unknown AI provider.", + "detail": null, + "info": { + "provider": "TOTO", + "error": "NotFoundError", + "cause": "'TOTO' is not accepted. Accepted values are : ['OpenAI', 'AzureOpenAIService']", + "request": "[POST] http://localhost:8000/llm-providers/TOTO/setting/status" + } + }, + { + "code": 1001, + "message": "Bad query.", + "detail": "The request seems to be invalid.", + "info": { + "provider": "AzureOpenAIService", + "error": "BadRequestError", + "cause": "'AzureOpenAIService' is different from the provider ID 'OpenAI' given for setting.", + "request": "[POST] http://localhost:8000/llm-providers/AzureOpenAIService/setting/status" + } + } +] +``` +#### LLMProvider OpenAI : +```json + [ + { + "code": 2001, + "message": "Connection error to the AI provider API.", + "detail": "Check the requested URL, your network settings, proxy configuration, SSL certificates, or firewall rules.", + "info": { + "provider": "OpenAI", + "error": "APIConnectionError", + "cause": "Connection error.", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2002, + "message": "Authentication error to the AI provider API.", + "detail": "Check your API key or token and make sure it is correct and active.", + "info": { + "provider": "OpenAI", + "error": "AuthenticationError", + "cause": "Error code: 401 - {'error': {'message': 'Incorrect API key provided: ab7*****************************IV4B. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2003, + "message": "An AI provider resource was not found.", + "detail": "The request URL base is correct, but the path or a query parameter is not.", + "info": { + "provider": "OpenAI", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'message': 'This is not a chat model and thus not supported in the v1/chat/completions endpoint. Did you mean to use v1/completions?', 'type': 'invalid_request_error', 'param': 'model', 'code': None}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2004, + "message": "Unknown AI provider model.", + "detail": "Consult the official documentation for accepted values.", + "info": { + "provider": "OpenAI", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'message': 'The model `gpt-3.5-TOTO` does not exist', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + }, + { + "code": 2007, + "message": "The model's context length has been exceeded.", + "detail": "Reduce the length of the prompt message.", + "info": { + "provider": "OpenAI", + "error": "BadRequestError", + "cause": "Error code: 400 - {'error': {'message': \"This model's maximum context length is 4097 tokens. However, your messages resulted in 29167 tokens. 
Please reduce the length of the messages.\", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}", + "request": "[POST] https://api.openai.com/v1/chat/completions" + } + } +] +``` + +#### LLMProvider AzureOpenAIService : +```json + [ + { + "code": 2001, + "message": "Connection error to the AI provider API.", + "detail": "Check the requested URL, your network settings, proxy configuration, SSL certificates, or firewall rules.", + "info": { + "provider": "AzureOpenAIService", + "error": "APIConnectionError", + "cause": "Connection error.", + "request": "[POST] https://conversationnel-api-arkea.azure-oapi.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-preview" + } + }, + { + "code": 2002, + "message": "Authentication error to the AI provider API.", + "detail": "Check your API key or token and make sure it is correct and active.", + "info": { + "provider": "AzureOpenAIService", + "error": "AuthenticationError", + "cause": "Error code: 401 - {'statusCode': 401, 'message': 'Access denied due to invalid subscription key. Make sure to provide a valid key for an active subscription.'}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-preview" + } + }, + { + "code": 2003, + "message": "An AI provider resource was not found.", + "detail": "The request URL base is correct, but the path or a query parameter is not.", + "info": { + "provider": "AzureOpenAIService", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'code': '404', 'message': 'Resource not found'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-toto" + } + }, + { + "code": 2005, + "message": "Unknown AI provider deployment.", + "detail": "Consult the official documentation for accepted values.", + "info": { + "provider": "AzureOpenAIService", + "error": "NotFoundError", + "cause": "Error code: 404 - {'error': {'code': 'DeploymentNotFound', 'message': 'The API deployment for this resource does not exist. If you created the deployment within the last 5 minutes, please wait a moment and try again.'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4f/chat/completions?api-version=2023-03-15-preview" + } + }, + { + "code": 2006, + "message": "AI provider API error.", + "detail": "Bad request.", + "info": { + "provider": "AzureOpenAIService", + "error": "BadRequestError", + "cause": "Error code: 400 - {'error': {'code': 'OperationNotSupported', 'message': 'The embeddings operation does not work with the specified model, gpt-4. Please choose different model and try again. You can learn more about which models can be used with each operation here: https://go.microsoft.com/fwlink/?linkid=2197993.'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net//openai/deployments/squadconv-gpt4/embeddings?api-version=2023-03-15-preview" + } + }, + { + "code": 2007, + "message": "The model's context length has been exceeded.", + "detail": "Reduce the length of the prompt message.", + "info": { + "provider": "AzureOpenAIService", + "error": "BadRequestError", + "cause": "Error code: 400 - {'error': {'message': \"This model's maximum context length is 8192 tokens. However, your messages resulted in 29167 tokens. 
Please reduce the length of the messages.\", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}", + "request": "[POST] https://conversationnel-api-arkea.azure-api.net///openai/deployments/squadconv-gpt4/chat/completions?api-version=2023-03-15-preview" + } + } +] +``` diff --git a/docs/docs/fr/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md b/docs/docs/fr/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md index 0163f75eed..1a2e9301b9 100644 --- a/docs/docs/fr/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md +++ b/docs/docs/fr/user/studio/gen-ai/providers/gen-ai-provider-llm-and-embedding.md @@ -29,7 +29,6 @@ OpenAI
"value": "aebb4b****************7b25e3371" }, "temperature": "1.0", - "prompt": "Customized prompt for the use case", "model": "gpt-3.5-turbo" } @@ -61,7 +60,6 @@ AzureOpenAIService
"value": "aebb4b****************7b25e3371" }, "temperature": "1.0", - "prompt": "Customized prompt for the use case", "api_base": "https://custom-api-name.azure-api.net", "deployment_name": "custom-deployment-name", "model": "gpt-4o", diff --git a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/PromptTemplate.kt b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/PromptTemplate.kt index 42560a41a0..eb85c4cb4f 100644 --- a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/PromptTemplate.kt +++ b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/PromptTemplate.kt @@ -21,8 +21,8 @@ enum class Formatter(val id: String) { JINJA2(id = "jinja2"); } -class PromptTemplate ( - val formatter: String, +data class PromptTemplate ( + val formatter: String = Formatter.JINJA2.id, val template: String, - val inputs: Map + val inputs: Map = emptyMap() ) \ No newline at end of file diff --git a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/RAGQuery.kt b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/RAGQuery.kt index dfb0bafddb..2e6e4564ba 100644 --- a/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/RAGQuery.kt +++ b/gen-ai/orchestrator-client/src/main/kotlin/ai/tock/genai/orchestratorclient/requests/RAGQuery.kt @@ -23,9 +23,9 @@ import ai.tock.genai.orchestratorcore.models.vectorstore.DocumentSearchParamsBas import ai.tock.genai.orchestratorcore.models.vectorstore.VectorStoreSetting data class RAGQuery( - // val condenseQuestionLlmSetting: LLMSetting, - // val condenseQuestionPrompt: PromptTemplate, val dialog: DialogDetails?, + val questionCondensingLlmSetting: LLMSetting?, + val questionCondensingPrompt: PromptTemplate?, val questionAnsweringLlmSetting: LLMSetting, val questionAnsweringPrompt: PromptTemplate, val embeddingQuestionEmSetting: EMSetting, diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/LLMSettingMapper.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/LLMSettingMapper.kt index dfd2e0baef..ce6ef81f52 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/LLMSettingMapper.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/LLMSettingMapper.kt @@ -37,7 +37,6 @@ object LLMSettingMapper { OpenAILLMSetting( apiKey = SecurityUtils.fetchSecretKeyValue(apiKey), temperature = temperature, - prompt = prompt, model = model, baseUrl = baseUrl ) @@ -45,7 +44,6 @@ object LLMSettingMapper { AzureOpenAILLMSetting( apiKey = SecurityUtils.fetchSecretKeyValue(apiKey), temperature = temperature, - prompt = prompt, apiBase = apiBase, deploymentName = deploymentName, model = model, @@ -54,7 +52,6 @@ object LLMSettingMapper { is OllamaLLMSetting -> OllamaLLMSetting( temperature = temperature, - prompt = prompt, model = model, baseUrl = baseUrl ) @@ -78,7 +75,6 @@ object LLMSettingMapper { OpenAILLMSetting( apiKey = SecurityUtils.createSecretKey(namespace, botId, feature, apiKey), temperature = temperature, - prompt = prompt, model = model, baseUrl = baseUrl ) @@ -86,14 +82,17 @@ object LLMSettingMapper { AzureOpenAILLMSetting( SecurityUtils.createSecretKey(namespace, botId, feature, apiKey), temperature = temperature, - prompt = prompt, apiBase = apiBase, deploymentName = deploymentName, apiVersion = apiVersion, 
model = model ) is OllamaLLMSetting -> - OllamaLLMSetting(temperature, prompt, model, baseUrl) + OllamaLLMSetting( + temperature = temperature, + model = model, + baseUrl = baseUrl + ) else -> throw IllegalArgumentException("Unsupported LLM Setting") } diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/VectorStoreSettingMapper.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/VectorStoreSettingMapper.kt index b3b97e25f8..8735486df1 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/VectorStoreSettingMapper.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/mappers/VectorStoreSettingMapper.kt @@ -37,11 +37,11 @@ object VectorStoreSettingMapper { when(this){ is OpenSearchVectorStoreSetting -> { val fetchedPassword = SecurityUtils.fetchSecretKeyValue(password) - return OpenSearchVectorStoreSetting(host, port, username, fetchedPassword, k) + return OpenSearchVectorStoreSetting(host, port, username, fetchedPassword) } is PGVectorStoreSetting -> { val fetchedPassword = SecurityUtils.fetchSecretKeyValue(password) - return PGVectorStoreSetting(host, port, username, fetchedPassword, k, database) + return PGVectorStoreSetting(host, port, username, fetchedPassword, database) } else -> throw IllegalArgumentException("Unsupported VectorStore Setting") @@ -61,11 +61,11 @@ object VectorStoreSettingMapper { when(this){ is OpenSearchVectorStoreSetting -> { val secretPassword = SecurityUtils.createSecretKey(namespace, botId, feature, password) - return OpenSearchVectorStoreSetting(host, port, username, secretPassword, k) + return OpenSearchVectorStoreSetting(host, port, username, secretPassword) } is PGVectorStoreSetting -> { val secretPassword = SecurityUtils.createSecretKey(namespace, botId, feature, password) - return PGVectorStoreSetting(host, port, username, secretPassword, k, database) + return PGVectorStoreSetting(host, port, username, secretPassword, database) } else -> throw IllegalArgumentException("Unsupported VectorStore Setting") diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/Constants.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/Constants.kt index 45b2e69c87..333015c2d0 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/Constants.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/Constants.kt @@ -32,6 +32,7 @@ object Constants { private const val GEN_AI_RAG="$GEN_AI/RAG" private const val GEN_AI_COMPLETION="$GEN_AI/COMPLETION" + const val GEN_AI_RAG_QUESTION_CONDENSING="$GEN_AI_RAG/questionCondensing" const val GEN_AI_RAG_QUESTION_ANSWERING="$GEN_AI_RAG/questionAnswering" const val GEN_AI_RAG_EMBEDDING_QUESTION="$GEN_AI_RAG/embeddingQuestion" diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/AzureOpenAILLMSetting.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/AzureOpenAILLMSetting.kt index aa1d33a32f..cb05ba484b 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/AzureOpenAILLMSetting.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/AzureOpenAILLMSetting.kt @@ -19,7 +19,8 @@ package ai.tock.genai.orchestratorcore.models.llm data class AzureOpenAILLMSetting( override val apiKey: T, override val temperature: String, - 
override val prompt: String, + @Deprecated("use PromptTemplate#prompt") + override val prompt: String? = null, val apiBase: String, val deploymentName: String, val apiVersion: String, diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/LLMSettingBase.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/LLMSettingBase.kt index d2ca82f105..ec13ba14fb 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/LLMSettingBase.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/LLMSettingBase.kt @@ -38,7 +38,8 @@ abstract class LLMSettingBase( val provider: LLMProvider, open val apiKey: T? = null, open val temperature: String, - open val prompt: String + @Deprecated("use PromptTemplate#prompt") + open val prompt: String? = null ) { abstract fun copyWithTemperature(temperature: String): LLMSettingBase } diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OllamaLLMSetting.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OllamaLLMSetting.kt index caa577996b..7c03229a03 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OllamaLLMSetting.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OllamaLLMSetting.kt @@ -18,7 +18,8 @@ package ai.tock.genai.orchestratorcore.models.llm data class OllamaLLMSetting( override val temperature: String, - override val prompt: String, + @Deprecated("use PromptTemplate#prompt") + override val prompt: String? = null, val model: String, val baseUrl: String, ) : LLMSettingBase(provider = LLMProvider.Ollama, temperature = temperature, prompt = prompt) { @@ -26,3 +27,5 @@ data class OllamaLLMSetting( return this.copy(temperature=temperature) } } + +// TODO MASS: check compilation + unit tests (this is the last commit) \ No newline at end of file diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OpenAILLMSetting.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OpenAILLMSetting.kt index da8445ff25..4e1805c1df 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OpenAILLMSetting.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/llm/OpenAILLMSetting.kt @@ -19,7 +19,8 @@ package ai.tock.genai.orchestratorcore.models.llm data class OpenAILLMSetting( override val apiKey: T, override val temperature: String, - override val prompt: String, + @Deprecated("use PromptTemplate#prompt") + override val prompt: String? 
= null, val model: String, val baseUrl: String, ) : LLMSettingBase(LLMProvider.OpenAI, apiKey, temperature, prompt) { diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/OpenSearchVectorStoreSetting.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/OpenSearchVectorStoreSetting.kt index 4117229cd2..741ea6fb75 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/OpenSearchVectorStoreSetting.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/OpenSearchVectorStoreSetting.kt @@ -23,25 +23,23 @@ data class OpenSearchVectorStoreSetting( override val port: Int, override val username: String, override val password: T, - override val k: Int, ) : VectorStoreSettingBase( provider = VectorStoreProvider.OpenSearch, host = host, port = port, username = username, - password = password, - k = k + password = password ) { override fun normalizeDocumentIndexName(namespace: String, botId: String, indexSessionId: String): String = OpenSearchUtils.normalizeDocumentIndexName(namespace, botId, indexSessionId) - override fun getDocumentSearchParams(): OpenSearchParams = - OpenSearchParams(k = k, filter = null) + override fun getDocumentSearchParams(kNeighborsDocuments: Int): OpenSearchParams = + OpenSearchParams(k = kNeighborsDocuments, filter = null) } data class OpenSearchParams( - val k: Int = 4, + val k: Int, val filter: List? = null ) : DocumentSearchParamsBase(VectorStoreProvider.OpenSearch) diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/PGVectorStoreSetting.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/PGVectorStoreSetting.kt index 3b14335b95..567c7e949f 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/PGVectorStoreSetting.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/PGVectorStoreSetting.kt @@ -23,25 +23,23 @@ data class PGVectorStoreSetting( override val port: Int, override val username: String, override val password: T, - override val k: Int, val database: String, ) : VectorStoreSettingBase( provider = VectorStoreProvider.PGVector, host = host, port = port, username = username, - password = password, - k = k + password = password ) { override fun normalizeDocumentIndexName(namespace: String, botId: String, indexSessionId: String): String = PGVectorUtils.normalizeDocumentIndexName(namespace, botId, indexSessionId) - override fun getDocumentSearchParams(): PGVectorParams = - PGVectorParams(k = k, filter = null) + override fun getDocumentSearchParams(kNeighborsDocuments: Int): PGVectorParams = + PGVectorParams(k = kNeighborsDocuments, filter = null) } data class PGVectorParams( - val k: Int = 4, + val k: Int, val filter: Map? 
= null ) : DocumentSearchParamsBase(VectorStoreProvider.PGVector) \ No newline at end of file diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/VectorStoreSettingBase.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/VectorStoreSettingBase.kt index bdb54322bb..af31a360a4 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/VectorStoreSettingBase.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/models/vectorstore/VectorStoreSettingBase.kt @@ -37,9 +37,7 @@ abstract class VectorStoreSettingBase( open val host: String, open val port: Int, open val username: String, - open val password: T, - // The number of documents (neighbors) to return for each vector search - open val k: Int, + open val password: T ){ /** * Normalize the document index name @@ -51,7 +49,7 @@ abstract class VectorStoreSettingBase( /** * Get search params (filter) params */ - abstract fun getDocumentSearchParams(): DocumentSearchParamsBase + abstract fun getDocumentSearchParams(kNeighborsDocuments: Int): DocumentSearchParamsBase } typealias VectorStoreSettingDTO = VectorStoreSettingBase diff --git a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/utils/VectorStoreUtils.kt b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/utils/VectorStoreUtils.kt index 5c276daac8..b86e78794b 100644 --- a/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/utils/VectorStoreUtils.kt +++ b/gen-ai/orchestrator-core/src/main/kotlin/ai/tock/genai/orchestratorcore/utils/VectorStoreUtils.kt @@ -23,9 +23,6 @@ import ai.tock.shared.property private val vectorStore = property( name = "tock_gen_ai_orchestrator_vector_store_provider", defaultValue = VectorStoreProvider.OpenSearch.name) -private val kNeighborsDocuments = intProperty( - name = "tock_gen_ai_orchestrator_document_number_neighbors", - defaultValue = 1) typealias DocumentIndexName = String @@ -35,11 +32,12 @@ object VectorStoreUtils { namespace: String, botId: String, indexSessionId: String, + kNeighborsDocuments: Int, vectorStoreSetting: VectorStoreSetting?, ): Pair { vectorStoreSetting?.let { - val searchParams = it.getDocumentSearchParams() + val searchParams = it.getDocumentSearchParams(kNeighborsDocuments) val indexName = it.normalizeDocumentIndexName(namespace, botId, indexSessionId) return Pair(searchParams, indexName) } diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/configurations/environment/settings.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/configurations/environment/settings.py index 49f05c236e..d75b5f06cd 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/configurations/environment/settings.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/configurations/environment/settings.py @@ -67,11 +67,11 @@ class _Settings(BaseSettings): vector_store_database: Optional[str] = None # Only if necessary. 
Example: PGVector vector_store_secret_manager_provider: Optional[SecretManagerProvider] = None vector_store_credentials_secret_name: Optional[str] = None - """Number of document to retrieve from the Vector Store""" - vector_store_k: int = 4 """Request timeout: set the maximum time (in seconds) for the request to be completed.""" vector_store_timeout: int = 4 - vector_store_test_query: str = 'What knowledge do you have?' + """Maximum number of documents to be retrieved from the Vector Store""" + vector_store_test_max_docs_retrieved: int = 4 + vector_store_test_query: str = 'Any definition' """Observability Setting""" observability_provider_max_retries: int = 0 diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py index 8bd809b477..b74aeef107 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/models/rag/rag_models.py @@ -138,19 +138,13 @@ class QADebugData(BaseModel): class RagDebugData(QADebugData): """A RAG debug data""" - condense_question_prompt: Optional[str] = Field( + question_condensing_prompt: Optional[str] = Field( description='The prompt of the question rephrased with the history of the conversation.', - examples=[ - """Given the following conversation and a follow up question, - rephrase the follow up question to be a standalone question, in its original language. - Chat History: - Human: What travel offers are you proposing? - Assistant: We offer trips to all of Europe and North Africa. - Follow Up Input: I'm interested in going to Morocco - Standalone question:""" - ], + examples=['Given the following conversation, rephrase the follow up question to be a standalone question.'], ) - condense_question: Optional[str] = Field( + question_condensing_history: list[ChatMessage] = Field( + description="Conversation history, used to reformulate the user's question.") + condensed_question: Optional[str] = Field( description='The question rephrased with the history of the conversation.', examples=['Hello, how to plan a trip to Morocco ?'], ) diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/requests/requests.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/requests/requests.py index e6bcd14a06..5951b49464 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/requests/requests.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/routers/requests/requests.py @@ -163,11 +163,12 @@ class RagQuery(BaseQuery): """The RAG query model""" dialog: Optional[DialogDetails] = Field(description='The user dialog details.') - # condense_question_llm_setting: LLMSetting = - # Field(description="LLM setting, used to condense the user's question.") - # condense_question_prompt: PromptTemplate = Field( - # description='Prompt template, used to create a prompt with inputs for jinja and fstring format' - # ) + question_condensing_llm_setting: Optional[LLMSetting] = Field( + description="LLM setting, used to condense the user's question.", default=None) + question_condensing_prompt: Optional[PromptTemplate] = Field( + description='Prompt template, used to create a prompt with inputs for jinja and fstring format', + default = None + ) question_answering_llm_setting: 
LLMSetting = Field( description='LLM setting, used to perform a QA Prompt.' ) diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/completion/completion_service.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/completion/completion_service.py index b65d5351f2..c3474cb400 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/completion/completion_service.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/completion/completion_service.py @@ -53,7 +53,7 @@ async def generate_and_split_sentences( start_time = time.time() logger.info('Prompt completion - template validation') - validate_prompt_template(query.prompt) + validate_prompt_template(query.prompt, 'Sentence generation prompt') parser = NumberedListOutputParser() prompt = LangChainPromptTemplate.from_template( diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/callbacks/rag_callback_handler.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/callbacks/rag_callback_handler.py index e85003d9ac..77343c691b 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/callbacks/rag_callback_handler.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/callbacks/rag_callback_handler.py @@ -27,13 +27,14 @@ class RAGCallbackHandler(BaseCallbackHandler): """Customized RAG callback handler that retrieves data from the chain execution.""" - records: Dict[str, Any] = { - 'chat_prompt': None, - 'chat_chain_output': None, - 'rag_prompt': None, - 'rag_chain_output': None, - 'documents': None, - } + def __init__(self): + self.records: Dict[str, Any] = { + 'chat_prompt': None, + 'chat_chain_output': None, + 'rag_prompt': None, + 'rag_chain_output': None, + 'documents': None, + } def on_chain_start( self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/pgvector_factory.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/pgvector_factory.py index a2dd491326..d8ebe5138a 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/pgvector_factory.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/pgvector_factory.py @@ -66,5 +66,5 @@ async def check_vector_store_connection(self) -> bool: VectorStore to check the connection independently. 
""" await self.get_vector_store().asimilarity_search( - query=application_settings.vector_store_test_query, k=application_settings.vector_store_k) + query=application_settings.vector_store_test_query, k=application_settings.vector_store_test_max_docs_retrieved) return True diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/vector_store_factory.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/vector_store_factory.py index f57f838944..51b3a262a5 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/vector_store_factory.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/factories/vector_stores/vector_store_factory.py @@ -75,7 +75,7 @@ async def check_vector_store_setting(self) -> bool: """ logger.info('Invoke vector store provider to check setting') documents: List[Document] = await self.get_vector_store().asimilarity_search( - query=application_settings.vector_store_test_query, k=application_settings.vector_store_k + query=application_settings.vector_store_test_query, k=application_settings.vector_store_test_max_docs_retrieved ) logger.debug('Invocation successful') logger.debug('[index: %s], [query: %s], [document count: %s]', self.index_name, application_settings.vector_store_test_query, len(documents)) diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py index 259de72878..1be6050e3f 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/langchain/rag_chain.py @@ -52,6 +52,8 @@ from gen_ai_orchestrator.models.observability.observability_trace import ( ObservabilityTrace, ) +from gen_ai_orchestrator.models.prompt.prompt_formatter import PromptFormatter +from gen_ai_orchestrator.models.prompt.prompt_template import PromptTemplate from gen_ai_orchestrator.models.rag.rag_models import ( ChatMessageType, Footnote, @@ -116,7 +118,7 @@ async def execute_rag_chain(query: RagQuery, debug: bool) -> RagResponse: inputs = { **query.question_answering_prompt.inputs, - 'chat_history': message_history.messages, + 'chat_history': message_history.messages } logger.debug( @@ -224,7 +226,16 @@ def create_rag_chain(query: RagQuery, vector_db_async_mode: Optional[bool] = Tru The RAG chain. 
""" - llm_factory = get_llm_factory(setting=query.question_answering_llm_setting) + # Log progress and validate prompt template + logger.info('RAG chain - Validating LLM prompt template') + validate_prompt_template(query.question_answering_prompt, 'Question answering prompt') + if query.question_condensing_prompt is not None: + validate_prompt_template(query.question_condensing_prompt, 'Question condensing prompt') + + question_condensing_llm_factory = None + if query.question_condensing_llm_setting is not None: + question_condensing_llm_factory = get_llm_factory(setting=query.question_condensing_llm_setting) + question_answering_llm_factory = get_llm_factory(setting=query.question_answering_llm_setting) em_factory = get_em_factory(setting=query.embedding_question_em_setting) vector_store_factory = get_vector_store_factory( setting=query.vector_store_setting, @@ -239,22 +250,24 @@ def create_rag_chain(query: RagQuery, vector_db_async_mode: Optional[bool] = Tru if query.compressor_setting: retriever = add_document_compressor(retriever, query.compressor_setting) - # Log progress and validate prompt template - logger.info('RAG chain - Validating LLM prompt template') - validate_prompt_template(query.question_answering_prompt) - logger.debug('RAG chain - Document index name: %s', query.document_index_name) # Build LLM and prompt templates - llm = llm_factory.get_language_model() + question_condensing_llm = None + if question_condensing_llm_factory is not None: + question_condensing_llm = question_condensing_llm_factory.get_language_model() + question_answering_llm = question_answering_llm_factory.get_language_model() rag_prompt = build_rag_prompt(query) # Construct the RAG chain using the prompt and LLM, # This chain will consume the documents retrieved by the retriever as input. - rag_chain = construct_rag_chain(llm, rag_prompt) + rag_chain = construct_rag_chain(question_answering_llm, rag_prompt) # Build the chat chain for question contextualization - chat_chain = build_question_condensation_chain(llm) + chat_chain = build_question_condensation_chain( + question_condensing_llm if question_condensing_llm is not None else question_answering_llm, + query.question_condensing_prompt + ) # Function to contextualize the question based on chat history contextualize_question_fn = partial(contextualize_question, chat_chain=chat_chain) @@ -288,17 +301,24 @@ def construct_rag_chain(llm, rag_prompt): "question": lambda inputs: inputs["question"] # Override the user's original question with the condensed one } | rag_prompt | llm | StrOutputParser(name="rag_chain_output") -def build_question_condensation_chain(llm) -> ChatPromptTemplate: +def build_question_condensation_chain(llm, prompt: Optional[PromptTemplate]) -> ChatPromptTemplate: """ Build the chat chain for contextualizing questions. """ + if prompt is None: + # Default prompt + prompt = PromptTemplate( + formatter = PromptFormatter.F_STRING, inputs = {}, + template = "Given a chat history and the latest user question which might reference context in \ +the chat history, formulate a standalone question which can be understood without the chat history. \ +Do NOT answer the question, just reformulate it if needed and otherwise return it as is.", + ) + return ChatPromptTemplate.from_messages([ - ("system", """Given a chat history and the latest user question which might reference context in \ - the chat history, formulate a standalone question which can be understood without the chat history. 
\ - Do NOT answer the question, just reformulate it if needed and otherwise return it as is."""), + ("system", prompt.template), MessagesPlaceholder(variable_name="chat_history"), ("human", "{question}"), - ]) | llm | StrOutputParser(name="chat_chain_output") + ]).partial(**prompt.inputs) | llm | StrOutputParser(name="chat_chain_output") def contextualize_question(inputs: dict, chat_chain) -> str: """ @@ -328,14 +348,21 @@ def rag_guard(inputs, response, documents_required): chain_reply_no_answer = response['answer'] == inputs['no_answer'] if no_docs_but_required: - if chain_can_give_no_answer_reply and chain_reply_no_answer: # We expect the chain to use it's no answer value and it did, it's the expected behavior + if chain_can_give_no_answer_reply and chain_reply_no_answer: + # We expect the chain to use its non-response value, and it has done so, which is the expected behavior. return # Everything else isn't expected message = 'The RAG system cannot provide an answer when no documents are found and documents are required' rag_log(level=ERROR, message=message, inputs=inputs, response=response) raise GenAIGuardCheckException(ErrorInfo(cause=message)) - return + if chain_reply_no_answer and not no_docs_retrieved: + # If the chain responds with its non-response value even though documents were retrieved, + # we remove those documents from the RAG response. + message = 'The RAG gives no answer to the user question, but some documents have been found!' + rag_log(level=WARNING, message=message, inputs=inputs, response=response) + response['documents'] = [] + def rag_log(level, message, inputs, response): """ @@ -366,7 +393,7 @@ def get_rag_documents(handler: RAGCallbackHandler) -> List[RagDocument]: Get documents used on RAG context Args: - response: the rag answer + handler: the RAG Callback Handler """ return [ @@ -384,10 +411,15 @@ def get_rag_debug_data( ) -> RagDebugData: """RAG debug data assembly""" + history = [] + if query.dialog: + history = query.dialog.history + return RagDebugData( user_question=query.question_answering_prompt.inputs['question'], - condense_question_prompt=records_callback_handler.records['chat_prompt'], - condense_question=records_callback_handler.records['chat_chain_output'], + question_condensing_prompt=records_callback_handler.records['chat_prompt'], + question_condensing_history=history, + condensed_question=records_callback_handler.records['chat_chain_output'], question_answering_prompt=records_callback_handler.records['rag_prompt'], documents=get_rag_documents(records_callback_handler), document_index_name=query.document_index_name, diff --git a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/utils/prompt_utility.py b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/utils/prompt_utility.py index 7ec3af49fb..9374d4a547 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/utils/prompt_utility.py +++ b/gen-ai/orchestrator-server/src/main/python/server/src/gen_ai_orchestrator/services/utils/prompt_utility.py @@ -11,12 +11,13 @@ logger = logging.getLogger(__name__) -def validate_prompt_template(prompt: PromptTemplate): +def validate_prompt_template(prompt: PromptTemplate, name: str): """ Prompt template validation Args: prompt: The prompt template + name: The prompt name Returns: Nothing. 
@@ -27,7 +28,7 @@ def validate_prompt_template(prompt: PromptTemplate): try: Template(prompt.template).render(prompt.inputs) except TemplateError as exc: - logger.error('Prompt completion - template validation failed!') + logger.error(f'Validation of the prompt template has failed! ({name})') logger.error(exc) raise GenAIPromptTemplateException( ErrorInfo( diff --git a/gen-ai/orchestrator-server/src/main/python/server/tests/routers/test_completion_router.py b/gen-ai/orchestrator-server/src/main/python/server/tests/routers/test_completion_router.py index 6ee2a52776..0e57fa2b7b 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/tests/routers/test_completion_router.py +++ b/gen-ai/orchestrator-server/src/main/python/server/tests/routers/test_completion_router.py @@ -32,7 +32,6 @@ def test_generate_sentences(): }, 'model': 'dddddd', 'temperature': '0.0', - 'prompt': 'List 3 ice cream flavors.', 'responses': ['1. vanilla\n2. chocolate\n3. strawberry'], }, 'prompt': { @@ -64,7 +63,6 @@ def test_generate_sentences_template_error(): }, 'model': 'dddddd', 'temperature': '0.0', - 'prompt': 'List 3 ice cream flavors.', 'responses': ['1. vanilla\n2. chocolate\n3. strawberry'], }, 'prompt': { diff --git a/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_completion_service.py b/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_completion_service.py index 58c1a00aa2..11580d80eb 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_completion_service.py +++ b/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_completion_service.py @@ -28,4 +28,4 @@ def test_validate_prompt_template(): }, } template = PromptTemplate(**json) - validate_prompt_template(template) + validate_prompt_template(template, "prompt_name") diff --git a/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_langchain_factory.py b/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_langchain_factory.py index 326e4ed08f..2b4b3b0f4f 100644 --- a/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_langchain_factory.py +++ b/gen-ai/orchestrator-server/src/main/python/server/tests/services/test_langchain_factory.py @@ -126,7 +126,6 @@ def test_get_open_ai_llm_factory(): }, 'model': 'model', 'temperature': '0', - 'prompt': 'List 3 ice cream flavors.', } ) ) @@ -148,7 +147,6 @@ def test_get_azure_open_ai_llm_factory(): 'api_base': 'https://doc.tock.ai/tock', 'api_version': 'version', 'temperature': '0', - 'prompt': 'List 3 ice cream flavors.', } ) ) @@ -166,7 +164,6 @@ def test_get_fake_llm_factory(): 'secret': 'ab7***************************A1IV4B', }, 'temperature': '0', - 'prompt': 'List 3 ice cream flavors.', 'responses': ['1. vanilla\n2. chocolate\n3. 
strawberry'], } ) @@ -190,7 +187,6 @@ def test_get_open_ai_em_factory(): 'secret': 'ab7***************************A1IV4B', }, 'model': 'model', - 'prompt': 'List 3 ice cream flavors.', } ) ) @@ -211,7 +207,6 @@ def test_get_azure_open_ai_em_factory(): 'model': 'text-embedding-ada-002', 'api_base': 'https://doc.tock.ai/tock', 'api_version': 'version', - 'prompt': 'List 3 ice cream flavors.', } ) ) @@ -243,7 +238,6 @@ def test_get_open_search_vector_store_factory(): 'secret': 'ab7***************************A1IV4B', }, 'model': 'model', - 'prompt': 'List 3 ice cream flavors.', } ) ) diff --git a/gen-ai/orchestrator-server/src/main/python/tock-llm-indexing-tools/index_documents.py b/gen-ai/orchestrator-server/src/main/python/tock-llm-indexing-tools/index_documents.py index a2355c7365..ec625930a5 100644 --- a/gen-ai/orchestrator-server/src/main/python/tock-llm-indexing-tools/index_documents.py +++ b/gen-ai/orchestrator-server/src/main/python/tock-llm-indexing-tools/index_documents.py @@ -87,7 +87,7 @@ from langchain_community.document_loaders.dataframe import DataFrameLoader from langchain_core.documents import Document -from indexing_details import IndexingDetails +from models import IndexingDetails # Define the size of the csv field -> Set to maximum to process large csvs csv.field_size_limit(sys.maxsize)
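
A note on the `RAGCallbackHandler` change above: in Python, a mutable dict declared at class level is one object shared by every instance of the class, so two concurrent RAG requests using separate handlers could read and overwrite each other's records; moving the dict into `__init__` gives each handler its own state. A minimal standalone sketch of the pitfall (illustrative only, not Tock code):

```python
class SharedRecords:
    # Class attribute: a single dict object shared by ALL instances.
    records = {'rag_prompt': None}

class PerInstanceRecords:
    def __init__(self):
        # Instance attribute: a fresh dict per handler.
        self.records = {'rag_prompt': None}

a, b = SharedRecords(), SharedRecords()
a.records['rag_prompt'] = 'from request A'
assert b.records['rag_prompt'] == 'from request A'  # leak: B observes A's data

c, d = PerInstanceRecords(), PerInstanceRecords()
c.records['rag_prompt'] = 'from request C'
assert d.records['rag_prompt'] is None  # isolated: D keeps its own records
```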
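For reference on `validate_prompt_template` above: it renders the Jinja2 template with its declared inputs and raises on a malformed template; the new `name` argument only enriches the error log. A rough standalone equivalent (the helper and error type here are illustrative, not the orchestrator API):

```python
from jinja2 import Template, TemplateError

def check_template(template_text: str, inputs: dict, name: str) -> None:
    """Render the template with its inputs and fail fast if it is malformed."""
    try:
        Template(template_text).render(inputs)
    except TemplateError as exc:
        raise ValueError(f'Validation of the prompt template has failed! ({name})') from exc

# A well-formed template passes silently.
check_template('Rephrase: {{ question }}', {'question': 'Hi'}, 'Question condensing prompt')

# An unclosed block raises a TemplateError, reported with the prompt name.
try:
    check_template('{% if x %}never closed', {}, 'Broken prompt')
except ValueError as err:
    print(err)
```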