JobSmithManipulation
diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py
Lines changed: 25 additions & 15 deletions
diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py
Lines changed: 9 additions & 7 deletions
diff --git a/conf/llm_factories.json b/conf/llm_factories.json
Lines changed: 12 additions & 12 deletions
@@ -58,40 +58,49 @@ def set_api_key():
     chat_passed, embd_passed, rerank_passed = False, False, False
     factory = req["llm_factory"]
     msg = ""
-    for llm in LLMService.query(fid=factory)[:3]:
+    default_model=req.get("default_model") if req.get("default_model")!="" else None
+    api_version=req.get("api_version")
+    default_model=default_model+'#'+api_version if factory=='Azure-OpenAI'and default_model else default_model
+    for llm in LLMService.query(fid=factory, llm_name=default_model)[:3]:
+        llm_name = llm.llm_name.split('#')[0] if llm.fid == 'Azure-OpenAI' else llm.llm_name
+        api_version = llm.llm_name.split('#')[1] if llm.fid == 'Azure-OpenAI' else None
+
         if not embd_passed and llm.model_type == LLMType.EMBEDDING.value:
             mdl = EmbeddingModel[factory](
-                req["api_key"], llm.llm_name, base_url=req.get("base_url"))
+                req["api_key"], llm_name, base_url=req.get("base_url"), api_version=api_version)
             try:
                 arr, tc = mdl.encode(["Test if the api key is available"])
                 if len(arr[0]) == 0:
                     raise Exception("Fail")
                 embd_passed = True
+                break
             except Exception as e:
-                msg += f"\nFail to access embedding model({llm.llm_name}) using this api key." + str(e)
+                msg += f"\nFail to access embedding model({llm_name}) using this api key." + str(e)
+
         elif not chat_passed and llm.model_type == LLMType.CHAT.value:
             mdl = ChatModel[factory](
-                req["api_key"], llm.llm_name, base_url=req.get("base_url"))
+                req["api_key"], llm_name, base_url=req.get("base_url"), api_version=api_version)
             try:
-                m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], 
-                                 {"temperature": 0.9,'max_tokens':50})
-                if m.find("**ERROR**") >=0:
+                m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}],
+                                 {"temperature": 0.9, 'max_tokens': 50})
+                if m.find("**ERROR**") >= 0:
                     raise Exception(m)
+                chat_passed = True
+                break
             except Exception as e:
-                msg += f"\nFail to access model({llm.llm_name}) using this api key." + str(
-                    e)
-            chat_passed = True
+                msg += f"\nFail to access model({llm_name}) using this api key." + str(e)
+
         elif not rerank_passed and llm.model_type == LLMType.RERANK:
             mdl = RerankModel[factory](
-                req["api_key"], llm.llm_name, base_url=req.get("base_url"))
+                req["api_key"], llm_name, base_url=req.get("base_url"))
             try:
                 arr, tc = mdl.similarity("What's the weather?", ["Is it sunny today?"])
                 if len(arr) == 0 or tc == 0:
                     raise Exception("Fail")
+                rerank_passed = True
+                break
             except Exception as e:
-                msg += f"\nFail to access model({llm.llm_name}) using this api key." + str(
-                    e)
-            rerank_passed = True
+                msg += f"\nFail to access model({llm_name}) using this api key." + str(e)
 
     if msg:
         return get_data_error_result(retmsg=msg)
@@ -105,6 +114,7 @@ def set_api_key():
             llm_config[n] = req[n]
 
     for llm in LLMService.query(fid=factory):
+        llm.llm_name = llm.llm_name.split('#')[0]+'#'+api_version if llm.fid == 'Azure-OpenAI' else llm.llm_name
         if not TenantLLMService.filter_update(
                 [TenantLLM.tenant_id == current_user.id,
                  TenantLLM.llm_factory == factory,
@@ -215,7 +225,7 @@ def apikey_json(keys):
             base_url=llm["api_base"]
         )
         try:
-            m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], {
+            m, tc = mdl.chat("", [{"role": "user", "content": "Hello! How are you doing!"}], {
                              "temperature": 0.9})
             if not tc:
                 raise Exception(m)
 
@@ -102,46 +102,48 @@ def model_instance(cls, tenant_id, llm_type,
                     if not mdlnm:
                         raise LookupError(f"Type of {llm_type} model is not set.")
                     raise LookupError("Model({}) not authorized".format(mdlnm))
-
+        model_with_version = model_config["llm_name"].split('#')
+        model_name = model_with_version[0]
+        api_version = model_with_version[1] if len(model_with_version) > 1 else None
         if llm_type == LLMType.EMBEDDING.value:
             if model_config["llm_factory"] not in EmbeddingModel:
                 return
             return EmbeddingModel[model_config["llm_factory"]](
-                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
+                model_config["api_key"], model_name, base_url=model_config["api_base"], api_version=api_version)
 
         if llm_type == LLMType.RERANK:
             if model_config["llm_factory"] not in RerankModel:
                 return
             return RerankModel[model_config["llm_factory"]](
-                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
+                model_config["api_key"], model_name, base_url=model_config["api_base"])
 
         if llm_type == LLMType.IMAGE2TEXT.value:
             if model_config["llm_factory"] not in CvModel:
                 return
             return CvModel[model_config["llm_factory"]](
-                model_config["api_key"], model_config["llm_name"], lang,
+                model_config["api_key"], model_name, lang,
                 base_url=model_config["api_base"]
             )
 
         if llm_type == LLMType.CHAT.value:
             if model_config["llm_factory"] not in ChatModel:
                 return
             return ChatModel[model_config["llm_factory"]](
-                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
+                model_config["api_key"], model_name, base_url=model_config["api_base"], api_version=api_version)
 
         if llm_type == LLMType.SPEECH2TEXT:
             if model_config["llm_factory"] not in Seq2txtModel:
                 return
             return Seq2txtModel[model_config["llm_factory"]](
-                model_config["api_key"], model_config["llm_name"], lang,
+                model_config["api_key"],model_name, lang,
                 base_url=model_config["api_base"]
             )
         if llm_type == LLMType.TTS:
             if model_config["llm_factory"] not in TTSModel:
                 return
             return TTSModel[model_config["llm_factory"]](
                 model_config["api_key"],
-                model_config["llm_name"],
+                model_name,
                 base_url=model_config["api_base"],
             )
 
 
@@ -607,73 +607,73 @@
             "status": "1",
             "llm": [
                 {
-                    "llm_name": "gpt-4o-mini",
+                    "llm_name": "gpt-4o-mini#2024-02-01",
                     "tags": "LLM,CHAT,128K",
                     "max_tokens": 128000,
                     "model_type": "image2text"
                 },
                 {
-                    "llm_name": "gpt-4o",
+                    "llm_name": "gpt-4o#2024-02-01",
                     "tags": "LLM,CHAT,128K",
                     "max_tokens": 128000,
                     "model_type": "chat,image2text"
                 },
                 {
-                    "llm_name": "gpt-35-turbo",
+                    "llm_name": "gpt-3.5-turbo#2024-02-01",
                     "tags": "LLM,CHAT,4K",
                     "max_tokens": 4096,
                     "model_type": "chat"
                 },
                 {
-                    "llm_name": "gpt-35-turbo-16k",
+                    "llm_name": "gpt-3.5-turbo-16k#2024-02-01",
                     "tags": "LLM,CHAT,16k",
                     "max_tokens": 16385,
                     "model_type": "chat"
                 },
                 {
-                    "llm_name": "text-embedding-ada-002",
+                    "llm_name": "text-embedding-ada-002#2024-02-01",
                     "tags": "TEXT EMBEDDING,8K",
                     "max_tokens": 8191,
                     "model_type": "embedding"
                 },
                 {
-                    "llm_name": "text-embedding-3-small",
+                    "llm_name": "text-embedding-3-small#2024-02-01",
                     "tags": "TEXT EMBEDDING,8K",
                     "max_tokens": 8191,
                     "model_type": "embedding"
                 },
                 {
-                    "llm_name": "text-embedding-3-large",
+                    "llm_name": "text-embedding-3-large#2024-02-01",
                     "tags": "TEXT EMBEDDING,8K",
                     "max_tokens": 8191,
                     "model_type": "embedding"
                 },
                 {
-                    "llm_name": "whisper-1",
+                    "llm_name": "whisper-1#2024-02-01",
                     "tags": "SPEECH2TEXT",
                     "max_tokens": 26214400,
                     "model_type": "speech2text"
                 },
                 {
-                    "llm_name": "gpt-4",
+                    "llm_name": "gpt-4#2024-02-01",
                     "tags": "LLM,CHAT,8K",
                     "max_tokens": 8191,
                     "model_type": "chat"
                 },
                 {
-                    "llm_name": "gpt-4-turbo",
+                    "llm_name": "gpt-4-turbo#2024-02-01",
                     "tags": "LLM,CHAT,8K",
                     "max_tokens": 8191,
                     "model_type": "chat"
                 },
                 {
-                    "llm_name": "gpt-4-32k",
+                    "llm_name": "gpt-4-32k#2024-02-01",
                     "tags": "LLM,CHAT,32K",
                     "max_tokens": 32768,
                     "model_type": "chat"
                 },
                 {
-                    "llm_name": "gpt-4-vision-preview",
+                    "llm_name": "gpt-4-vision-preview#2024-02-01",
                     "tags": "LLM,CHAT,IMAGE2TEXT",
                     "max_tokens": 765,
                     "model_type": "image2text"