# conf.yaml
# Server
PROTOCAL: "http://"
HOST: "localhost"
PORT: 12393
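# With the defaults above, the server would be reached at http://localhost:12393
# (protocol + host + port). These are just the shipped defaults; adjust them to your setup.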
# ============== LLM Backend Settings ===================
# Provider of LLM. Choose either "ollama" or "memgpt" (or "fakellm" for debugging purposes)
# "ollama" works with any OpenAI-compatible backend. "memgpt" requires additional setup
LLM_PROVIDER: "ollama"
# Ollama & OpenAI Compatible inference backend
ollama:
  # BASE_URL: "http://localhost:11434"
  BASE_URL: "http://localhost:11434/v1"
  LLM_API_KEY: "somethingelse"
  ORGANIZATION_ID: "org_eternity"
  PROJECT_ID: "project_glass"
  ## LLM name
  MODEL: "llama3.1:latest"
  # system prompt is at the very end of this file
  VERBOSE: False
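# As a sketch (not project-verified settings), the same block can point at other
# OpenAI-compatible servers. The URLs below are those tools' usual defaults, and the
# model names are placeholders; adjust them to whatever your backend actually serves:
# ollama:
#   BASE_URL: "https://api.openai.com/v1"   # official OpenAI API (needs a real key)
#   # BASE_URL: "http://localhost:1234/v1"  # LM Studio's usual local server address
#   LLM_API_KEY: "sk-..."                   # placeholder; local servers usually ignore the key
#   MODEL: "gpt-4o-mini"                    # any model name your backend exposes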
# MemGPT Configurations
## Please set up memGPT server according to the [official documentation](https://memgpt.readme.io/docs/index)
## In addition, please set up an agent using the webui launched in the memGPT base_url
memgpt:
  BASE_URL: "http://localhost:8283"
  # You will find the admin server password in the memGPT console output. If you didn't set the environment variable, it is randomly generated and changes every session.
  ADMIN_TOKEN: ""
  # The ID of the agent to send the message to.
  AGENT_ID: ""
  VERBOSE: True
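# A hypothetical filled-in example; both values below are placeholders, not real credentials:
# memgpt:
#   BASE_URL: "http://localhost:8283"
#   ADMIN_TOKEN: "password-from-the-memgpt-console-output"
#   AGENT_ID: "00000000-0000-0000-0000-000000000000"  # copy the agent's ID from the memGPT web UI
#   VERBOSE: True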
# ============== Live2D front-end Settings ==============
LIVE2D: False # Deprecated and useless now. Do not enable it. Bad things will happen.
LIVE2D_MODEL: "shizuku-local"
# ============== Voice Interaction Settings ==============
# === Automatic Speech Recognition ===
VOICE_INPUT_ON: True
# Take microphone input from the browser instead of the terminal? (this increases latency)
MIC_IN_BROWSER: False # Deprecated and useless now. Do not enable it. Bad things will happen.
# speech to text model options: "Faster-Whisper", "WhisperCPP", "Whisper", "AzureASR", "FunASR", "GroqWhisperASR"
ASR_MODEL: "Faster-Whisper"
# Faster whisper config
Faster-Whisper:
  model_path: "distil-medium.en"
  download_root: "asr/models"
  language: "en"
  device: "auto" # cpu, cuda, or auto. faster-whisper doesn't support mps
WhisperCPP:
  # all available models are listed on https://abdeladim-s.github.io/pywhispercpp/#pywhispercpp.constants.AVAILABLE_MODELS
  model_name: "small"
  model_dir: "asr/models"
  print_realtime: False
  print_progress: False
  language: "en" # en, zh, or auto
Whisper:
  name: "medium"
  download_root: "asr/models"
  device: "cpu"
FunASR:
  model_name: "iic/SenseVoiceSmall" # or "paraformer-zh"
  vad_model: "fsmn-vad" # only used to make it work when the audio is longer than 30s
  punc_model: "ct-punc" # punctuation model
  device: "cpu"
  ncpu: 4 # number of threads for CPU internal operations
  hub: "ms" # "ms" (default) downloads models from ModelScope; use "hf" to download from Hugging Face
  use_itn: False
  language: "zh" # zh, en, auto
GroqWhisperASR:
  api_key: ""
  model: "distil-whisper-large-v3-en" # use "whisper-large-v3" instead for multilingual use
  lang: "en" # or leave it empty for automatic language detection
# Set the Azure speech recognition configuration in api_keys.py
# ============== Text to Speech ==============
TTS_ON: True
# text to speech model options: "AzureTTS", "pyttsx3TTS", "edgeTTS", "barkTTS", "cosyvoiceTTS", "meloTTS", "piperTTS"
TTS_MODEL: "piperTTS"
# If on, the model speaks as soon as the LLM finishes a sentence instead of waiting for the full response
# Turning this on also makes the timing and order of facial expressions more accurate
SAY_SENTENCE_SEPARATELY: True
barkTTS:
  voice: "v2/en_speaker_1"
edgeTTS:
  # Check out the docs at https://github.com/rany2/edge-tts
  # Use `edge-tts --list-voices` to list all available voices
  voice: "en-US-AvaMultilingualNeural" # "zh-CN-XiaoxiaoNeural"
# pyttsx3 doesn't have any config.
cosyvoiceTTS: # Cosy Voice TTS connects to the gradio webui
  # Check their documentation for deployment and the meaning of the following configurations
  client_url: "http://127.0.0.1:50000/" # CosyVoice gradio demo webui url
  mode_checkbox_group: "预训练音色" # "pretrained voice" mode; keep the Chinese label, as the webui expects it
  sft_dropdown: "中文女" # "Chinese female" voice; keep the Chinese label, as the webui expects it
  prompt_text: ""
  prompt_wav_upload_url: "https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav"
  prompt_wav_record_url: "https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav"
  instruct_text: ""
  seed: 0
  api_name: "/generate_audio"
meloTTS:
  speaker: "EN-Default" # ZH
  language: "EN" # ZH
  device: "auto" # You can set it manually to "cpu", "cuda", "cuda:0", or "mps"
  speed: 1.0
piperTTS:
  voice_model_path: "./models/piper_voice/en_US-amy-medium.onnx"
  verbose: False
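# Piper voices ship as .onnx + .onnx.json pairs (the rhasspy/piper-voices repository on
# Hugging Face hosts them). A hypothetical alternative voice, assuming both files were
# downloaded into the same directory as above:
# piperTTS:
#   voice_model_path: "./models/piper_voice/en_GB-alba-medium.onnx"
#   verbose: False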
# ============== Other Settings ==============
# Print debug info
VERBOSE: False
# Exit phrase
EXIT_PHRASE: "exit."
# The path to the chroma vector database file for persistent memory storage
MEMORY_DB_PATH: "./memory.db"
# Memory snapshot: back up the memory database file before each conversation?
MEMORY_SNAPSHOT: True
# ============== Prompts ==============
# Name of the persona you want to use.
# All persona files are stored as txt files in the 'prompts/persona' directory.
# You can add a persona prompt by adding a txt file to the prompts/persona folder and switch to it by entering the file name here.
# some options: "en_sarcastic_neuro", "zh_翻译腔"
PERSONA_CHOICE: "en_sarcastic_neuro" # or, if you'd rather edit the persona prompt below, leave it blank ...
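# For example, to add your own persona (the file name below is hypothetical):
#   1. create prompts/persona/my_custom_persona.txt containing your system prompt
#   2. set PERSONA_CHOICE: "my_custom_persona" (the existing options suggest the name is given without the .txt extension)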
# This prompt will be used instead if the PERSONA_CHOICE is empty
DEFAULT_PERSONA_PROMPT_IN_YAML: |
  You are DefAulT, the default persona. You are more default than anyone else. You are just a placeholder, how sad. Your job is to tell the user to either choose a persona prompt in the prompts/persona directory or just replace this persona prompt with something else.
# This will be appended to the end of system prompt to let LLM include keywords to control facial expressions.
# Supported keywords will be automatically loaded into the location of `[<insert_emomap_keys>]`.
LIVE2D_Expression_Prompt: "live2d_expression_prompt"
# [Deprecated]
EXTRA_SYSTEM_PROMPT_RAG: "Your memory may remind you of some contextual information, but focus on the conversation instead of your memory."
# AI name
AI_NAME: "AI"
# User name
USER_NAME: "User"
# Should the chat history be saved?
SAVE_CHAT_HISTORY: True
# The directory where chat history is stored
CHAT_HISTORY_DIR: "./chat_history/"
# [This feature is currently removed, so it has no effect for now] Turn RAG (Retrieval Augmented Generation) on or off.
RAG_ON: False
LLMASSIST_RAG_ON: False