Skip to content

Commit 4f3fdf1

Browse files
authored
Merge pull request oobabooga#5404 from oobabooga/dev
Merge dev branch
2 parents e7a760e + 167ee72 commit 4f3fdf1

25 files changed

+336
-235
lines changed

Colab-TextGen-GPU.ipynb

+2-5
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,9 @@
6565
" torver = torch.__version__\n",
6666
" print(f\"TORCH: {torver}\")\n",
6767
" is_cuda118 = '+cu118' in torver # 2.1.0+cu118\n",
68-
" is_cuda117 = '+cu117' in torver # 2.0.1+cu117\n",
6968
"\n",
7069
" textgen_requirements = open('requirements.txt').read().splitlines()\n",
71-
" if is_cuda117:\n",
72-
" textgen_requirements = [req.replace('+cu121', '+cu117').replace('+cu122', '+cu117').replace('torch2.1', 'torch2.0') for req in textgen_requirements]\n",
73-
" elif is_cuda118:\n",
70+
" if is_cuda118:\n",
7471
" textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements]\n",
7572
" with open('temp_requirements.txt', 'w') as file:\n",
7673
" file.write('\\n'.join(textgen_requirements))\n",
@@ -130,4 +127,4 @@
130127
"outputs": []
131128
}
132129
]
133-
}
130+
}

README.md

+12-1
Original file line numberDiff line numberDiff line change
@@ -160,12 +160,23 @@ The `requirements*.txt` above contain various wheels precompiled through GitHub
160160
### Alternative: Docker
161161

162162
```
163-
ln -s docker/{nvidia/Dockerfile,docker-compose.yml,.dockerignore} .
163+
For NVIDIA GPU:
164+
ln -s docker/{nvidia/Dockerfile,nvidia/docker-compose.yml,.dockerignore} .
165+
For AMD GPU:
166+
ln -s docker/{amd/Dockerfile,amd/docker-compose.yml,.dockerignore} .
167+
For Intel GPU:
168+
ln -s docker/{intel/Dockerfile,intel/docker-compose.yml,.dockerignore} .
169+
For CPU only:
170+
ln -s docker/{cpu/Dockerfile,cpu/docker-compose.yml,.dockerignore} .
164171
cp docker/.env.example .env
172+
# Create logs/cache dir:
173+
mkdir -p logs cache
165174
# Edit .env and set:
166175
# TORCH_CUDA_ARCH_LIST based on your GPU model
167176
# APP_RUNTIME_GID your host user's group id (run `id -g` in a terminal)
168177
# BUILD_EXTENSIONS optionally add a comma-separated list of extensions to build
178+
# Edit CMD_FLAGS.txt and add the options you want to launch with (like --listen --cpu)
179+
#
169180
docker compose up --build
170181
```
171182

docker/.env.example

+3
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,6 @@ APP_RUNTIME_GID=6972
2020
# override default app build permissions (handy for deploying to cloud)
2121
#APP_GID=6972
2222
#APP_UID=6972
23+
# Set cache env
24+
TRANSFORMERS_CACHE=/home/app/text-generation-webui/cache/
25+
HF_HOME=/home/app/text-generation-webui/cache/

docker/amd/Dockerfile

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# BUILDER
2+
FROM ubuntu:22.04
3+
WORKDIR /builder
4+
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
5+
ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
6+
ARG APP_UID="${APP_UID:-6972}"
7+
ARG APP_GID="${APP_GID:-6972}"
8+
9+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
10+
apt update && \
11+
apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
12+
rm -rf /var/lib/apt/lists/*
13+
WORKDIR /home/app/
14+
RUN git clone https://github.com/oobabooga/text-generation-webui.git
15+
WORKDIR /home/app/text-generation-webui
16+
RUN GPU_CHOICE=B USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
17+
COPY CMD_FLAGS.txt /home/app/text-generation-webui/
18+
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
19+
WORKDIR /home/app/text-generation-webui
20+
# set umask to ensure group read / write at runtime
21+
CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh

docker/amd/docker-compose.yml

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
version: "3.3"
2+
services:
3+
text-generation-webui:
4+
build:
5+
context: .
6+
args:
7+
# Requirements file to use:
8+
# | GPU | CPU | requirements file to use |
9+
# |--------|---------|---------|
10+
# | NVIDIA | has AVX2 | `requirements.txt` |
11+
# | NVIDIA | no AVX2 | `requirements_noavx2.txt` |
12+
# | AMD | has AVX2 | `requirements_amd.txt` |
13+
# | AMD | no AVX2 | `requirements_amd_noavx2.txt` |
14+
# | CPU only | has AVX2 | `requirements_cpu_only.txt` |
15+
# | CPU only | no AVX2 | `requirements_cpu_only_noavx2.txt` |
16+
# | Apple | Intel | `requirements_apple_intel.txt` |
17+
# | Apple | Apple Silicon | `requirements_apple_silicon.txt` |
18+
# Default: `requirements.txt`
19+
# BUILD_REQUIREMENTS: requirements.txt
20+
21+
# Extension requirements to build:
22+
# BUILD_EXTENSIONS:
23+
24+
# specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
25+
TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
26+
BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}
27+
APP_GID: ${APP_GID:-6972}
28+
APP_UID: ${APP_UID-6972}
29+
env_file: .env
30+
user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}"
31+
ports:
32+
- "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
33+
- "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
34+
stdin_open: true
35+
group_add:
36+
- video
37+
tty: true
38+
ipc: host
39+
devices:
40+
- /dev/kfd
41+
- /dev/dri
42+
cap_add:
43+
- SYS_PTRACE
44+
security_opt:
45+
- seccomp=unconfined
46+
volumes:
47+
- ./cache:/home/app/text-generation-webui/cache
48+
- ./characters:/home/app/text-generation-webui/characters
49+
- ./extensions:/home/app/text-generation-webui/extensions
50+
- ./loras:/home/app/text-generation-webui/loras
51+
- ./logs:/home/app/text-generation-webui/logs
52+
- ./models:/home/app/text-generation-webui/models
53+
- ./presets:/home/app/text-generation-webui/presets
54+
- ./prompts:/home/app/text-generation-webui/prompts
55+
- ./softprompts:/home/app/text-generation-webui/softprompts
56+
- ./training:/home/app/text-generation-webui/training
57+
- ./cloudflared:/etc/cloudflared

docker/cpu/Dockerfile

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# BUILDER
2+
FROM ubuntu:22.04
3+
WORKDIR /builder
4+
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
5+
ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
6+
ARG APP_UID="${APP_UID:-6972}"
7+
ARG APP_GID="${APP_GID:-6972}"
8+
ARG GPU_CHOICE=A
9+
ARG USE_CUDA118=FALSE
10+
ARG LAUNCH_AFTER_INSTALL=FALSE
11+
ARG INSTALL_EXTENSIONS=TRUE
12+
13+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
14+
apt update && \
15+
apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
16+
rm -rf /var/lib/apt/lists/*
17+
WORKDIR /home/app/
18+
RUN git clone https://github.com/oobabooga/text-generation-webui.git
19+
WORKDIR /home/app/text-generation-webui
20+
RUN GPU_CHOICE=N USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
21+
COPY CMD_FLAGS.txt /home/app/text-generation-webui/
22+
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
23+
# set umask to ensure group read / write at runtime
24+
WORKDIR /home/app/text-generation-webui
25+
CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh

docker/cpu/docker-compose.yml

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
version: "3.3"
2+
services:
3+
text-generation-webui:
4+
build:
5+
context: .
6+
args:
7+
# Requirements file to use:
8+
# | GPU | CPU | requirements file to use |
9+
# |--------|---------|---------|
10+
# | NVIDIA | has AVX2 | `requirements.txt` |
11+
# | NVIDIA | no AVX2 | `requirements_noavx2.txt` |
12+
# | AMD | has AVX2 | `requirements_amd.txt` |
13+
# | AMD | no AVX2 | `requirements_amd_noavx2.txt` |
14+
# | CPU only | has AVX2 | `requirements_cpu_only.txt` |
15+
# | CPU only | no AVX2 | `requirements_cpu_only_noavx2.txt` |
16+
# | Apple | Intel | `requirements_apple_intel.txt` |
17+
# | Apple | Apple Silicon | `requirements_apple_silicon.txt` |
18+
# Default: requirements.txt`
19+
# BUILD_REQUIREMENTS: requirements.txt
20+
21+
# Extension requirements to build:
22+
# BUILD_EXTENSIONS:
23+
24+
# specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
25+
TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
26+
BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}
27+
APP_GID: ${APP_GID:-6972}
28+
APP_UID: ${APP_UID-6972}
29+
env_file: .env
30+
user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}"
31+
ports:
32+
- "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
33+
- "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
34+
stdin_open: true
35+
tty: true
36+
volumes:
37+
- ./cache:/home/app/text-generation-webui/cache
38+
- ./characters:/home/app/text-generation-webui/characters
39+
- ./extensions:/home/app/text-generation-webui/extensions
40+
- ./loras:/home/app/text-generation-webui/loras
41+
- ./logs:/home/app/text-generation-webui/logs
42+
- ./models:/home/app/text-generation-webui/models
43+
- ./presets:/home/app/text-generation-webui/presets
44+
- ./prompts:/home/app/text-generation-webui/prompts
45+
- ./softprompts:/home/app/text-generation-webui/softprompts
46+
- ./training:/home/app/text-generation-webui/training
47+
- ./cloudflared:/etc/cloudflared

docker/intel/Dockerfile

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# BUILDER
2+
FROM ubuntu:22.04
3+
WORKDIR /builder
4+
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
5+
ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
6+
ARG APP_UID="${APP_UID:-6972}"
7+
ARG APP_GID="${APP_GID:-6972}"
8+
9+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
10+
apt update && \
11+
apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
12+
rm -rf /var/lib/apt/lists/*
13+
WORKDIR /home/app/
14+
RUN git clone https://github.com/oobabooga/text-generation-webui.git
15+
WORKDIR /home/app/text-generation-webui
16+
RUN GPU_CHOICE=D USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
17+
COPY CMD_FLAGS.txt /home/app/text-generation-webui/
18+
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
19+
# set umask to ensure group read / write at runtime
20+
WORKDIR /home/app/text-generation-webui
21+
CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh

docker/intel/docker-compose.yml

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
version: "3.3"
2+
services:
3+
text-generation-webui:
4+
build:
5+
context: .
6+
args:
7+
# Requirements file to use:
8+
# | GPU | CPU | requirements file to use |
9+
# |--------|---------|---------|
10+
# | NVIDIA | has AVX2 | `requirements.txt` |
11+
# | NVIDIA | no AVX2 | `requirements_noavx2.txt` |
12+
# | AMD | has AVX2 | `requirements_amd.txt` |
13+
# | AMD | no AVX2 | `requirements_amd_noavx2.txt` |
14+
# | CPU only | has AVX2 | `requirements_cpu_only.txt` |
15+
# | CPU only | no AVX2 | `requirements_cpu_only_noavx2.txt` |
16+
# | Apple | Intel | `requirements_apple_intel.txt` |
17+
# | Apple | Apple Silicon | `requirements_apple_silicon.txt` |
18+
# Default: `requirements.txt`
19+
# BUILD_REQUIREMENTS: requirements.txt
20+
21+
# Extension requirements to build:
22+
# BUILD_EXTENSIONS:
23+
24+
# specify which cuda version your card supports: https://developer.nvidia.com/cuda-gpus
25+
TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST:-7.5}
26+
BUILD_EXTENSIONS: ${BUILD_EXTENSIONS:-}
27+
APP_GID: ${APP_GID:-6972}
28+
APP_UID: ${APP_UID-6972}
29+
env_file: .env
30+
user: "${APP_RUNTIME_UID:-6972}:${APP_RUNTIME_GID:-6972}"
31+
ports:
32+
- "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
33+
- "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
34+
stdin_open: true
35+
group_add:
36+
- video
37+
tty: true
38+
ipc: host
39+
devices:
40+
- /dev/kfd
41+
- /dev/dri
42+
cap_add:
43+
- SYS_PTRACE
44+
security_opt:
45+
- seccomp=unconfined
46+
volumes:
47+
- ./characters:/home/app/text-generation-webui/characters
48+
- ./extensions:/home/app/text-generation-webui/extensions
49+
- ./loras:/home/app/text-generation-webui/loras
50+
- ./models:/home/app/text-generation-webui/models
51+
- ./presets:/home/app/text-generation-webui/presets
52+
- ./prompts:/home/app/text-generation-webui/prompts
53+
- ./softprompts:/home/app/text-generation-webui/softprompts
54+
- ./training:/home/app/text-generation-webui/training
55+
- ./cloudflared:/etc/cloudflared

docker/nvidia/Dockerfile

+9-45
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,21 @@
11
# BUILDER
2-
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 as builder
2+
FROM ubuntu:22.04
33
WORKDIR /builder
44
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
55
ARG BUILD_EXTENSIONS="${BUILD_EXTENSIONS:-}"
6-
ARG BUILD_REQUIREMENTS="${BUILD_REQUIREMENTS:-requirements.txt}"
76
ARG APP_UID="${APP_UID:-6972}"
87
ARG APP_GID="${APP_GID:-6972}"
9-
# create / update build env
10-
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
11-
apt update && \
12-
apt install --no-install-recommends -y git vim build-essential python3-dev pip && \
13-
rm -rf /var/lib/apt/lists/*
14-
RUN --mount=type=cache,target=/root/.cache/pip,rw \
15-
pip3 install --global --upgrade pip wheel setuptools && \
16-
# make shared builder & runtime app user
17-
addgroup --gid $APP_GID app_grp && \
18-
useradd -m -u $APP_UID --gid app_grp app
19-
USER app:app_grp
20-
# build wheels for runtime
21-
WORKDIR /home/app/build
22-
COPY --chown=app:app_grp "$BUILD_REQUIREMENTS" /home/app/build/requirements.txt
23-
COPY --chown=app:app_grp extensions /home/app/build/extensions
24-
RUN --mount=type=cache,target=/root/.cache/pip,rw \
25-
# build all requirements files as wheel dists
26-
pip3 wheel -w wheels -r requirements.txt `echo "$BUILD_EXTENSIONS" | sed -r 's/([^,]+)\s*,?\s*/ -r \/home\/app\/build\/extensions\/\1\/requirements.txt/g'`
27-
# drop wheel and setuptools .whl to avoid install issues
28-
RUN rm wheels/setuptools*.whl
298

30-
# RUNTIME
31-
FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04
32-
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6}"
33-
ARG APP_UID="${APP_UID:-6972}"
34-
ARG APP_GID="${APP_GID:-6972}"
35-
ENV CLI_ARGS=""
36-
# create / update runtime env
379
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
3810
apt update && \
39-
apt install --no-install-recommends -y git python3 pip && \
40-
rm -rf /var/lib/apt/lists/* && \
41-
pip3 install --global --no-cache --upgrade pip wheel setuptools && \
42-
# make shared builder & runtime app user
43-
addgroup --gid $APP_GID app_grp && \
44-
useradd -m -u $APP_UID --gid app_grp app
45-
USER app:app_grp
46-
# install locally built wheels for app
47-
WORKDIR /home/app/wheels
48-
COPY --from=builder /home/app/build/wheels /home/app/wheels
49-
COPY --chown=app:app_grp . /home/app/text-generation-webui
50-
RUN umask 0002 && \
51-
chmod g+rwX /home/app/text-generation-webui && \
52-
pip3 install --global --no-build-isolation --no-cache --no-index ./*.whl && \
53-
rm -r /home/app/wheels
11+
apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
12+
rm -rf /var/lib/apt/lists/*
13+
WORKDIR /home/app/
14+
RUN git clone https://github.com/oobabooga/text-generation-webui.git
5415
WORKDIR /home/app/text-generation-webui
16+
RUN GPU_CHOICE=A USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
17+
COPY CMD_FLAGS.txt /home/app/text-generation-webui/
5518
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
19+
WORKDIR /home/app/text-generation-webui
5620
# set umask to ensure group read / write at runtime
57-
CMD umask 0002 && export HOME=/home/app && python3 server.py ${CLI_ARGS}
21+
CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh

docker/docker-compose.yml docker/nvidia/docker-compose.yml

+2
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,11 @@ services:
3434
stdin_open: true
3535
tty: true
3636
volumes:
37+
- ./cache:/home/app/text-generation-webui/cache
3738
- ./characters:/home/app/text-generation-webui/characters
3839
- ./extensions:/home/app/text-generation-webui/extensions
3940
- ./loras:/home/app/text-generation-webui/loras
41+
- ./logs:/home/app/text-generation-webui/logs
4042
- ./models:/home/app/text-generation-webui/models
4143
- ./presets:/home/app/text-generation-webui/presets
4244
- ./prompts:/home/app/text-generation-webui/prompts

download-model.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,18 @@ def __init__(self, max_retries=5):
3030
if max_retries:
3131
self.session.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=max_retries))
3232
self.session.mount('https://huggingface.co', HTTPAdapter(max_retries=max_retries))
33+
3334
if os.getenv('HF_USER') is not None and os.getenv('HF_PASS') is not None:
3435
self.session.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS'))
35-
if os.getenv('HF_TOKEN') is not None:
36-
self.session.headers = {'authorization': f'Bearer {os.getenv("HF_TOKEN")}'}
36+
37+
try:
38+
from huggingface_hub import get_token
39+
token = get_token()
40+
except ImportError:
41+
token = os.getenv("HF_TOKEN")
42+
43+
if token is not None:
44+
self.session.headers = {'authorization': f'Bearer {token}'}
3745

3846
def sanitize_model_and_branch_names(self, model, branch):
3947
if model[-1] == '/':

0 commit comments

Comments
 (0)