Skip to content

Commit d1115f1

Browse files
authored
Merge pull request oobabooga#6260 from oobabooga/dev
Merge dev branch
2 parents 0315122 + 3ee6822 commit d1115f1

32 files changed

+602
-321
lines changed

README.md

+22-22
Original file line numberDiff line numberDiff line change
@@ -204,16 +204,16 @@ List of command-line flags
204204
usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS]
205205
[--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--chat-buttons] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--auto-devices]
206206
[--gpu-memory GPU_MEMORY [GPU_MEMORY ...]] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code]
207-
[--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn]
208-
[--tensorcores] [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] [--n-gpu-layers N_GPU_LAYERS]
209-
[--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm] [--attention-sink-size ATTENTION_SINK_SIZE]
210-
[--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] [--cache_8bit] [--cache_4bit] [--num_experts_per_token NUM_EXPERTS_PER_TOKEN]
211-
[--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act] [--disable_exllama] [--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE] [--no_inject_fused_attention]
212-
[--hqq-backend HQQ_BACKEND] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR] [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE]
213-
[--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH]
214-
[--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT]
215-
[--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui] [--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]]
216-
[--checkpoint CHECKPOINT] [--monkey-patch]
207+
[--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--use_eager_attention] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE]
208+
[--flash-attn] [--tensorcores] [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock]
209+
[--n-gpu-layers N_GPU_LAYERS] [--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm]
210+
[--attention-sink-size ATTENTION_SINK_SIZE] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa]
211+
[--cache_8bit] [--cache_4bit] [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act] [--disable_exllama]
212+
[--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE] [--no_inject_fused_attention] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--deepspeed]
213+
[--nvme-offload-dir NVME_OFFLOAD_DIR] [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen]
214+
[--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE]
215+
[--ssl-certfile SSL_CERTFILE] [--subpath SUBPATH] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui]
216+
[--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]] [--checkpoint CHECKPOINT] [--monkey-patch]
217217
218218
Text generation web UI
219219
@@ -254,6 +254,7 @@ Transformers/Accelerate:
254254
--force-safetensors Set use_safetensors=True while loading the model. This prevents arbitrary code execution.
255255
--no_use_fast Set use_fast=False while loading the tokenizer (it's True by default). Use this if you have any problems related to use_fast.
256256
--use_flash_attention_2 Set use_flash_attention_2=True while loading the model.
257+
--use_eager_attention Set attn_implementation=eager while loading the model.
257258
258259
bitsandbytes 4-bit:
259260
--load-in-4bit Load the model with 4-bit precision (using bitsandbytes).
@@ -263,7 +264,7 @@ bitsandbytes 4-bit:
263264
264265
llama.cpp:
265266
--flash-attn Use flash-attention.
266-
--tensorcores Use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards. NVIDIA only.
267+
--tensorcores NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.
267268
--n_ctx N_CTX Size of the prompt context.
268269
--threads THREADS Number of threads to use.
269270
--threads-batch THREADS_BATCH Number of threads to use for batches/prompt processing.
@@ -272,7 +273,7 @@ llama.cpp:
272273
--no-mmap Prevent mmap from being used.
273274
--mlock Force the system to keep the model in RAM.
274275
--n-gpu-layers N_GPU_LAYERS Number of layers to offload to the GPU.
275-
--tensor_split TENSOR_SPLIT Split the model across multiple GPUs. Comma-separated list of proportions. Example: 18,17.
276+
--tensor_split TENSOR_SPLIT Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.
276277
--numa Activate NUMA task allocation for llama.cpp.
277278
--logits_all Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower.
278279
--no_offload_kqv Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.
@@ -287,6 +288,8 @@ ExLlamaV2:
287288
--max_seq_len MAX_SEQ_LEN Maximum sequence length.
288289
--cfg-cache ExLlamav2_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader.
289290
--no_flash_attn Force flash-attention to not be used.
291+
--no_xformers Force xformers to not be used.
292+
--no_sdpa Force Torch SDPA to not be used.
290293
--cache_8bit Use 8-bit cache to save VRAM.
291294
--cache_4bit Use Q4 cache to save VRAM.
292295
--num_experts_per_token NUM_EXPERTS_PER_TOKEN Number of experts to use for generation. Applies to MoE models like Mixtral.
@@ -307,6 +310,9 @@ AutoAWQ:
307310
HQQ:
308311
--hqq-backend HQQ_BACKEND Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.
309312
313+
TensorRT-LLM:
314+
--cpp-runner Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn't support streaming yet.
315+
310316
DeepSpeed:
311317
--deepspeed Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.
312318
--nvme-offload-dir NVME_OFFLOAD_DIR DeepSpeed: Directory to use for ZeRO-3 NVME offloading.
@@ -327,6 +333,7 @@ Gradio:
327333
--gradio-auth-path GRADIO_AUTH_PATH Set the Gradio authentication file path. The file should contain one or more user:password pairs in the same format as above.
328334
--ssl-keyfile SSL_KEYFILE The path to the SSL certificate key file.
329335
--ssl-certfile SSL_CERTFILE The path to the SSL certificate cert file.
336+
--subpath SUBPATH Customize the subpath for Gradio; use with a reverse proxy.
330337
331338
API:
332339
--api Enable the API extension.
@@ -392,18 +399,11 @@ Run `python download-model.py --help` to see all the options.
392399

393400
https://colab.research.google.com/github/oobabooga/text-generation-webui/blob/main/Colab-TextGen-GPU.ipynb
394401

395-
## Acknowledgment
396-
397-
In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition.
398-
399-
## Links
400-
401-
#### Community
402+
## Community
402403

403404
* Subreddit: https://www.reddit.com/r/oobabooga/
404405
* Discord: https://discord.gg/jwZCF2dPQN
405406

406-
#### Support
407+
## Acknowledgment
407408

408-
* ko-fi: https://ko-fi.com/oobabooga
409-
* GitHub Sponsors: https://github.com/sponsors/oobabooga
409+
In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition.

css/chat_style-TheEncrypted777.css

+1
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
line-height: 1.428571429 !important;
9191
color: rgb(243 244 246) !important;
9292
text-shadow: 2px 2px 2px rgb(0 0 0);
93+
font-weight: 500;
9394
}
9495

9596
.message-body p em {

css/chat_style-cai-chat.css

+2-1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
.message-body p {
4747
font-size: 15px !important;
4848
line-height: 22.5px !important;
49+
font-weight: 500;
4950
}
5051

5152
.message-body p, .chat .message-body ul, .chat .message-body ol {
@@ -59,4 +60,4 @@
5960
.message-body p em {
6061
color: rgb(110 110 110) !important;
6162
font-weight: 500;
62-
}
63+
}

css/chat_style-messenger.css

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
margin-bottom: 0 !important;
8989
font-size: 15px !important;
9090
line-height: 1.428571429 !important;
91+
font-weight: 500;
9192
}
9293

9394
.dark .message-body p em {

css/chat_style-wpp.css

+2-1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
margin-bottom: 0 !important;
4545
font-size: 15px !important;
4646
line-height: 1.428571429 !important;
47+
font-weight: 500;
4748
}
4849

4950
.dark .message-body p em {
@@ -52,4 +53,4 @@
5253

5354
.message-body p em {
5455
color: rgb(110 110 110) !important;
55-
}
56+
}

css/highlightjs/github.min.css

+10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

css/main.css

+16-5
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,6 @@ ol li p, ul li p {
6262
border: 0;
6363
}
6464

65-
.gradio-container-3-18-0 .prose * h1, h2, h3, h4 {
66-
color: white;
67-
}
68-
6965
.gradio-container {
7066
max-width: 100% !important;
7167
padding-top: 0 !important;
@@ -378,6 +374,10 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
378374
}
379375
}
380376

377+
.chat-parent .prose {
378+
visibility: visible;
379+
}
380+
381381
.old-ui .chat-parent {
382382
height: calc(100dvh - 192px - var(--header-height) - var(--input-delta));
383383
margin-bottom: var(--input-delta) !important;
@@ -399,6 +399,13 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
399399
padding-bottom: 15px !important;
400400
}
401401

402+
.message-body h1,
403+
.message-body h2,
404+
.message-body h3,
405+
.message-body h4 {
406+
color: var(--body-text-color);
407+
}
408+
402409
.message-body li {
403410
list-style-position: outside;
404411
}
@@ -447,6 +454,11 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
447454
border-radius: 5px;
448455
font-size: 82%;
449456
padding: 1px 3px;
457+
background: white !important;
458+
color: #1f2328;
459+
}
460+
461+
.dark .message-body code {
450462
background: #0d1117 !important;
451463
color: rgb(201 209 217);
452464
}
@@ -796,4 +808,3 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
796808
max-height: 300px;
797809
}
798810
}
799-

js/dark_theme.js

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
function toggleDarkMode() {
2+
document.body.classList.toggle("dark");
3+
var currentCSS = document.getElementById("highlight-css");
4+
if (currentCSS.getAttribute("href") === "file/css/highlightjs/github-dark.min.css") {
5+
currentCSS.setAttribute("href", "file/css/highlightjs/github.min.css");
6+
} else {
7+
currentCSS.setAttribute("href", "file/css/highlightjs/github-dark.min.css");
8+
}
9+
}

js/main.js

+3-6
Original file line numberDiff line numberDiff line change
@@ -445,14 +445,12 @@ function updateCssProperties() {
445445

446446
// Check if the chat container is visible
447447
if (chatContainer.clientHeight > 0) {
448-
449-
// Calculate new chat height and adjust CSS properties
450448
var numericHeight = chatContainer.parentNode.clientHeight - chatInputHeight + 40 - 100;
451449
if (document.getElementById("chat-tab").style.paddingBottom != "") {
452450
numericHeight += 20;
453451
}
454-
const newChatHeight = `${numericHeight}px`;
455452

453+
const newChatHeight = `${numericHeight}px`;
456454
document.documentElement.style.setProperty("--chat-height", newChatHeight);
457455
document.documentElement.style.setProperty("--input-delta", `${chatInputHeight - 40}px`);
458456

@@ -463,15 +461,14 @@ function updateCssProperties() {
463461

464462
// Adjust scrollTop based on input height change
465463
if (chatInputHeight !== currentChatInputHeight) {
466-
chatContainer.scrollTop += chatInputHeight > currentChatInputHeight ? chatInputHeight : -chatInputHeight + 40;
464+
chatContainer.scrollTop += chatInputHeight - currentChatInputHeight;
467465
currentChatInputHeight = chatInputHeight;
468466
}
469467
}
470468
}
471469

472470
// Observe textarea size changes and call update function
473-
new ResizeObserver(updateCssProperties)
474-
.observe(document.querySelector("#chat-input textarea"));
471+
new ResizeObserver(updateCssProperties).observe(document.querySelector("#chat-input textarea"));
475472

476473
// Handle changes in window size
477474
window.addEventListener("resize", updateCssProperties);

modules/block_requests.py

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import requests
55

6+
from modules import shared
67
from modules.logging_colors import logger
78

89
original_open = open
@@ -54,6 +55,7 @@ def my_open(*args, **kwargs):
5455
'\n <script src="file/js/katex/auto-render.min.js"></script>'
5556
'\n <script src="file/js/highlightjs/highlight.min.js"></script>'
5657
'\n <script src="file/js/highlightjs/highlightjs-copy.min.js"></script>'
58+
f'\n <link id="highlight-css" rel="stylesheet" href="file/css/highlightjs/{"github-dark" if shared.settings["dark_theme"] else "github"}.min.css">'
5759
'\n <script>hljs.addPlugin(new CopyButtonPlugin());</script>'
5860
'\n </head>'
5961
)

0 commit comments

Comments
 (0)