Commit dc6adef

Merge pull request oobabooga#5496 from oobabooga/dev
Merge dev branch
2 parents: 0f134bf + 069ed7c

19 files changed, +230 -130 lines

README.md

+7 -7

@@ -75,12 +75,12 @@ conda activate textgen
 
 | System | GPU | Command |
 |--------|---------|---------|
-| Linux/WSL | NVIDIA | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121` |
-| Linux/WSL | CPU only | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu` |
-| Linux | AMD | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.6` |
-| MacOS + MPS | Any | `pip3 install torch torchvision torchaudio` |
-| Windows | NVIDIA | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121` |
-| Windows | CPU only | `pip3 install torch torchvision torchaudio` |
+| Linux/WSL | NVIDIA | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* --index-url https://download.pytorch.org/whl/cu121` |
+| Linux/WSL | CPU only | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* --index-url https://download.pytorch.org/whl/cpu` |
+| Linux | AMD | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* --index-url https://download.pytorch.org/whl/rocm5.6` |
+| MacOS + MPS | Any | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.*` |
+| Windows | NVIDIA | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* --index-url https://download.pytorch.org/whl/cu121` |
+| Windows | CPU only | `pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.*` |
 
 The up-to-date commands can be found here: https://pytorch.org/get-started/locally/.
 
@@ -145,7 +145,7 @@ Then browse to
 1) For Kepler GPUs and older, you will need to install CUDA 11.8 instead of 12:
 
 ```
-pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+pip3 install torch==2.1.* torchvision==0.16.* torchaudio==2.1.* --index-url https://download.pytorch.org/whl/cu118
 conda install -y -c "nvidia/label/cuda-11.8.0" cuda-runtime
 ```
 
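Note (not part of the diff): after running one of the pinned commands above, a quick sanity check from Python confirms the expected 2.1.x builds were installed and that the GPU backend is visible.

```python
# Minimal check, assuming the pinned builds from the table above were installed.
import torch
import torchvision
import torchaudio

print(torch.__version__, torchvision.__version__, torchaudio.__version__)
print("CUDA available:", torch.cuda.is_available())  # True only on the NVIDIA builds
```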

css/main.css

+4

@@ -408,6 +408,10 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     margin-bottom: 0 !important;
 }
 
+#default-tab .prose pre, #notebook-tab .prose pre {
+    overflow: scroll;
+}
+
 .message-body code {
     white-space: pre-wrap !important;
     word-wrap: break-word !important;

modules/llama_cpp_python_hijack.py

+63

@@ -0,0 +1,63 @@
+from typing import Sequence
+
+from tqdm import tqdm
+
+try:
+    import llama_cpp
+except:
+    llama_cpp = None
+
+try:
+    import llama_cpp_cuda
+except:
+    llama_cpp_cuda = None
+
+try:
+    import llama_cpp_cuda_tensorcores
+except:
+    llama_cpp_cuda_tensorcores = None
+
+
+def eval_with_progress(self, tokens: Sequence[int]):
+    """
+    A copy of
+
+    https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama.py
+
+    with tqdm to show prompt processing progress.
+    """
+    assert self._ctx.ctx is not None
+    assert self._batch.batch is not None
+    self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1)
+
+    if len(tokens) > 1:
+        progress_bar = tqdm(range(0, len(tokens), self.n_batch), desc="Prompt evaluation", leave=False)
+    else:
+        progress_bar = range(0, len(tokens), self.n_batch)
+
+    for i in progress_bar:
+        batch = tokens[i: min(len(tokens), i + self.n_batch)]
+        n_past = self.n_tokens
+        n_tokens = len(batch)
+        self._batch.set_batch(
+            batch=batch, n_past=n_past, logits_all=self.context_params.logits_all
+        )
+        self._ctx.decode(self._batch)
+        # Save tokens
+        self.input_ids[n_past: n_past + n_tokens] = batch
+        # Save logits
+        rows = n_tokens
+        cols = self._n_vocab
+        offset = (
+            0 if self.context_params.logits_all else n_tokens - 1
+        )  # NOTE: Only save the last token logits if logits_all is False
+        self.scores[n_past + offset: n_past + n_tokens, :].reshape(-1)[
+            :
+        ] = self._ctx.get_logits()[offset * cols: rows * cols]
+        # Update n_tokens
+        self.n_tokens += n_tokens
+
+
+for lib in [llama_cpp, llama_cpp_cuda, llama_cpp_cuda_tensorcores]:
+    if lib is not None:
+        lib.Llama.eval = eval_with_progress
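Note: the new module takes effect purely through import. The loaders below (`modules/llamacpp_hf.py` and `modules/llamacpp_model.py`) import it, and the loop at the end rebinds `Llama.eval` on every llama_cpp variant that could be loaded. A minimal sketch of the same monkey-patching pattern on a toy class (names here are hypothetical, not from the commit):

```python
# Toy illustration of the pattern used above: rebinding a method on the class
# replaces it for every existing and future instance, here adding a tqdm bar.
from tqdm import tqdm


class ToyModel:
    n_batch = 4

    def eval(self, tokens):
        return len(tokens)


def eval_with_progress(self, tokens):
    done = 0
    for i in tqdm(range(0, len(tokens), self.n_batch), desc="Prompt evaluation", leave=False):
        done += len(tokens[i:i + self.n_batch])
    return done


ToyModel.eval = eval_with_progress       # same trick as `lib.Llama.eval = eval_with_progress`
print(ToyModel().eval(list(range(10))))  # 10, now reported batch by batch
```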

modules/llamacpp_hf.py

+1 -1

@@ -7,7 +7,7 @@
 from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
 from transformers.modeling_outputs import CausalLMOutputWithPast
 
-from modules import RoPE, shared
+from modules import RoPE, llama_cpp_python_hijack, shared
 from modules.logging_colors import logger
 
 try:

modules/llamacpp_model.py

+1 -1

@@ -4,7 +4,7 @@
 import numpy as np
 import torch
 
-from modules import RoPE, shared
+from modules import RoPE, llama_cpp_python_hijack, shared
 from modules.callbacks import Iteratorize
 from modules.logging_colors import logger
 from modules.text_generation import get_max_prompt_length

modules/models.py

+5 -5

@@ -54,7 +54,7 @@
 
 
 def load_model(model_name, loader=None):
-    logger.info(f"Loading {model_name}")
+    logger.info(f"Loading \"{model_name}\"")
     t0 = time.time()
 
     shared.is_seq2seq = False
@@ -246,7 +246,7 @@ def llamacpp_loader(model_name):
     else:
         model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('*.gguf'))[0]
 
-    logger.info(f"llama.cpp weights detected: {model_file}")
+    logger.info(f"llama.cpp weights detected: \"{model_file}\"")
     model, tokenizer = LlamaCppModel.from_pretrained(model_file)
     return model, tokenizer
 
@@ -257,7 +257,7 @@ def llamacpp_HF_loader(model_name):
     for fname in [model_name, "oobabooga_llama-tokenizer", "llama-tokenizer"]:
         path = Path(f'{shared.args.model_dir}/{fname}')
         if all((path / file).exists() for file in ['tokenizer_config.json', 'special_tokens_map.json', 'tokenizer.model']):
-            logger.info(f'Using tokenizer from: {path}')
+            logger.info(f'Using tokenizer from: \"{path}\"')
             break
     else:
        logger.error("Could not load the model because a tokenizer in transformers format was not found. Please download oobabooga/llama-tokenizer.")
@@ -298,7 +298,7 @@ def ctransformers_loader(model_name):
         logger.error("Could not find a model for ctransformers.")
         return None, None
 
-    logger.info(f'ctransformers weights detected: {model_file}')
+    logger.info(f'ctransformers weights detected: \"{model_file}\"')
     model, tokenizer = ctrans.from_pretrained(model_file)
     return model, tokenizer
 
@@ -393,7 +393,7 @@ def HQQ_loader(model_name):
     from hqq.core.quantize import HQQBackend, HQQLinear
     from hqq.engine.hf import HQQModelForCausalLM
 
-    logger.info(f"Loading HQQ model with backend: {shared.args.hqq_backend}")
+    logger.info(f"Loading HQQ model with backend: \"{shared.args.hqq_backend}\"")
 
     model_dir = Path(f'{shared.args.model_dir}/{model_name}')
     model = HQQModelForCausalLM.from_quantized(str(model_dir))
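Note: these five changes only wrap user-supplied names and paths in quotes in the log output, which makes values containing spaces easier to read. Illustrative example (not from the commit):

```python
# Quoting the interpolated value makes the boundaries of the name explicit.
model_name = "my model"
print(f"Loading {model_name}")      # Loading my model
print(f"Loading \"{model_name}\"")  # Loading "my model"
```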

modules/presets.py

+44 -9

@@ -1,4 +1,5 @@
 import functools
+import pprint
 import random
 from pathlib import Path
 
@@ -90,7 +91,25 @@ def random_preset(state):
             'eta_cutoff': [3, 6, 9, 12, 15, 18],
         },
         'flatten_distribution': {
-            'temperature': [0.5, 0.7, 0.8, 1, 1.2, 1.5, 2.0],
+            'temperature': [0.1, 0.5, 0.7, 0.8, 1, 1.2, 1.5, 2.0, 5.0],
+            'dynamic_temperature': [
+                [0.1, 1],
+                [0.1, 1.5],
+                [0.1, 2],
+                [0.1, 5],
+                [0.5, 1],
+                [0.5, 1.5],
+                [0.5, 2],
+                [0.5, 5],
+                [0.8, 1],
+                [0.8, 1.5],
+                [0.8, 2],
+                [0.8, 5],
+                [1, 1.5],
+                [1, 2],
+                [1, 5]
+            ],
+            'smoothing_factor': [0.2, 0.3, 0.6, 1.2]
         },
         'repetition': {
             'repetition_penalty': [1, 1.05, 1.1, 1.15, 1.20, 1.25],
@@ -106,26 +125,42 @@ def random_preset(state):
     for cat in params_and_values:
         choices = list(params_and_values[cat].keys())
         if shared.args.loader is not None:
-            choices = [x for x in choices if x in loaders_samplers[shared.args.loader]]
+            choices = [x for x in choices if loader_contains(x)]
 
         if len(choices) > 0:
             choice = random.choice(choices)
-            generate_params[choice] = random.choice(params_and_values[cat][choice])
+            value = random.choice(params_and_values[cat][choice])
+            if choice == 'dynamic_temperature':
+                generate_params['dynamic_temperature'] = True
+                generate_params['dynatemp_low'] = value[0]
+                generate_params['dynatemp_high'] = value[1]
+            else:
+                generate_params[choice] = value
 
     state.update(generate_params)
+    logger.info("GENERATED_PRESET=")
+    pprint.PrettyPrinter(indent=4, width=1, sort_dicts=False).pprint(remove_defaults(state))
     return state, *[generate_params[k] for k in presets_params()]
 
 
-def generate_preset_yaml(state):
+def loader_contains(sampler):
+    if sampler == 'dynamic_temperature' and 'dynatemp_low' in loaders_samplers[shared.args.loader]:
+        return True
+    else:
+        return sampler in loaders_samplers[shared.args.loader]
+
+
+def remove_defaults(state):
     defaults = default_preset()
     data = {k: state[k] for k in presets_params()}
 
-    # Remove entries that are identical to the defaults.
-    # sampler_priority is always saved because it is experimental
-    # and the default order may change.
-
     for k in list(data.keys()):
-        if data[k] == defaults[k] and k != 'sampler_priority':
+        if data[k] == defaults[k]:
             del data[k]
 
+    return data
+
+
+def generate_preset_yaml(state):
+    data = remove_defaults(state)
    return yaml.dump(data, sort_keys=False)
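Note: in the updated `random_preset`, a `dynamic_temperature` pick is a `[low, high]` pair that expands into three separate generation parameters, while every other category still maps one sampler to one value; the new `remove_defaults` helper is shared by the preset logger and `generate_preset_yaml`. A standalone sketch of that expansion (simplified, with example values, not from the commit):

```python
import random

# Simplified version of the expansion added to random_preset (values are examples).
dynamic_temperature_choices = [[0.1, 1], [0.5, 2], [0.8, 5]]

generate_params = {}
value = random.choice(dynamic_temperature_choices)
generate_params['dynamic_temperature'] = True
generate_params['dynatemp_low'] = value[0]
generate_params['dynatemp_high'] = value[1]
print(generate_params)  # e.g. {'dynamic_temperature': True, 'dynatemp_low': 0.5, 'dynatemp_high': 2}
```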

modules/sampler_hijack.py

+1

@@ -431,6 +431,7 @@ def custom_sort_key(obj):
     if shared.args.verbose:
         logger.info("WARPERS=")
         pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint([x.__class__.__name__ for x in warpers])
+        print()
 
     if normalize is not None:
         warpers.append(normalize)

modules/text_generation.py

+2 -2

@@ -290,9 +290,9 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings
         if k in state:
             generate_params[k] = state[k]
 
-    if isinstance(state['sampler_priority'], list):
+    if isinstance(state['sampler_priority'], list) and len(state['sampler_priority']) > 0:
         generate_params['sampler_priority'] = state['sampler_priority']
-    elif isinstance(state['sampler_priority'], str):
+    elif isinstance(state['sampler_priority'], str) and state['sampler_priority'].strip() != '':
         generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()]
 
     if state['negative_prompt'] != '':
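Note: the tightened checks skip `sampler_priority` entirely when the UI sends an empty list or a blank string; otherwise a string value is split on commas and newlines into a clean list. A small sketch of that parsing with an example input:

```python
# Example of the string branch: commas and newlines both act as separators,
# and empty fragments are dropped.
sampler_priority = "temperature\ntop_k, top_p,  , min_p"
parsed = [x.strip() for x in sampler_priority.replace('\n', ',').split(',') if x.strip()]
print(parsed)  # ['temperature', 'top_k', 'top_p', 'min_p']
```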

requirements.txt

+20 -20

@@ -2,16 +2,15 @@ accelerate==0.25.*
 colorama
 datasets
 einops
-exllamav2==0.0.12; platform_system != "Darwin" and platform_machine != "x86_64"
 gradio==3.50.*
-hqq==0.1.2.post1
+hqq==0.1.3
 jinja2==3.1.2
 lm_eval==0.3.0
 markdown
-numpy==1.24.*
+numpy==1.26.*
 optimum==1.16.*
 pandas
-peft==0.7.*
+peft==0.8.*
 Pillow>=9.5.0
 pyyaml
 requests
@@ -29,32 +28,33 @@ bitsandbytes==0.41.1; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
 
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.38+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 
 # llama-cpp-python (CUDA, no tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.38+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.38+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.38+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.38+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # llama-cpp-python (CUDA, tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.38+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.38+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.38+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.38+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # CUDA wheels
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
 https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
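Note: the wheel URLs are gated by PEP 508 environment markers (the `; platform_system == ...` suffixes), so pip only installs the build matching the current OS, CPU architecture, and Python version. A quick way to see how one of these markers evaluates locally, using the `packaging` library (assumed available; not part of the commit):

```python
# Evaluate one of the requirement markers against the current interpreter and OS.
from packaging.markers import Marker

marker = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')
print(marker.evaluate())  # True only on Linux x86_64 running Python 3.11
```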
