3 files changed, +5 -4 lines changed

@@ -168,7 +168,8 @@ jobs:
  export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
  export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
  # Limit MAX_JOBS otherwise the github runner goes OOM
- MAX_JOBS=2 FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist
+ # CUDA 11.8 can compile with 2 jobs, but CUDA 12.2 goes OOM
+ MAX_JOBS=$([ "$MATRIX_CUDA_VERSION" == "122" ] && echo 1 || echo 2) FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist
  tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
  wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
  ls dist/*whl | xargs -I {} mv {} dist/${wheel_name}
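
For reference, the inline substitution $([ "$MATRIX_CUDA_VERSION" == "122" ] && echo 1 || echo 2) expands to 1 when building against CUDA 12.2 and to 2 otherwise, and the sed expression "s/-/+$tmpname-/2" adds the local build tag by rewriting the second "-" in the wheel filename. A minimal Python sketch of that rename, using a hypothetical wheel name and tag purely for illustration:

# Sketch of the wheel rename performed by sed "s/-/+$tmpname-/2":
# insert "+<tag>" in place of the second "-" of the filename.
# The wheel name and tag below are hypothetical examples.
def rename_wheel(wheel_name: str, tag: str) -> str:
    pkg, version, rest = wheel_name.split("-", 2)  # split at the first two "-"
    return f"{pkg}-{version}+{tag}-{rest}"

print(rename_wheel(
    "flash_attn-2.5.9.post1-cp310-cp310-linux_x86_64.whl",
    "cu122torch2.3cxx11abiTRUE",
))
# -> flash_attn-2.5.9.post1+cu122torch2.3cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
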

@@ -1,4 +1,4 @@
- __version__ = "2.5.9"
+ __version__ = "2.5.9.post1"

  from flash_attn.flash_attn_interface import (
      flash_attn_func,
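
The new version string is a PEP 440 post-release, so it sorts after the base 2.5.9 but before any later release. A minimal sketch confirming the ordering, using the third-party packaging library (an assumption for illustration, not part of this change):

# Sketch: PEP 440 ordering of the post-release version used above.
# Requires the third-party "packaging" package (pip install packaging).
from packaging.version import Version

assert Version("2.5.9.post1") > Version("2.5.9")
assert Version("2.5.9.post1") < Version("2.5.10")
print(sorted(["2.5.10", "2.5.9.post1", "2.5.9"], key=Version))
# -> ['2.5.9', '2.5.9.post1', '2.5.10']
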

@@ -85,7 +85,7 @@ RUN pip install transformers==4.25.1 datasets==2.8.0 pytorch-lightning==1.8.6 tr
  RUN pip install git+https://github.com/mlcommons/logging.git@2.1.0

  # Install FlashAttention
- RUN pip install flash-attn==2.5.9
+ RUN pip install flash-attn==2.5.9.post1

  # Install CUDA extensions for fused dense
- RUN pip install git+https://github.com/HazyResearch/flash-attention@v2.5.9#subdirectory=csrc/fused_dense_lib
+ RUN pip install git+https://github.com/HazyResearch/flash-attention@v2.5.9.post1#subdirectory=csrc/fused_dense_lib
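
After rebuilding the image, a quick sanity check is to confirm that the runtime version matches the new pin. A minimal sketch (assuming the wheel installed cleanly; the check itself is not part of this change):

# Sketch: verify inside the container that the installed flash-attn
# matches the version pinned in the Dockerfile above.
import flash_attn

expected = "2.5.9.post1"
assert flash_attn.__version__ == expected, (
    f"expected flash-attn {expected}, got {flash_attn.__version__}"
)
print("flash-attn", flash_attn.__version__)
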