|
45 | 45 | os: [ubuntu-20.04]
|
46 | 46 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
|
47 | 47 | torch-version: ['2.0.1', '2.1.2', '2.2.2', '2.3.1', '2.4.0.dev20240514']
|
48 |
| - cuda-version: ['11.8.0', '12.2.2'] |
| 48 | + cuda-version: ['11.8.0', '12.3.2'] |
49 | 49 | # We need separate wheels that either use the C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
|
50 | 50 | # Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
|
51 | 51 | # Without this we get an import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
|
|
60 | 60 | python-version: '3.12'
|
61 | 61 | # Pytorch <= 2.0 only supports CUDA <= 11.8
|
62 | 62 | - torch-version: '2.0.1'
|
63 |
| - cuda-version: '12.2.2' |
| 63 | + cuda-version: '12.3.2' |
64 | 64 |
|
65 | 65 | steps:
|
66 | 66 | - name: Checkout
|
@@ -145,8 +145,8 @@ jobs:
|
145 | 145 | export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
|
146 | 146 | export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
|
147 | 147 | # Limit MAX_JOBS, otherwise the GitHub runner goes OOM
|
148 |
| - # CUDA 11.8 can compile with 2 jobs, but CUDA 12.2 goes OOM |
149 |
| - MAX_JOBS=$([ "$MATRIX_CUDA_VERSION" == "122" ] && echo 1 || echo 2) FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist |
| 148 | + # CUDA 11.8 can compile with 2 jobs, but CUDA 12.3 goes OOM |
| 149 | + MAX_JOBS=$([ "$MATRIX_CUDA_VERSION" == "123" ] && echo 1 || echo 2) FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist |
150 | 150 | tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
|
151 | 151 | wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
|
152 | 152 | ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
|
|
0 commit comments