
Commit 75f969a

Disable cudaMallocAsync for post2 release
1 parent 0535783

File tree: 1 file changed, +10 -8 lines

exllamav2/model.py: +10 -8
@@ -9,14 +9,16 @@
 # Set CUDA context to lazy loading since we won't need 95% of the modules in Torch
 os.environ["CUDA_MODULE_LOADING"] = "LAZY"
 
-# Set cudaMallocAsync allocator by default as it appears slightly more memory efficient, unless Torch is already
-# imported in which case changing the allocator would cause it to crash
-if not "PYTORCH_CUDA_ALLOC_CONF" in os.environ:
-    try:
-        x = torch.__version__
-        # TODO: Should maybe be a warning here?
-    except NameError:
-        os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"
+# Disabled for 0.0.13.post2
+#
+# # Set cudaMallocAsync allocator by default as it appears slightly more memory efficient, unless Torch is already
+# # imported in which case changing the allocator would cause it to crash
+# if not "PYTORCH_CUDA_ALLOC_CONF" in os.environ:
+#     try:
+#         x = torch.__version__
+#         # TODO: Should maybe be a warning here?
+#     except NameError:
+#         os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "backend:cudaMallocAsync"
 
 import torch
 import math
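The removed block applied the override only when referencing torch.__version__ raised NameError, its heuristic (per the original comment) for torch not having been imported yet, since changing the allocator backend after PyTorch's CUDA allocator is initialized is unsafe. Users who still want the cudaMallocAsync backend after this release can opt in from their own entry point instead. A minimal sketch, not part of this commit; the setdefault call here stands in for the removed "if not ... in os.environ" guard:

import os

# Choose the allocator backend before PyTorch initializes its CUDA allocator.
# PyTorch parses PYTORCH_CUDA_ALLOC_CONF when the allocator is set up, so this
# must run before the first `import torch` in the process.
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "backend:cudaMallocAsync")

import torch  # from here on, the backend choice is fixed for this process

Using setdefault preserves the behavior of the removed guard: a value the user already exported in the environment is never overwritten.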
