We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 8db1dd8 commit ad8c766Copy full SHA for ad8c766
ChatQnA/kubernetes/helm/faqgen-rocm-tgi-values.yaml
@@ -21,7 +21,7 @@ tgi:
21
MAX_TOTAL_TOKENS: "4096"
22
PYTORCH_TUNABLEOP_ENABLED: "0"
23
USE_FLASH_ATTENTION: "true"
24
- FLASH_ATTENTION_RECOMPUTE: "true"
+ FLASH_ATTENTION_RECOMPUTE: "false"
25
HIP_VISIBLE_DEVICES: "0,1"
26
MAX_BATCH_SIZE: "2"
27
extraCmdArgs: [ "--num-shard","2" ]
ChatQnA/kubernetes/helm/rocm-tgi-values.yaml
@@ -15,7 +15,7 @@ tgi:
15
16
17
18
19
20
0 commit comments