1 file changed: +3 -0 lines changed

docs/source/features/quantization

@@ -30,6 +30,7 @@ from vllm import LLM
model = LLM("facebook/opt-125m", quantization="fp8")
# INFO 06-10 17:55:42 model_runner.py:157] Loading model weights took 0.1550 GB
result = model.generate("Hello, my name is")
+print(result[0].outputs[0].text)
```
:::{warning}
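The `print(result[0].outputs[0].text)` line added above indexes into vLLM's output objects: `generate` returns one `RequestOutput` per prompt, and each `RequestOutput` holds a list of `CompletionOutput`s. A minimal standalone sketch of the same access pattern (the loop and the second prompt are illustrative, not part of this change):

```
from vllm import LLM

model = LLM("facebook/opt-125m", quantization="fp8")
results = model.generate(["Hello, my name is", "The capital of France is"])

for request_output in results:                 # one RequestOutput per prompt
    for completion in request_output.outputs:  # one CompletionOutput per sampled sequence
        print(completion.text)                 # the generated continuation
```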
@@ -106,6 +107,7 @@ Load and run the model in `vllm`:
from vllm import LLM
model = LLM("./Meta-Llama-3-8B-Instruct-FP8-Dynamic")
result = model.generate("Hello my name is")
+print(result[0].outputs[0].text)
```
Evaluate accuracy with `lm_eval` (for example on 250 samples of `gsm8k`):
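As one way to run that check, here is a sketch using the lm-evaluation-harness Python API with its vLLM backend; the checkpoint path and settings below are assumptions carried over from the snippet above, not part of this change:

```
import lm_eval

# Evaluate the FP8 checkpoint on 250 gsm8k samples through the vLLM backend.
# Path and few-shot setting are assumed; adjust to your checkpoint/setup.
results = lm_eval.simple_evaluate(
    model="vllm",
    model_args="pretrained=./Meta-Llama-3-8B-Instruct-FP8-Dynamic,add_bos_token=true",
    tasks=["gsm8k"],
    num_fewshot=5,
    limit=250,
)
print(results["results"]["gsm8k"])
```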
@@ -188,4 +190,5 @@ from vllm import LLM
model = LLM(model="Meta-Llama-3-8B-Instruct-FP8/")
# INFO 06-10 21:15:41 model_runner.py:159] Loading model weights took 8.4596 GB
result = model.generate("Hello, my name is")
+print(result[0].outputs[0].text)
```
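As a rough sanity check on the log line above: 8B parameters at one byte per FP8 weight is about 8 GB, consistent with the reported 8.4596 GB and roughly half of the ~16 GB a BF16 copy of the same weights would occupy.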