Add pin_memory() call on the penalty prompt/output token tensors

peng1999 · peng1999 · commit 932030160816 · 2024-07-16T10:10:46.000+08:00
diff --git a/vllm/model_executor/sampling_metadata.py b/vllm/model_executor/sampling_metadata.py
@@ -522,8 +522,8 @@ def from_lists(cls, temperatures: List[float], top_ps: List[float],
             pin_memory=pin_memory,
         )
         if do_penalties:
-            prompt_tensor = torch.from_numpy(prompt_padded_tokens)
-            output_tensor = torch.from_numpy(output_padded_tokens)
+            prompt_tensor = torch.from_numpy(prompt_padded_tokens).pin_memory()
+            output_tensor = torch.from_numpy(output_padded_tokens).pin_memory()
         else:
             prompt_tensor = None
             output_tensor = None