We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 2b79376 · commit 7dfddcd — Copy full SHA for 7dfddcd
vllm/model_executor/models/llama.py
@@ -28,6 +28,7 @@
28
from transformers import LlamaConfig
29
30
from vllm.attention import Attention, AttentionMetadata
31
+from vllm.compilation.decorators import support_compile_llama_style
32
from vllm.config import CacheConfig, LoRAConfig
33
from vllm.distributed import (get_pp_group, get_tensor_model_parallel_rank,
34
get_tensor_model_parallel_world_size)
@@ -344,6 +345,7 @@ def forward(
344
345
return hidden_states
346
347
348
+@support_compile_llama_style
349
class LlamaForCausalLM(nn.Module, SupportsLoRA):
350
packed_modules_mapping = {
351
"qkv_proj": [
0 commit comments