diff --git a/src/axolotl/integrations/liger/__init__.py b/src/axolotl/integrations/liger/__init__.py
index bf4c83af4f..f78083300d 100644
--- a/src/axolotl/integrations/liger/__init__.py
+++ b/src/axolotl/integrations/liger/__init__.py
@@ -22,8 +22,7 @@
 from liger_kernel.transformers.cross_entropy import LigerCrossEntropyLoss
 from liger_kernel.transformers.geglu import LigerGEGLUMLP
-from liger_kernel.transformers.model.llama import lce_forward
-from liger_kernel.transformers.model.qwen2 import lce_forward as qwen2_lce_forward
+from liger_kernel.transformers.model.llama import lce_forward as llama_lce_forward
 from liger_kernel.transformers.rms_norm import LigerRMSNorm
 from liger_kernel.transformers.rope import liger_rotary_pos_emb
 from liger_kernel.transformers.swiglu import LigerSwiGLUMLP
 
@@ -54,7 +53,7 @@ def pre_model_load(self, cfg):
             if cfg.liger_cross_entropy:
                 modeling_llama.CrossEntropyLoss = LigerCrossEntropyLoss
             elif cfg.liger_fused_linear_cross_entropy:
-                modeling_llama.LlamaForCausalLM.forward = lce_forward
+                modeling_llama.LlamaForCausalLM.forward = llama_lce_forward
 
         elif cfg.model_config_type == "mistral":
             from transformers.models.mistral import modeling_mistral
@@ -105,6 +104,9 @@ def pre_model_load(self, cfg):
             modeling_jamba.JambaForCausalLM.forward = jamba_lce_forward
 
         elif cfg.model_config_type == "qwen2":
+            from liger_kernel.transformers.model.qwen2 import (
+                lce_forward as qwen2_lce_forward,
+            )
             from transformers.models.qwen2 import modeling_qwen2
 
             if cfg.liger_rope: