@@ -126,6 +126,7 @@ class ExLlamaV2Config:
     checkpoint_fused_mlp: bool
     checkpoint_offset_qzeros: bool
     mrope_section: list | None
+    attention_multiplier: float | None

     vision_model_type: str | None
     vision_head_dim: int | None
@@ -289,6 +290,7 @@ def prepare(self, no_tensors: bool = False):
         self.use_qk_norm = read(read_config, bool, ["use_qk_norm"], False)

         self.query_pre_attn_scalar = read(read_config, float, "query_pre_attn_scalar", None)
+        self.attention_multiplier = read(read_config, float, "attention_multiplier", None)

         # MLP params

@@ -309,16 +311,18 @@ def prepare(self, no_tensors: bool = False):

         # Logit/embedding/residual scale

-        self.logit_scale = read(read_config, float, "logit_scale", 1)
+        self.logit_scale = read(read_config, float, ["logit_scale", "logits_scaling"], 1)
         if self.arch.lm.logit_scale_basedim:
             dim_model_base = read(read_config, int, "dim_model_base", self.hidden_size)
             self.logit_scale /= (self.hidden_size / dim_model_base)

-        self.scale_emb = read(read_config, float, "scale_emb", 1)
+        self.scale_emb = read(read_config, float, ["scale_emb", "embedding_multiplier"], 1)
+        residual_multiplier = read(read_config, float, "residual_multiplier", None)
         scale_depth = read(read_config, float, "scale_depth", None)
-        if scale_depth is None:
-            self.scale_depth = 1
-        else:
+        self.scale_depth = 1
+        if residual_multiplier:
+            self.scale_depth = residual_multiplier
+        elif scale_depth:
             self.scale_depth = scale_depth / math.sqrt(self.num_hidden_layers)

         self.attn_logit_softcapping = read(read_config, float, "attn_logit_softcapping", None)
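The key-name changes above rely on read() accepting either a single key or a list of aliases, so Granite-style config.json keys (logits_scaling, embedding_multiplier) land in the existing logit_scale / scale_emb fields without new plumbing. Below is a minimal Python sketch of that alias-fallback behavior; the read() shown here is hypothetical and only illustrates the lookup order, while the real helper in config.py also validates types and handles required keys.

# Hypothetical sketch: alias-fallback lookup as used by the read(...) calls in the diff.
def read(read_config: dict, expected_type: type, keys, default = None):
    if isinstance(keys, str):
        keys = [keys]
    for key in keys:
        if key in read_config:
            return read_config[key]      # first matching alias wins
    return default

# Example: a Granite-style config.json supplies the new key names, which map
# onto the same ExLlamaV2Config fields as before.
cfg = {"embedding_multiplier": 12.0, "logits_scaling": 8.0, "residual_multiplier": 0.22}
scale_emb = read(cfg, float, ["scale_emb", "embedding_multiplier"], 1)    # -> 12.0
logit_scale = read(cfg, float, ["logit_scale", "logits_scaling"], 1)      # -> 8.0

attention_multiplier is read the same way (a single new key, no alias) and is only stored on the config in this diff; how it is consumed downstream is outside these hunks.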
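The rewritten residual-scale branch defaults scale_depth to 1, lets a Granite-style residual_multiplier override it directly, and otherwise keeps the existing scale_depth / sqrt(num_hidden_layers) behavior. A small sketch of that precedence follows; the function name and the numbers in the asserts are illustrative, not taken from the patch.

import math

def resolve_scale_depth(residual_multiplier, scale_depth, num_hidden_layers):
    # Mirrors the precedence in the diff: residual_multiplier wins outright,
    # scale_depth is depth-normalized, and the default is 1.
    result = 1
    if residual_multiplier:
        result = residual_multiplier
    elif scale_depth:
        result = scale_depth / math.sqrt(num_hidden_layers)
    return result

assert resolve_scale_depth(None, None, 40) == 1
assert resolve_scale_depth(0.22, 1.4, 40) == 0.22                  # residual_multiplier takes precedence
assert resolve_scale_depth(None, 1.4, 40) == 1.4 / math.sqrt(40)   # legacy scale_depth path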