
Commit ab1091d

[Misc][Attention][Quantization] init property earlier (#13733)
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent 1e15aae commit ab1091d

File tree

1 file changed: +5, -4 lines changed


vllm/attention/layer.py

Lines changed: 5 additions & 4 deletions
@@ -85,6 +85,11 @@ def __init__(
         self._k_scale_float = 1.0
         self._v_scale_float = 1.0
 
+        self.num_heads = num_heads
+        self.head_size = head_size
+        self.num_kv_heads = num_kv_heads
+        self.sliding_window = sliding_window
+
         quant_method = quant_config.get_quant_method(
             self, prefix=prefix) if quant_config else None
         if quant_method is not None:
@@ -116,10 +121,6 @@ def __init__(
             alibi_slopes, sliding_window, kv_cache_dtype,
             blocksparse_params, logits_soft_cap, attn_type,
             **extra_impl_args)
-        self.num_heads = num_heads
-        self.head_size = head_size
-        self.num_kv_heads = num_kv_heads
-        self.sliding_window = sliding_window
         self.backend = backend_name_to_enum(attn_backend.get_name())
         self.dtype = dtype
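Why moving these assignments earlier can matter: quant_config.get_quant_method(self, ...) is called while the Attention layer is still being constructed, so any attribute the quantization method wants to read off the layer must already be set at that point. Below is a minimal, self-contained Python sketch of that dependency. KVCacheQuantConfig and KVCacheQuantMethod are hypothetical stand-ins invented for illustration, not the vLLM classes involved in this commit, and the assumption that the quant method reads num_kv_heads and head_size is only a plausible motivation, not taken from the commit itself.

# Hypothetical sketch: why the shape attributes must be set before
# quant_config.get_quant_method(self, ...) runs. Not vLLM code.

class KVCacheQuantMethod:
    def __init__(self, num_kv_heads: int, head_size: int):
        # Sizes are captured at construction time, so the layer must
        # already expose them when get_quant_method is called.
        self.scale_shape = (num_kv_heads, head_size)


class KVCacheQuantConfig:
    def get_quant_method(self, layer, prefix: str):
        # Reads attributes off the partially constructed layer.
        return KVCacheQuantMethod(layer.num_kv_heads, layer.head_size)


class Attention:
    def __init__(self, num_heads, head_size, num_kv_heads,
                 quant_config=None, prefix=""):
        # After this commit: the shape attributes are assigned first ...
        self.num_heads = num_heads
        self.head_size = head_size
        self.num_kv_heads = num_kv_heads

        # ... so this call can safely look at them. With the old ordering
        # (attributes assigned only later in __init__), an access like the
        # one in KVCacheQuantConfig above would raise AttributeError.
        self.quant_method = (quant_config.get_quant_method(self, prefix=prefix)
                             if quant_config else None)


layer = Attention(num_heads=8, head_size=64, num_kv_heads=2,
                  quant_config=KVCacheQuantConfig())
print(layer.quant_method.scale_shape)  # (2, 64)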
