
Commit 817c623

Merge remote-tracking branch 'origin/nomic' into nomic
2 parents: ed18ba7 + 4605be9

147 files changed: +6208, -1091 lines


.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh

Lines changed: 3 additions & 0 deletions
@@ -50,6 +50,9 @@ docker run --privileged --net host --shm-size=16G -it \
     && pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py \
     && echo TEST_12 \
     && pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py" \
+# Disable the TPU LoRA tests until the feature is activated
+#   && echo TEST_13 \
+#   && pytest -s -v /workspace/vllm/tests/tpu/lora/" \


 # TODO: This test fails because it uses RANDOM_SEED sampling

.github/ISSUE_TEMPLATE/400-bug-report.yml

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ body:
       ```

       ```
-      The error message you got, with the full traceback.
+      The error message you got, with the full traceback and the error logs with [dump_input.py:##] if present.
       ```
   validations:
     required: true

benchmarks/kernels/benchmark_moe.py

Lines changed: 15 additions & 9 deletions
@@ -6,15 +6,16 @@
 from contextlib import nullcontext
 from datetime import datetime
 from itertools import product
+from types import SimpleNamespace
 from typing import Any, TypedDict

 import ray
 import torch
 from ray.experimental.tqdm_ray import tqdm
-from transformers import AutoConfig

 from vllm.model_executor.layers.fused_moe.fused_moe import *
 from vllm.platforms import current_platform
+from vllm.transformers_utils.config import get_config
 from vllm.triton_utils import triton
 from vllm.utils import FlexibleArgumentParser

@@ -534,8 +535,12 @@ def get_weight_block_size_safety(config, default_value=None):
 def main(args: argparse.Namespace):
     print(args)

-    config = AutoConfig.from_pretrained(
-        args.model, trust_remote_code=args.trust_remote_code)
+    config = get_config(model=args.model,
+                        trust_remote_code=args.trust_remote_code)
+    if args.model_prefix:
+        config = getattr(config, args.model_prefix)
+        config = SimpleNamespace(**config)
+
     if config.architectures[0] == "DbrxForCausalLM":
         E = config.ffn_config.moe_num_experts
         topk = config.ffn_config.moe_top_k
@@ -546,15 +551,14 @@ def main(args: argparse.Namespace):
         topk = config.num_experts_per_tok
         intermediate_size = config.intermediate_size
         shard_intermediate_size = 2 * intermediate_size // args.tp_size
-    elif (config.architectures[0] == "DeepseekV3ForCausalLM"
-          or config.architectures[0] == "DeepseekV2ForCausalLM"):
+    elif (config.architectures[0]
+          in ("DeepseekV3ForCausalLM", "DeepseekV2ForCausalLM")):
         E = config.n_routed_experts
         topk = config.num_experts_per_tok
         intermediate_size = config.moe_intermediate_size
         shard_intermediate_size = 2 * intermediate_size // args.tp_size
-    elif config.architectures[0] in [
-            "Qwen2MoeForCausalLM", "Qwen3MoeForCausalLM"
-    ]:
+    elif config.architectures[0] in ("Qwen2MoeForCausalLM",
+                                     "Qwen3MoeForCausalLM"):
         E = config.num_experts
         topk = config.num_experts_per_tok
         intermediate_size = config.moe_intermediate_size
@@ -569,7 +573,8 @@ def main(args: argparse.Namespace):
     shard_intermediate_size = 2 * intermediate_size // args.tp_size

     hidden_size = config.hidden_size
-    dtype = torch.float16 if current_platform.is_rocm() else config.torch_dtype
+    dtype = torch.float16 if current_platform.is_rocm() else getattr(
+        torch, config.torch_dtype)
     use_fp8_w8a8 = args.dtype == "fp8_w8a8"
     use_int8_w8a16 = args.dtype == "int8_w8a16"
     block_quant_shape = get_weight_block_size_safety(config)
@@ -659,6 +664,7 @@ def _distribute(method: str, inputs: list[Any]) -> list[Any]:
     parser.add_argument("--batch-size", type=int, required=False)
     parser.add_argument("--tune", action="store_true")
     parser.add_argument("--trust-remote-code", action="store_true")
+    parser.add_argument("--model-prefix", type=str, required=False)
     args = parser.parse_args()

     main(args)
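
Note: taken together, the benchmark_moe.py changes swap transformers' AutoConfig for vLLM's get_config, add a --model-prefix flag for drilling into a nested sub-config, wrap that sub-config in a SimpleNamespace so attribute access keeps working, and resolve torch_dtype from a string via getattr(torch, ...). The sketch below is a minimal, dependency-light illustration of that pattern, not the benchmark itself; the nested dict and the helper names (resolve_config, resolve_dtype) are stand-ins invented here for illustration.

# Minimal sketch (not vLLM code): mimics how the updated benchmark handles
# a prefixed sub-config and a string-valued torch_dtype.
from types import SimpleNamespace
from typing import Optional

import torch


def resolve_config(config, model_prefix: Optional[str]):
    # Descend into the prefixed sub-config (e.g. a text backbone inside a
    # multimodal config) and wrap it so attribute access keeps working.
    if model_prefix:
        config = getattr(config, model_prefix)
        if isinstance(config, dict):
            config = SimpleNamespace(**config)
    return config


def resolve_dtype(config) -> torch.dtype:
    # get_config() can leave torch_dtype as a string such as "bfloat16",
    # hence the getattr(torch, ...) lookup in the diff.
    dt = config.torch_dtype
    return getattr(torch, dt) if isinstance(dt, str) else dt


if __name__ == "__main__":
    # Hypothetical top-level config whose MoE text backbone sits under
    # "text_config"; field names mirror the Qwen3-MoE branch of the diff.
    top_level = SimpleNamespace(text_config={
        "architectures": ["Qwen3MoeForCausalLM"],
        "num_experts": 128,
        "num_experts_per_tok": 8,
        "moe_intermediate_size": 768,
        "hidden_size": 2048,
        "torch_dtype": "bfloat16",
    })
    cfg = resolve_config(top_level, "text_config")
    print(cfg.architectures[0], cfg.num_experts, resolve_dtype(cfg))

Assuming the flag behaves the way the diff suggests, a tuning run for such a model would then add the new option, e.g. python benchmarks/kernels/benchmark_moe.py --model <hf-model-id> --model-prefix text_config --tune (the model id and prefix value here are placeholders).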

cmake/cpu_extension.cmake

Lines changed: 32 additions & 1 deletion
@@ -167,6 +167,33 @@ if (AVX512_FOUND AND NOT AVX512_DISABLED)
 
     FetchContent_MakeAvailable(oneDNN)
 
+    list(APPEND LIBS dnnl)
+elseif(POWER10_FOUND)
+    FetchContent_Declare(
+        oneDNN
+        GIT_REPOSITORY https://github.com/oneapi-src/oneDNN.git
+        GIT_TAG v3.7.2
+        GIT_PROGRESS TRUE
+        GIT_SHALLOW TRUE
+    )
+
+    set(ONEDNN_LIBRARY_TYPE "STATIC")
+    set(ONEDNN_BUILD_DOC "OFF")
+    set(ONEDNN_BUILD_EXAMPLES "OFF")
+    set(ONEDNN_BUILD_TESTS "OFF")
+    set(ONEDNN_ENABLE_WORKLOAD "INFERENCE")
+    set(ONEDNN_ENABLE_PRIMITIVE "MATMUL;REORDER")
+    set(ONEDNN_BUILD_GRAPH "OFF")
+    set(ONEDNN_ENABLE_JIT_PROFILING "OFF")
+    set(ONEDNN_ENABLE_ITT_TASKS "OFF")
+    set(ONEDNN_ENABLE_MAX_CPU_ISA "OFF")
+    set(ONEDNN_ENABLE_CPU_ISA_HINTS "OFF")
+    set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
+
+    set(DNNL_CPU_RUNTIME "OMP")
+
+    FetchContent_MakeAvailable(oneDNN)
+
     list(APPEND LIBS dnnl)
 endif()
 
@@ -197,6 +224,10 @@ if (AVX512_FOUND AND NOT AVX512_DISABLED)
         "csrc/cpu/quant.cpp"
         "csrc/cpu/shm.cpp"
         ${VLLM_EXT_SRC})
+elseif(POWER10_FOUND)
+    set(VLLM_EXT_SRC
+        "csrc/cpu/quant.cpp"
+        ${VLLM_EXT_SRC})
 endif()
 
 #
@@ -214,4 +245,4 @@ define_gpu_extension_target(
     WITH_SOABI
 )
 
-message(STATUS "Enabling C extension.")
+message(STATUS "Enabling C extension.")
