From 82cf6b5c30e806bd493a54e3a9adf4ca77e18ada Mon Sep 17 00:00:00 2001
From: Lu Fang
Date: Wed, 7 May 2025 15:12:01 -0700
Subject: [PATCH] Eliminate c10::optional usage in vllm/csrc

Summary: c10::optional will fail the internal build, so replace it with
std::optional.

Differential Revision: D74356723
---
 csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu | 4 ++--
 csrc/quantization/gptq_allspark/allspark_repack.cu      | 4 ++--
 csrc/rocm/attention.cu                                   | 4 ++--
 csrc/rocm/ops.h                                          | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu b/csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu
index ea3bb429904..03bd5964a7f 100644
--- a/csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu
+++ b/csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu
@@ -9,7 +9,7 @@ at::Tensor as_g_workspace;
 
 torch::Tensor allspark_w8a16_gemm(
     torch::Tensor const& a, torch::Tensor const& b_qweight,
-    torch::Tensor const& b_scales, c10::optional<torch::Tensor> const& b_qzeros,
+    torch::Tensor const& b_scales, std::optional<torch::Tensor> const& b_qzeros,
     int64_t n, int64_t group_size, int64_t sm_count, int64_t sm_version,
     int64_t CUBLAS_M_THRESHOLD, bool has_zp, bool n32k16_reorder) {
   TORCH_CHECK_NOT_IMPLEMENTED(
@@ -918,7 +918,7 @@ void allspark_qgemm_w8a16_perc_ampere(
 
 torch::Tensor allspark_w8a16_gemm(
     torch::Tensor const& a, torch::Tensor const& b_qweight,
-    torch::Tensor const& b_scales, c10::optional<torch::Tensor> const& b_qzeros,
+    torch::Tensor const& b_scales, std::optional<torch::Tensor> const& b_qzeros,
     int64_t n, int64_t group_size, int64_t sm_count, int64_t sm_version,
     int64_t CUBLAS_M_THRESHOLD, bool has_zp, bool n32k16_reorder) {
   // Verify device and strides
diff --git a/csrc/quantization/gptq_allspark/allspark_repack.cu b/csrc/quantization/gptq_allspark/allspark_repack.cu
index ea8eccf040d..7a5b2f95cc2 100644
--- a/csrc/quantization/gptq_allspark/allspark_repack.cu
+++ b/csrc/quantization/gptq_allspark/allspark_repack.cu
@@ -100,9 +100,9 @@ void rearrange_kn_weight_as_n32k16_order_ldg16(
 
 void rearrange_kn_weight_as_n32k16_order(
     torch::Tensor const& b_qweight, torch::Tensor const& b_scales,
-    c10::optional<torch::Tensor> const& b_zeros, bool has_zp,
+    std::optional<torch::Tensor> const& b_zeros, bool has_zp,
     torch::Tensor& b_qweight_reorder, torch::Tensor& b_scales_reorder,
-    c10::optional<torch::Tensor> const& b_zeros_reorder, const int64_t K,
+    std::optional<torch::Tensor> const& b_zeros_reorder, const int64_t K,
     const int64_t N, const int64_t N_32align) {
   // Verify device and strides
   TORCH_CHECK(b_qweight.device().is_cuda(), "b_qweight is not on GPU");
diff --git a/csrc/rocm/attention.cu b/csrc/rocm/attention.cu
index f8ae4b65d43..8cc5a0f4f21 100644
--- a/csrc/rocm/attention.cu
+++ b/csrc/rocm/attention.cu
@@ -1597,7 +1597,7 @@ void paged_attention_custom_launcher(
     torch::Tensor& block_tables, torch::Tensor& context_lens,
     const std::optional<torch::Tensor>& query_start_loc, int max_context_len,
     const std::optional<torch::Tensor>& alibi_slopes, torch::Tensor& k_scale,
-    torch::Tensor& v_scale, const c10::optional<torch::Tensor>& fp8_out_scale) {
+    torch::Tensor& v_scale, const std::optional<torch::Tensor>& fp8_out_scale) {
   int num_seqs = block_tables.size(0);
   int num_heads = query.size(1);
   int head_size = query.size(2);
@@ -1825,7 +1825,7 @@ void paged_attention(
     const std::optional<torch::Tensor>& alibi_slopes,
     const std::string& kv_cache_dtype, torch::Tensor& k_scale,
     torch::Tensor& v_scale,
-    const c10::optional<torch::Tensor>& fp8_out_scale) {
+    const std::optional<torch::Tensor>& fp8_out_scale) {
   // clang-format on
   const int head_size = query.size(2);
   if (kv_cache_dtype == "auto") {
diff --git a/csrc/rocm/ops.h b/csrc/rocm/ops.h
index 2252ea71722..e538197dbcb 100644
--- a/csrc/rocm/ops.h
+++ b/csrc/rocm/ops.h
@@ -19,4 +19,4 @@ void paged_attention(
     const std::optional<torch::Tensor>& query_start_loc, int64_t block_size,
     int64_t max_context_len, const std::optional<torch::Tensor>& alibi_slopes,
     const std::string& kv_cache_dtype, torch::Tensor& k_scale,
-    torch::Tensor& v_scale, const c10::optional<torch::Tensor>& fp8_out_scale);
+    torch::Tensor& v_scale, const std::optional<torch::Tensor>& fp8_out_scale);
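
The swap is mechanical because recent libtorch versions define c10::optional<T> as an
alias of std::optional<T>. A minimal standalone sketch follows (not part of the patch;
the helper name and the arithmetic are illustrative only), assuming such a libtorch
version, to show that callers and the has_value()/value() accessors compile unchanged
after the replacement:

    // Illustrative only: signature style after the patch, with the parameter
    // declared as std::optional<torch::Tensor> instead of c10::optional<torch::Tensor>.
    #include <optional>
    #include <torch/torch.h>

    torch::Tensor scale_with_optional_zeros(
        torch::Tensor const& b_scales,
        std::optional<torch::Tensor> const& b_qzeros) {
      // Accessor API is identical to c10::optional: has_value() / value().
      if (b_qzeros.has_value()) {
        return b_scales - b_qzeros.value();
      }
      return b_scales;
    }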