diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl index 327c3868847..4b18abbb1c5 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.glsl @@ -22,7 +22,13 @@ layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)} ${layout_declare_buffer(B, "r", "nchw_in", "int")} -${layout_declare_ubo(B, "ivec4", "sizes")} + +$if USE_PUSH_CONST: + layout(push_constant) uniform restrict Block { + ivec4 sizes; + }; +$else: + ${layout_declare_ubo(B, "ivec4", "sizes")} layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.yaml b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.yaml index 506a66c0d27..0b8bbecb7bd 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_bitw8_image_nobitw8buffer.yaml @@ -8,6 +8,7 @@ nchw_to_bitw8_image_nobitw8buffer: parameter_names_with_default_values: STORAGE: texture3d DTYPE: int8 + USE_PUSH_CONST: True generate_variant_forall: STORAGE: - VALUE: texture2d @@ -17,3 +18,5 @@ nchw_to_bitw8_image_nobitw8buffer: - VALUE: uint8 shader_variants: - NAME: nchw_to_bitw8_image_nobitw8buffer + - NAME: nchw_to_bitw8_image_nobitw8buffer_no_pc + USE_PUSH_CONST: False diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl index 32235a9ad65..ba4e4dd9dd9 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl @@ -12,9 +12,17 @@ layout(std430) buffer; ${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)} ${layout_declare_tensor(1, "r", 
"nchw_in", DTYPE, STORAGE)} -${layout_declare_ubo(2, "ivec4", "out_sizes")} -${layout_declare_ubo(3, "ivec4", "out_strides")} -${layout_declare_ubo(4, "int", "numel")} + +$if USE_PUSH_CONST: + layout(push_constant) uniform restrict Block { + ivec4 out_sizes; + ivec4 out_strides; + int numel; + }; +$else: + ${layout_declare_ubo(2, "ivec4", "out_sizes")} + ${layout_declare_ubo(3, "ivec4", "out_strides")} + ${layout_declare_ubo(4, "int", "numel")} layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml index a85c1ec6c65..486d710cf55 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml @@ -8,6 +8,7 @@ nchw_to_buffer: parameter_names_with_default_values: DTYPE: float STORAGE: buffer + USE_PUSH_CONST: True generate_variant_forall: DTYPE: - VALUE: half @@ -17,3 +18,5 @@ nchw_to_buffer: - VALUE: uint8 shader_variants: - NAME: nchw_to_buffer + - NAME: nchw_to_buffer_no_pc + USE_PUSH_CONST: False diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl index 2f55535c82c..4674822ce6a 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.glsl @@ -21,9 +21,17 @@ layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)} ${layout_declare_buffer(B, "r", "buf_in", DTYPE)} -${layout_declare_ubo(B, "ivec4", "sizes")} -$if not FROM_STAGING: - ${layout_declare_ubo(B, "ivec4", "buf_strides")} + +$if USE_PUSH_CONST: + layout(push_constant) uniform restrict Block { + ivec4 sizes; + $if not FROM_STAGING: + ivec4 buf_strides; + }; +$else: + ${layout_declare_ubo(B, "ivec4", "sizes")} + $if not FROM_STAGING: + ${layout_declare_ubo(B, "ivec4", "buf_strides")} #include "indexing_utils.h" diff 
--git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.yaml b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.yaml index 9d17ff5f645..7e52ec10376 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_image.yaml @@ -9,6 +9,7 @@ nchw_to_image: STORAGE: texture3d DTYPE: float FROM_STAGING: True + USE_PUSH_CONST: True generate_variant_forall: DTYPE: - VALUE: half @@ -22,3 +23,11 @@ nchw_to_image: STORAGE: texture2d - NAME: clone_buffer_to_image FROM_STAGING: False + - NAME: nchw_to_image_no_pc_texture3d + USE_PUSH_CONST: False + - NAME: nchw_to_image_no_pc_texture2d + STORAGE: texture2d + USE_PUSH_CONST: False + - NAME: clone_buffer_to_image_no_pc + FROM_STAGING: False + USE_PUSH_CONST: False diff --git a/backends/vulkan/runtime/graph/ops/impl/Clone.cpp b/backends/vulkan/runtime/graph/ops/impl/Clone.cpp index b547bc3572d..d0276b1783b 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Clone.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Clone.cpp @@ -105,9 +105,9 @@ void add_buffer_to_image_node( // Input and Outputs {{image, vkapi::kWrite}, {buffer, vkapi::kRead}}, // Parameter Buffers - {graph.sizes_ubo(image), graph.strides_ubo(buffer)}, - // Push Constants {}, + // Push Constants + {graph.sizes_pc_of(image), graph.strides_pc_of(buffer)}, // Specialization Constants {graph.hashed_layout_of(image)}, // Resize Args diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp index fbe4a61befc..32f478fa5bd 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp @@ -106,9 +106,10 @@ ValueRef prepack_biases( graph.create_local_wg_size(v), vref, v, - {t->sizes_ubo()}, + {}, // Specialization constants - {t->hashed_layout()})); + {t->hashed_layout()}, + {graph.sizes_pc_of(v)})); return v; } diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp 
b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp index f39b0fc33ff..8c060a9da4b 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp @@ -28,14 +28,14 @@ void add_staging_to_tensor_node( vkapi::ShaderInfo shader = get_nchw_to_tensor_shader( *graph.get_tensor(out_tensor), graph.int8_buffers_enabled()); - vkapi::ParamsBindList ubos; + std::vector pcs; if (graph.is_buffer_storage(out_tensor)) { - ubos.append( - {graph.sizes_ubo(out_tensor), - graph.strides_ubo(out_tensor), - graph.numel_ubo(out_tensor)}); + pcs = { + graph.sizes_pc_of(out_tensor), + graph.strides_pc_of(out_tensor), + graph.numel_pc_of(out_tensor)}; } else { - ubos.append({graph.sizes_ubo(out_tensor)}); + pcs = {graph.sizes_pc_of(out_tensor)}; } graph.execute_nodes().emplace_back(new DispatchNode( @@ -46,9 +46,9 @@ void add_staging_to_tensor_node( // Input and Outputs {{out_tensor, vkapi::kWrite}, {in_staging, vkapi::kRead}}, // Parameter Buffers - ubos, - // Push Constants {}, + // Push Constants + pcs, // Specialization Constants {graph.hashed_layout_of(out_tensor)}, // Resize Args @@ -127,14 +127,14 @@ void add_prepack_standard_node( vkapi::ShaderInfo shader = get_nchw_to_tensor_shader( *graph.get_tensor(tensor), graph.int8_buffers_enabled()); - vkapi::ParamsBindList ubos; + std::vector pcs; if (graph.is_buffer_storage(tensor)) { - ubos.append( - {graph.sizes_ubo(tensor), - graph.strides_ubo(tensor), - graph.numel_ubo(tensor)}); + pcs = { + graph.sizes_pc_of(tensor), + graph.strides_pc_of(tensor), + graph.numel_pc_of(tensor)}; } else { - ubos.append({graph.sizes_ubo(tensor)}); + pcs = {graph.sizes_pc_of(tensor)}; } int transpose_hw_spec = transpose_hw ? 
1 : 0; @@ -148,9 +148,10 @@ void add_prepack_standard_node( tensor_data, tensor, // Parameter Buffers - ubos, + {}, // Specialization Constants - {graph.hashed_layout_of(tensor), transpose_hw_spec})); + {graph.hashed_layout_of(tensor), transpose_hw_spec}, + pcs)); } ValueRef prepack_standard( diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp index fd7e6b78c22..6f3660fb0fc 100644 --- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp +++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp @@ -22,13 +22,17 @@ bool is_bitw8(vkapi::ScalarType dtype) { vkapi::ShaderInfo get_nchw_to_tensor_shader( const api::vTensor& v_dst, - const bool int8_buffer_enabled) { + bool int8_buffer_enabled, + bool push_constant_variant) { std::string kernel_name; kernel_name.reserve(kShaderNameReserve); if (is_bitw8(v_dst.dtype()) && v_dst.storage_type() != utils::kBuffer && !int8_buffer_enabled) { kernel_name = "nchw_to_bitw8_image_nobitw8buffer"; + if (!push_constant_variant) { + kernel_name += "_no_pc"; + } add_storage_type_suffix(kernel_name, v_dst); add_dtype_suffix(kernel_name, v_dst); return VK_KERNEL_FROM_STR(kernel_name); @@ -36,11 +40,17 @@ vkapi::ShaderInfo get_nchw_to_tensor_shader( if (v_dst.storage_type() == utils::kBuffer) { kernel_name = "nchw_to_buffer"; + if (!push_constant_variant) { + kernel_name += "_no_pc"; + } add_dtype_suffix(kernel_name, v_dst); return VK_KERNEL_FROM_STR(kernel_name); } kernel_name = "nchw_to_image"; + if (!push_constant_variant) { + kernel_name += "_no_pc"; + } add_storage_type_suffix(kernel_name, v_dst); add_dtype_suffix(kernel_name, v_dst); diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h index 8d63958a738..6abbac45823 100644 --- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h +++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h @@ -14,7 +14,8 @@ namespace 
vkcompute { vkapi::ShaderInfo get_nchw_to_tensor_shader( const api::vTensor& v_dst, - bool int8_buffer_enabled = true); + bool int8_buffer_enabled = true, + bool push_constant_variant = true); vkapi::ShaderInfo get_tensor_to_nchw_shader( const api::vTensor& v_src, bool int8_buffer_enabled = true); diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp index c4acb41b7b0..dcd8c425d62 100644 --- a/backends/vulkan/test/utils/test_utils.cpp +++ b/backends/vulkan/test/utils/test_utils.cpp @@ -28,7 +28,7 @@ void record_nchw_to_buffer_op( vkapi::PipelineBarrier pipeline_barrier{}; context->submit_compute_job( - get_nchw_to_tensor_shader(v_dst), + get_nchw_to_tensor_shader(v_dst, true, false), pipeline_barrier, {uint32_t(v_dst.numel()), 1, 1}, {64, 1, 1}, @@ -74,7 +74,9 @@ void record_nchw_to_image_op( context->submit_compute_job( get_nchw_to_tensor_shader( - v_dst, context->adapter_ptr()->has_full_int8_buffers_support()), + v_dst, + context->adapter_ptr()->has_full_int8_buffers_support(), + false), pipeline_barrier, v_dst.logical_limits(), adaptive_work_group_size(v_dst.logical_limits()), diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index 60dfb3b8606..85811aaaf11 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -1601,8 +1601,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { /*shared_object_idx = */ 4); - // +2: t.sizes_ubo() for each staging shader // +2: staging buffer for each input tensor - expected_vma_allocation_count += 4; + expected_vma_allocation_count += 2; EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count); ValueRef c = graph.add_tensor( @@ -1622,8 +1621,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { /*shared_object_idx = */ 2); - // +1: t.sizes_ubo() uniform buffer for staging shader // +1: staging buffer for 
the input tensor - expected_vma_allocation_count += 2; + expected_vma_allocation_count += 1; EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count); ValueRef e = graph.add_tensor(