diff --git a/backends/vulkan/runtime/graph/ops/glsl/bitw8_image_to_nchw_nobitw8buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/bitw8_image_to_nchw_nobitw8buffer.glsl index 34e80b6ec11..ac39dd36fc3 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/bitw8_image_to_nchw_nobitw8buffer.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/bitw8_image_to_nchw_nobitw8buffer.glsl @@ -20,8 +20,15 @@ layout(std430) buffer; ${layout_declare_buffer(B, "w", "nchw_out", "int")} ${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)} -${layout_declare_ubo(B, "ivec4", "tensor_sizes")} -${layout_declare_ubo(B, "int", "out_numel")} + +$if USE_PUSH_CONST: + layout(push_constant) uniform restrict Block { + ivec4 tensor_sizes; + int out_numel; + }; +$else: + ${layout_declare_ubo(B, "ivec4", "tensor_sizes")} + ${layout_declare_ubo(B, "int", "out_numel")} layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; diff --git a/backends/vulkan/runtime/graph/ops/glsl/bitw8_image_to_nchw_nobitw8buffer.yaml b/backends/vulkan/runtime/graph/ops/glsl/bitw8_image_to_nchw_nobitw8buffer.yaml index e1574d7fc0f..0386c261203 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/bitw8_image_to_nchw_nobitw8buffer.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/bitw8_image_to_nchw_nobitw8buffer.yaml @@ -8,6 +8,7 @@ bitw8_image_to_nchw_nobitw8buffer: parameter_names_with_default_values: STORAGE: texture3d DTYPE: int8 + USE_PUSH_CONST: True generate_variant_forall: STORAGE: - VALUE: texture2d @@ -17,3 +18,5 @@ bitw8_image_to_nchw_nobitw8buffer: - VALUE: uint8 shader_variants: - NAME: bitw8_image_to_nchw_nobitw8buffer + - NAME: bitw8_image_to_nchw_nobitw8buffer_no_pc + USE_PUSH_CONST: False diff --git a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl index 201b4d17262..423c4df2679 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl @@ -12,9 +12,17 @@ layout(std430) buffer; ${layout_declare_tensor(0, "w", "nchw_buf", DTYPE, STORAGE)} ${layout_declare_tensor(1, "r", "t_in", DTYPE, STORAGE)} -${layout_declare_ubo(2, "ivec4", "in_sizes")} -${layout_declare_ubo(3, "ivec4", "in_strides")} -${layout_declare_ubo(4, "int", "numel")} + +$if USE_PUSH_CONST: + layout(push_constant) uniform restrict Block { + ivec4 in_sizes; + ivec4 in_strides; + int numel; + }; +$else: + ${layout_declare_ubo(2, "ivec4", "in_sizes")} + ${layout_declare_ubo(3, "ivec4", "in_strides")} + ${layout_declare_ubo(4, "int", "numel")} layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; diff --git a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml index 25b3657c2eb..e48eab63a64 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml @@ -8,6 +8,7 @@ buffer_to_nchw: parameter_names_with_default_values: DTYPE: float STORAGE: buffer + USE_PUSH_CONST: True generate_variant_forall: DTYPE: - VALUE: half @@ -17,3 +18,5 @@ buffer_to_nchw: - VALUE: uint8 shader_variants: - NAME: buffer_to_nchw + - NAME: buffer_to_nchw_no_pc + USE_PUSH_CONST: False diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_prepack_weights.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_prepack_weights.glsl index 49ce76423d5..f5361d40b66 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_prepack_weights.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_prepack_weights.glsl @@ -26,11 +26,8 @@ layout(set = 0, binding = 1) buffer PRECISION restrict readonly Buffer { BUF_T buffer_in[]; }; -layout(set = 0, binding = 2) uniform PRECISION restrict Sizes { +layout(push_constant) uniform PRECISION restrict Block { ivec4 sizes; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict OriginalSizes { ivec4 original_sizes; }; diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_prepack_weights.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_prepack_weights.glsl index 4e8bff94947..d2f3f615f74 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_prepack_weights.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_prepack_weights.glsl @@ -26,11 +26,8 @@ layout(set = 0, binding = 1) buffer PRECISION restrict readonly Buffer { BUF_T buffer_in[]; }; -layout(set = 0, binding = 2) uniform PRECISION restrict Sizes { +layout(push_constant) uniform PRECISION restrict Block { ivec4 sizes; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict OriginalSizes { ivec4 original_sizes; }; diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv_transpose2d_prepack_weights.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv_transpose2d_prepack_weights.glsl index df8589e737f..0b10683cee4 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv_transpose2d_prepack_weights.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv_transpose2d_prepack_weights.glsl @@ -26,11 +26,8 @@ layout(set = 0, binding = 1) buffer PRECISION restrict readonly Buffer { BUF_T buffer_in[]; }; -layout(set = 0, binding = 2) uniform PRECISION restrict Sizes { +layout(push_constant) uniform PRECISION restrict Block { ivec4 sizes; -}; - -layout(set = 0, binding = 3) uniform PRECISION restrict OriginalSizes { ivec4 original_sizes; }; diff --git a/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl b/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl index afdc35a8861..d7bef9f0163 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl @@ -21,9 +21,17 @@ layout(std430) buffer; ${layout_declare_buffer(B, "w", "buf_out", DTYPE)} ${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)} -${layout_declare_ubo(B, "ivec4", "sizes")} -$if not TO_STAGING: - ${layout_declare_ubo(B, "ivec4", "buf_strides")} + +$if USE_PUSH_CONST: + layout(push_constant) uniform restrict Block { + ivec4 sizes; + $if not TO_STAGING: + ivec4 buf_strides; + }; +$else: + ${layout_declare_ubo(B, "ivec4", "sizes")} + $if not TO_STAGING: + ${layout_declare_ubo(B, "ivec4", "buf_strides")} #include "indexing_utils.h" diff --git a/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.yaml b/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.yaml index c1045d93afc..804ce19bdb8 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.yaml @@ -9,6 +9,7 @@ image_to_nchw: DTYPE: float STORAGE: texture3d TO_STAGING: True + USE_PUSH_CONST: True generate_variant_forall: DTYPE: - VALUE: half @@ -22,3 +23,11 @@ image_to_nchw: STORAGE: texture2d - NAME: clone_image_to_buffer TO_STAGING: False + - NAME: image_to_nchw_no_pc_texture3d + USE_PUSH_CONST: False + - NAME: image_to_nchw_no_pc_texture2d + STORAGE: texture2d + USE_PUSH_CONST: False + - NAME: clone_image_to_buffer_no_pc + TO_STAGING: False + USE_PUSH_CONST: False diff --git a/backends/vulkan/runtime/graph/ops/impl/Clone.cpp b/backends/vulkan/runtime/graph/ops/impl/Clone.cpp index da06223cd12..fcbac2df0fc 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Clone.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Clone.cpp @@ -88,9 +88,9 @@ void add_image_to_buffer_node( // Input and Outputs {{buffer, vkapi::kWrite}, {image, vkapi::kRead}}, // Parameter Buffers - {graph.sizes_ubo(image), graph.strides_ubo(buffer)}, - // Push Constants {}, + // Push Constants + {graph.sizes_pc_of(image), graph.strides_pc_of(buffer)}, // Specialization Constants {graph.hashed_layout_of(image)}, // Resize Args diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp index 32f478fa5bd..ff375fba89c 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp @@ -211,6 +211,8 @@ ValueRef prepack_weights( vkapi::ShaderInfo shader = get_conv2d_shader(graph, *t, /*prepack_weights = */ true, method, vref); + const auto original_sizes_pc = + utils::make_ivec4(original_sizes, /*reverse = */ true); graph.prepack_nodes().emplace_back(new PrepackNode( graph, shader, @@ -218,11 +220,11 @@ ValueRef prepack_weights( graph.create_local_wg_size(v), vref, v, - {t->sizes_ubo(), - graph.create_params_buffer( - utils::make_ivec4(original_sizes, /*reverse = */ true))}, + {}, // Specialization constants - {SV(t->packed_dim())})); + {SV(t->packed_dim())}, + {graph.sizes_pc_of(v), + PushConstantDataInfo(&original_sizes_pc, sizeof(original_sizes_pc))})); return v; } diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp index 4c46596c206..f429ab0fc25 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp @@ -113,18 +113,18 @@ void add_tensor_to_staging_node( vkapi::ShaderInfo shader = get_tensor_to_nchw_shader( *graph.get_tensor(in_tensor), graph.int8_buffers_enabled()); - vkapi::ParamsBindList ubos; + std::vector pcs; if (graph.is_buffer_storage(in_tensor)) { - ubos.append( - {graph.sizes_ubo(in_tensor), - graph.strides_ubo(in_tensor), - graph.numel_ubo(in_tensor)}); + pcs = { + graph.sizes_pc_of(in_tensor), + graph.strides_pc_of(in_tensor), + graph.numel_pc_of(in_tensor)}; } else { - ubos.append({graph.sizes_ubo(in_tensor)}); + pcs = {graph.sizes_pc_of(in_tensor)}; } if (is_bitw8_shader(shader)) { - ubos.append({graph.numel_ubo(in_tensor)}); + pcs.push_back(graph.numel_pc_of(in_tensor)); } graph.execute_nodes().emplace_back(new DynamicDispatchNode( @@ -135,9 +135,9 @@ void add_tensor_to_staging_node( // Input and Outputs {{out_staging, vkapi::kWrite}, {in_tensor, vkapi::kRead}}, // Parameter Buffers - ubos, - // Push Constants {}, + // Push Constants + pcs, // Specialization Constants {graph.hashed_layout_of(in_tensor)}, // Resize Args diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp index 6f3660fb0fc..ea3ae0fa1c3 100644 --- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp +++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp @@ -59,13 +59,17 @@ vkapi::ShaderInfo get_nchw_to_tensor_shader( vkapi::ShaderInfo get_tensor_to_nchw_shader( const api::vTensor& v_src, - bool int8_buffer_enabled) { + bool int8_buffer_enabled, + bool push_constant_variant) { std::string kernel_name; kernel_name.reserve(kShaderNameReserve); if (is_bitw8(v_src.dtype()) && v_src.storage_type() != utils::kBuffer && !int8_buffer_enabled) { kernel_name = "bitw8_image_to_nchw_nobitw8buffer"; + if (!push_constant_variant) { + kernel_name += "_no_pc"; + } add_storage_type_suffix(kernel_name, v_src); add_dtype_suffix(kernel_name, v_src); return VK_KERNEL_FROM_STR(kernel_name); @@ -73,11 +77,17 @@ vkapi::ShaderInfo get_tensor_to_nchw_shader( if (v_src.storage_type() == utils::kBuffer) { kernel_name = "buffer_to_nchw"; + if (!push_constant_variant) { + kernel_name += "_no_pc"; + } add_dtype_suffix(kernel_name, v_src); return VK_KERNEL_FROM_STR(kernel_name); } kernel_name = "image_to_nchw"; + if (!push_constant_variant) { + kernel_name += "_no_pc"; + } add_storage_type_suffix(kernel_name, v_src); add_dtype_suffix(kernel_name, v_src); diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h index 6abbac45823..9e6b61d6cd8 100644 --- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h +++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.h @@ -18,6 +18,7 @@ vkapi::ShaderInfo get_nchw_to_tensor_shader( bool push_constant_variant = true); vkapi::ShaderInfo get_tensor_to_nchw_shader( const api::vTensor& v_src, - bool int8_buffer_enabled = true); + bool int8_buffer_enabled = true, + bool push_constant_variant = true); } // namespace vkcompute diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp index 99ee1c0fa0b..e842500e6be 100644 --- a/backends/vulkan/test/utils/test_utils.cpp +++ b/backends/vulkan/test/utils/test_utils.cpp @@ -51,7 +51,7 @@ void record_buffer_to_nchw_op( vkapi::VulkanBuffer& dst_buffer) { vkapi::PipelineBarrier pipeline_barrier{}; context->submit_compute_job( - get_tensor_to_nchw_shader(v_src), + get_tensor_to_nchw_shader(v_src, true, false), pipeline_barrier, {uint32_t(v_src.numel()), 1, 1}, {64, 1, 1}, @@ -99,7 +99,7 @@ void record_image_to_nchw_op( vkapi::SpecVarList specialization_constants = {v_src.hashed_layout()}; context->submit_compute_job( - get_tensor_to_nchw_shader(v_src), + get_tensor_to_nchw_shader(v_src, true, false), pipeline_barrier, v_src.logical_limits(), adaptive_work_group_size(v_src.logical_limits()), @@ -119,7 +119,7 @@ void record_bitw8_image_to_nchw_nobitw8buffer_op( uint32_t buffer_len = utils::safe_downcast(dst_buffer.numel() / 4); utils::uvec3 global_wg_size = {buffer_len, 1, 1}; - std::string kernel_name = "bitw8_image_to_nchw_nobitw8buffer"; + std::string kernel_name = "bitw8_image_to_nchw_nobitw8buffer_no_pc"; add_storage_type_suffix(kernel_name, v_src); add_dtype_suffix(kernel_name, v_src); diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index 9887f3c7ffb..60b5ccb1a80 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -1640,8 +1640,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { out.staging = graph.set_output_tensor(out.value); // +1: staging buffer input tensor - // +1: staging buffer for the output tensor - expected_vma_allocation_count += 2; + expected_vma_allocation_count += 1; EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count); graph.prepare();