Skip to content

[ET-VK] Use push constants for image and buffer to nchw prepack nodes. #11305

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,15 @@ layout(std430) buffer;

${layout_declare_buffer(B, "w", "nchw_out", "int")}
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
${layout_declare_ubo(B, "ivec4", "tensor_sizes")}
${layout_declare_ubo(B, "int", "out_numel")}

$if USE_PUSH_CONST:
layout(push_constant) uniform restrict Block {
ivec4 tensor_sizes;
int out_numel;
};
$else:
${layout_declare_ubo(B, "ivec4", "tensor_sizes")}
${layout_declare_ubo(B, "int", "out_numel")}

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ bitw8_image_to_nchw_nobitw8buffer:
parameter_names_with_default_values:
STORAGE: texture3d
DTYPE: int8
USE_PUSH_CONST: True
generate_variant_forall:
STORAGE:
- VALUE: texture2d
Expand All @@ -17,3 +18,5 @@ bitw8_image_to_nchw_nobitw8buffer:
- VALUE: uint8
shader_variants:
- NAME: bitw8_image_to_nchw_nobitw8buffer
- NAME: bitw8_image_to_nchw_nobitw8buffer_no_pc
USE_PUSH_CONST: False
14 changes: 11 additions & 3 deletions backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,17 @@ layout(std430) buffer;

${layout_declare_tensor(0, "w", "nchw_buf", DTYPE, STORAGE)}
${layout_declare_tensor(1, "r", "t_in", DTYPE, STORAGE)}
${layout_declare_ubo(2, "ivec4", "in_sizes")}
${layout_declare_ubo(3, "ivec4", "in_strides")}
${layout_declare_ubo(4, "int", "numel")}

$if USE_PUSH_CONST:
layout(push_constant) uniform restrict Block {
ivec4 in_sizes;
ivec4 in_strides;
int numel;
};
$else:
${layout_declare_ubo(2, "ivec4", "in_sizes")}
${layout_declare_ubo(3, "ivec4", "in_strides")}
${layout_declare_ubo(4, "int", "numel")}

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

Expand Down
3 changes: 3 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ buffer_to_nchw:
parameter_names_with_default_values:
DTYPE: float
STORAGE: buffer
USE_PUSH_CONST: True
generate_variant_forall:
DTYPE:
- VALUE: half
Expand All @@ -17,3 +18,5 @@ buffer_to_nchw:
- VALUE: uint8
shader_variants:
- NAME: buffer_to_nchw
- NAME: buffer_to_nchw_no_pc
USE_PUSH_CONST: False
14 changes: 11 additions & 3 deletions backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,17 @@ layout(std430) buffer;

${layout_declare_buffer(B, "w", "buf_out", DTYPE)}
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
${layout_declare_ubo(B, "ivec4", "sizes")}
$if not TO_STAGING:
${layout_declare_ubo(B, "ivec4", "buf_strides")}

$if USE_PUSH_CONST:
layout(push_constant) uniform restrict Block {
ivec4 sizes;
$if not TO_STAGING:
ivec4 buf_strides;
};
$else:
${layout_declare_ubo(B, "ivec4", "sizes")}
$if not TO_STAGING:
${layout_declare_ubo(B, "ivec4", "buf_strides")}

#include "indexing_utils.h"

Expand Down
9 changes: 9 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/image_to_nchw.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ image_to_nchw:
DTYPE: float
STORAGE: texture3d
TO_STAGING: True
USE_PUSH_CONST: True
generate_variant_forall:
DTYPE:
- VALUE: half
Expand All @@ -22,3 +23,11 @@ image_to_nchw:
STORAGE: texture2d
- NAME: clone_image_to_buffer
TO_STAGING: False
- NAME: image_to_nchw_no_pc_texture3d
USE_PUSH_CONST: False
- NAME: image_to_nchw_no_pc_texture2d
STORAGE: texture2d
USE_PUSH_CONST: False
- NAME: clone_image_to_buffer_no_pc
TO_STAGING: False
USE_PUSH_CONST: False
4 changes: 2 additions & 2 deletions backends/vulkan/runtime/graph/ops/impl/Clone.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ void add_image_to_buffer_node(
// Input and Outputs
{{buffer, vkapi::kWrite}, {image, vkapi::kRead}},
// Parameter Buffers
{graph.sizes_ubo(image), graph.strides_ubo(buffer)},
// Push Constants
{},
// Push Constants
{graph.sizes_pc_of(image), graph.strides_pc_of(buffer)},
// Specialization Constants
{graph.hashed_layout_of(image)},
// Resize Args
Expand Down
18 changes: 9 additions & 9 deletions backends/vulkan/runtime/graph/ops/impl/Staging.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,18 +113,18 @@ void add_tensor_to_staging_node(
vkapi::ShaderInfo shader = get_tensor_to_nchw_shader(
*graph.get_tensor(in_tensor), graph.int8_buffers_enabled());

vkapi::ParamsBindList ubos;
std::vector<PushConstantDataInfo> pcs;
if (graph.is_buffer_storage(in_tensor)) {
ubos.append(
{graph.sizes_ubo(in_tensor),
graph.strides_ubo(in_tensor),
graph.numel_ubo(in_tensor)});
pcs = {
graph.sizes_pc_of(in_tensor),
graph.strides_pc_of(in_tensor),
graph.numel_pc_of(in_tensor)};
} else {
ubos.append({graph.sizes_ubo(in_tensor)});
pcs = {graph.sizes_pc_of(in_tensor)};
}

if (is_bitw8_shader(shader)) {
ubos.append({graph.numel_ubo(in_tensor)});
pcs.push_back(graph.numel_pc_of(in_tensor));
}

graph.execute_nodes().emplace_back(new DynamicDispatchNode(
Expand All @@ -135,9 +135,9 @@ void add_tensor_to_staging_node(
// Input and Outputs
{{out_staging, vkapi::kWrite}, {in_tensor, vkapi::kRead}},
// Parameter Buffers
ubos,
// Push Constants
{},
// Push Constants
pcs,
// Specialization Constants
{graph.hashed_layout_of(in_tensor)},
// Resize Args
Expand Down
12 changes: 11 additions & 1 deletion backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,25 +59,35 @@ vkapi::ShaderInfo get_nchw_to_tensor_shader(

vkapi::ShaderInfo get_tensor_to_nchw_shader(
const api::vTensor& v_src,
bool int8_buffer_enabled) {
bool int8_buffer_enabled,
bool push_constant_variant) {
std::string kernel_name;
kernel_name.reserve(kShaderNameReserve);

if (is_bitw8(v_src.dtype()) && v_src.storage_type() != utils::kBuffer &&
!int8_buffer_enabled) {
kernel_name = "bitw8_image_to_nchw_nobitw8buffer";
if (!push_constant_variant) {
kernel_name += "_no_pc";
}
add_storage_type_suffix(kernel_name, v_src);
add_dtype_suffix(kernel_name, v_src);
return VK_KERNEL_FROM_STR(kernel_name);
}

if (v_src.storage_type() == utils::kBuffer) {
kernel_name = "buffer_to_nchw";
if (!push_constant_variant) {
kernel_name += "_no_pc";
}
add_dtype_suffix(kernel_name, v_src);
return VK_KERNEL_FROM_STR(kernel_name);
}

kernel_name = "image_to_nchw";
if (!push_constant_variant) {
kernel_name += "_no_pc";
}
add_storage_type_suffix(kernel_name, v_src);
add_dtype_suffix(kernel_name, v_src);

Expand Down
3 changes: 2 additions & 1 deletion backends/vulkan/runtime/graph/ops/utils/StagingUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ vkapi::ShaderInfo get_nchw_to_tensor_shader(
bool push_constant_variant = true);
vkapi::ShaderInfo get_tensor_to_nchw_shader(
const api::vTensor& v_src,
bool int8_buffer_enabled = true);
bool int8_buffer_enabled = true,
bool push_constant_variant = true);

} // namespace vkcompute
6 changes: 3 additions & 3 deletions backends/vulkan/test/utils/test_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void record_buffer_to_nchw_op(
vkapi::VulkanBuffer& dst_buffer) {
vkapi::PipelineBarrier pipeline_barrier{};
context->submit_compute_job(
get_tensor_to_nchw_shader(v_src),
get_tensor_to_nchw_shader(v_src, true, false),
pipeline_barrier,
{uint32_t(v_src.numel()), 1, 1},
{64, 1, 1},
Expand Down Expand Up @@ -99,7 +99,7 @@ void record_image_to_nchw_op(
vkapi::SpecVarList specialization_constants = {v_src.hashed_layout()};

context->submit_compute_job(
get_tensor_to_nchw_shader(v_src),
get_tensor_to_nchw_shader(v_src, true, false),
pipeline_barrier,
v_src.logical_limits(),
adaptive_work_group_size(v_src.logical_limits()),
Expand All @@ -119,7 +119,7 @@ void record_bitw8_image_to_nchw_nobitw8buffer_op(
uint32_t buffer_len = utils::safe_downcast<uint32_t>(dst_buffer.numel() / 4);
utils::uvec3 global_wg_size = {buffer_len, 1, 1};

std::string kernel_name = "bitw8_image_to_nchw_nobitw8buffer";
std::string kernel_name = "bitw8_image_to_nchw_nobitw8buffer_no_pc";
add_storage_type_suffix(kernel_name, v_src);
add_dtype_suffix(kernel_name, v_src);

Expand Down
3 changes: 1 addition & 2 deletions backends/vulkan/test/vulkan_compute_api_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1640,8 +1640,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) {
out.staging = graph.set_output_tensor(out.value);

// +1: staging buffer for the input tensor
// +1: staging buffer for the output tensor
expected_vma_allocation_count += 2;
expected_vma_allocation_count += 1;
EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count);

graph.prepare();
Expand Down
Loading