diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 72835e4ba3a..5c2687a2709 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -178,7 +178,14 @@ Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) { index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords); const auto [id, pointer_type] = buffer[BufferAlias::U32]; const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)}; - return ctx.OpLoad(ctx.U32[1], ptr); + const Id result{ctx.OpLoad(ctx.U32[1], ptr)}; + + if (Sirit::ValidId(buffer.size_dwords)) { + const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords); + return ctx.OpSelect(ctx.U32[1], in_bounds, result, ctx.u32_zero_value); + } else { + return result; + } } Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index d30e746d8cd..81493c498d7 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -192,32 +192,49 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f UNREACHABLE_MSG("Invalid attribute type {}", fmt); } +Id EmitContext::GetBufferSize(const u32 sharp_idx) { + const auto& srt_flatbuf = buffers.back(); + ASSERT(srt_flatbuf.buffer_type == BufferType::ReadConstUbo); + const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32]; + + const auto rsrc1{ + OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))}; + const auto rsrc2{ + OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 2)))}; + + const auto stride{OpBitFieldUExtract(U32[1], rsrc1, ConstU32(16u), ConstU32(14u))}; + const auto 
num_records{rsrc2}; + + const auto stride_zero{OpIEqual(U1[1], stride, u32_zero_value)}; + const auto stride_size{OpIMul(U32[1], num_records, stride)}; + return OpSelect(U32[1], stride_zero, num_records, stride_size); +} + void EmitContext::DefineBufferProperties() { for (BufferDefinition& buffer : buffers) { if (buffer.buffer_type != BufferType::Guest) { continue; } const u32 binding = buffer.binding; - const u32 offset_half = PushData::BufOffsetIndex + (binding >> 4); - const u32 offset_comp = (binding & 0xf) >> 2; - const u32 offset_bit = (binding & 0x3) << 3; - const Id offset_ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), - push_data_block, ConstU32(offset_half), - ConstU32(offset_comp))}; - const Id offset_value{OpLoad(U32[1], offset_ptr)}; - buffer.offset = - OpBitFieldUExtract(U32[1], offset_value, ConstU32(offset_bit), ConstU32(8U)); + const u32 half = PushData::BufOffsetIndex + (binding >> 4); + const u32 comp = (binding & 0xf) >> 2; + const u32 offset = (binding & 0x3) << 3; + const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), + push_data_block, ConstU32(half), ConstU32(comp))}; + const Id value{OpLoad(U32[1], ptr)}; + buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U)); Name(buffer.offset, fmt::format("buf{}_off", binding)); buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U)); Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding)); - if (!profile.supports_robust_buffer_access) { - const u32 size_field = PushData::BufSizesIndex + (binding >> 2); - const u32 size_comp = binding & 0x3; - const Id size_ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), - push_data_block, ConstU32(size_field), - ConstU32(size_comp))}; - buffer.size = OpLoad(U32[1], size_ptr); + // Only need to load size if performing bounds checks and the buffer is both guest and not + // inline. 
+ if (!profile.supports_robust_buffer_access && buffer.buffer_type == BufferType::Guest) { + if (buffer.desc.sharp_idx == std::numeric_limits<u32>::max()) { + buffer.size = ConstU32(buffer.desc.inline_cbuf.GetSize()); + } else { + buffer.size = GetBufferSize(buffer.desc.sharp_idx); + } Name(buffer.size, fmt::format("buf{}_size", binding)); buffer.size_shorts = OpShiftRightLogical(U32[1], buffer.size, ConstU32(1U)); Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding)); @@ -606,8 +623,7 @@ void EmitContext::DefineOutputs() { void EmitContext::DefinePushDataBlock() { // Create push constants block for instance steps rates const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4], - U32[4], U32[4], U32[4], U32[4], U32[4], U32[4], U32[4], - U32[4], U32[4], U32[4], U32[4], U32[4], U32[4]), + U32[4], U32[4], U32[4], U32[4], U32[4]), "AuxData")}; Decorate(struct_type, spv::Decoration::Block); MemberName(struct_type, PushData::Step0Index, "sr0"); @@ -622,14 +638,6 @@ void EmitContext::DefinePushDataBlock() { MemberName(struct_type, PushData::UdRegsIndex + 3, "ud_regs3"); MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0"); MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1"); - MemberName(struct_type, PushData::BufSizesIndex + 0, "buf_sizes0"); - MemberName(struct_type, PushData::BufSizesIndex + 1, "buf_sizes1"); - MemberName(struct_type, PushData::BufSizesIndex + 2, "buf_sizes2"); - MemberName(struct_type, PushData::BufSizesIndex + 3, "buf_sizes3"); - MemberName(struct_type, PushData::BufSizesIndex + 4, "buf_sizes4"); - MemberName(struct_type, PushData::BufSizesIndex + 5, "buf_sizes5"); - MemberName(struct_type, PushData::BufSizesIndex + 6, "buf_sizes6"); - MemberName(struct_type, PushData::BufSizesIndex + 7, "buf_sizes7"); MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U); MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U); 
MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U); @@ -642,14 +650,6 @@ void EmitContext::DefinePushDataBlock() { MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U); MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U); MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U); - MemberDecorate(struct_type, PushData::BufSizesIndex + 0, spv::Decoration::Offset, 120U); - MemberDecorate(struct_type, PushData::BufSizesIndex + 1, spv::Decoration::Offset, 136U); - MemberDecorate(struct_type, PushData::BufSizesIndex + 2, spv::Decoration::Offset, 152U); - MemberDecorate(struct_type, PushData::BufSizesIndex + 3, spv::Decoration::Offset, 168U); - MemberDecorate(struct_type, PushData::BufSizesIndex + 4, spv::Decoration::Offset, 184U); - MemberDecorate(struct_type, PushData::BufSizesIndex + 5, spv::Decoration::Offset, 200U); - MemberDecorate(struct_type, PushData::BufSizesIndex + 6, spv::Decoration::Offset, 216U); - MemberDecorate(struct_type, PushData::BufSizesIndex + 7, spv::Decoration::Offset, 232U); push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant); Name(push_data_block, "push_data"); interfaces.push_back(push_data_block); @@ -694,18 +694,28 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte break; default: Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "ubo", binding.buffer)); + break; } interfaces.push_back(id); return {id, pointer_type}; }; void EmitContext::DefineBuffers() { + if (!profile.supports_robust_buffer_access && !info.has_readconst) { + // In case ReadConstUbo has not already been bound by IR and is needed + // to query buffer sizes, bind it now. 
+ info.buffers.push_back({ + .used_types = IR::Type::U32, + .inline_cbuf = AmdGpu::Buffer::Null(), + .buffer_type = BufferType::ReadConstUbo, + }); + } for (const auto& desc : info.buffers) { const auto buf_sharp = desc.GetSharp(info); const bool is_storage = desc.IsStorage(buf_sharp, profile); // Define aliases depending on the shader usage. - auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type); + auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type, desc); if (True(desc.used_types & IR::Type::U32)) { spv_buffer[BufferAlias::U32] = DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 73fb0dccdfb..0b0c9961a6a 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -246,6 +246,7 @@ class EmitContext final : public Sirit::Module { struct BufferDefinition { u32 binding; BufferType buffer_type; + const BufferResource& desc; Id offset; Id offset_dwords; Id size; @@ -310,6 +311,8 @@ class EmitContext final : public Sirit::Module { Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name); Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name); + + Id GetBufferSize(u32 sharp_idx); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 9c65e590796..8dcf9c5c4e2 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -106,7 +106,6 @@ struct PushData { static constexpr u32 YScaleIndex = 5; static constexpr u32 UdRegsIndex = 6; static constexpr u32 BufOffsetIndex = UdRegsIndex + NumUserDataRegs / 4; - static constexpr u32 BufSizesIndex = BufOffsetIndex + NumBuffers / sizeof(u32) / 4; u32 step0; u32 step1; @@ -116,17 +115,14 @@ struct PushData { float yscale; std::array<u32, NumUserDataRegs> ud_regs; std::array<u8, NumBuffers> 
buf_offsets; - std::array<u32, NumBuffers> buf_sizes; - void AddBuffer(u32 binding, u32 offset, u32 size) { + void AddOffset(u32 binding, u32 offset) { ASSERT(offset < 256 && binding < buf_offsets.size()); buf_offsets[binding] = offset; - buf_sizes[binding] = size; } }; - static_assert(offsetof(PushData, buf_sizes) <= 128, - "PushData size without buf_sizes is greater than guaranteed by Vulkan spec"); - static_assert(sizeof(PushData) <= 256, "PushData size is greater than guaranteed by most GPUs"); + static_assert(sizeof(PushData) <= 128, + "PushData size is greater than minimum size guaranteed by Vulkan spec"); /** * Contains general information generated by the shader recompiler for an input program. */ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp index 0a60eadd3bf..bf43257f819 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp @@ -38,11 +38,7 @@ void Pipeline::BindResources(DescriptorWrites& set_writes, const BufferBarriers& } const auto stage_flags = IsCompute() ? vk::ShaderStageFlagBits::eCompute : gp_stage_flags; - // If not emulating buffer bounds checks, buffer sizes are not needed. - const auto push_constants_size = instance.IsRobustBufferAccess2Supported() - ? offsetof(Shader::PushData, buf_sizes) - : sizeof(Shader::PushData); - cmdbuf.pushConstants(*pipeline_layout, stage_flags, 0u, push_constants_size, &push_data); + cmdbuf.pushConstants(*pipeline_layout, stage_flags, 0u, sizeof(push_data), &push_data); // Bind descriptor set. 
if (set_writes.empty()) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c59d46b6366..4d58c0ea3cb 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -548,7 +548,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding const u32 offset_aligned = Common::AlignDown(offset, alignment); const u32 adjust = offset - offset_aligned; ASSERT(adjust % 4 == 0); - push_data.AddBuffer(binding.buffer, adjust, vsharp.GetSize()); + push_data.AddOffset(binding.buffer, adjust); buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, vsharp.GetSize() + adjust); if (auto barrier =