diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index b4919213b71..190129e5fca 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -277,10 +277,9 @@ class Translator { // Buffer Memory // MUBUF / MTBUF - void BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst); - void BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst); - void BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst); - void BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst); + void BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_typed, const GcnInst& inst); + void BUFFER_STORE(u32 num_dwords, bool is_inst_typed, bool is_buffer_typed, + const GcnInst& inst); void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst); // Image Memory diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index bfbe937a1ae..ed7788d8c77 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -11,59 +11,59 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { // Buffer load operations case Opcode::TBUFFER_LOAD_FORMAT_X: - return BUFFER_LOAD(1, true, inst); + return BUFFER_LOAD(1, true, false, inst); case Opcode::TBUFFER_LOAD_FORMAT_XY: - return BUFFER_LOAD(2, true, inst); + return BUFFER_LOAD(2, true, false, inst); case Opcode::TBUFFER_LOAD_FORMAT_XYZ: - return BUFFER_LOAD(3, true, inst); + return BUFFER_LOAD(3, true, false, inst); case Opcode::TBUFFER_LOAD_FORMAT_XYZW: - return BUFFER_LOAD(4, true, inst); + return BUFFER_LOAD(4, true, false, inst); case Opcode::BUFFER_LOAD_FORMAT_X: - return BUFFER_LOAD_FORMAT(1, inst); + return BUFFER_LOAD(1, false, true, inst); case Opcode::BUFFER_LOAD_FORMAT_XY: - return BUFFER_LOAD_FORMAT(2, inst); + return BUFFER_LOAD(2, false, true, inst); case Opcode::BUFFER_LOAD_FORMAT_XYZ: - return BUFFER_LOAD_FORMAT(3, inst); + return BUFFER_LOAD(3, false, true, inst); case Opcode::BUFFER_LOAD_FORMAT_XYZW: - return BUFFER_LOAD_FORMAT(4, inst); + return BUFFER_LOAD(4, false, true, inst); case Opcode::BUFFER_LOAD_DWORD: - return BUFFER_LOAD(1, false, inst); + return BUFFER_LOAD(1, false, false, inst); case Opcode::BUFFER_LOAD_DWORDX2: - return BUFFER_LOAD(2, false, inst); + return BUFFER_LOAD(2, false, false, inst); case Opcode::BUFFER_LOAD_DWORDX3: - return BUFFER_LOAD(3, false, inst); + return BUFFER_LOAD(3, false, false, inst); case Opcode::BUFFER_LOAD_DWORDX4: - return BUFFER_LOAD(4, false, inst); + return BUFFER_LOAD(4, false, false, inst); // Buffer store operations case Opcode::BUFFER_STORE_FORMAT_X: - return BUFFER_STORE_FORMAT(1, inst); + return BUFFER_STORE(1, false, true, inst); case Opcode::BUFFER_STORE_FORMAT_XY: - return BUFFER_STORE_FORMAT(2, inst); + return BUFFER_STORE(2, false, true, inst); case Opcode::BUFFER_STORE_FORMAT_XYZ: - return BUFFER_STORE_FORMAT(3, inst); + return BUFFER_STORE(3, false, true, inst); case Opcode::BUFFER_STORE_FORMAT_XYZW: - return BUFFER_STORE_FORMAT(4, inst); + return BUFFER_STORE(4, false, true, inst); case Opcode::TBUFFER_STORE_FORMAT_X: - return BUFFER_STORE(1, true, inst); + return BUFFER_STORE(1, true, false, inst); case Opcode::TBUFFER_STORE_FORMAT_XY: - return BUFFER_STORE(2, true, inst); + return BUFFER_STORE(2, true, false, inst); case Opcode::TBUFFER_STORE_FORMAT_XYZ: - return BUFFER_STORE(3, true, inst); + return BUFFER_STORE(3, true, false, inst); case Opcode::TBUFFER_STORE_FORMAT_XYZW: - return BUFFER_STORE(4, true, inst); + return BUFFER_STORE(4, true, false, inst); case Opcode::BUFFER_STORE_DWORD: - return BUFFER_STORE(1, false, inst); + return BUFFER_STORE(1, false, false, inst); case Opcode::BUFFER_STORE_DWORDX2: - return BUFFER_STORE(2, false, inst); + return BUFFER_STORE(2, false, false, inst); case Opcode::BUFFER_STORE_DWORDX3: - return BUFFER_STORE(3, false, inst); + return BUFFER_STORE(3, false, false, inst); case Opcode::BUFFER_STORE_DWORDX4: - return BUFFER_STORE(4, false, inst); + return BUFFER_STORE(4, false, false, inst); // Buffer atomic operations case Opcode::BUFFER_ATOMIC_ADD: @@ -165,7 +165,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { } } -void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) { +void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_typed, + const GcnInst& inst) { const auto& mubuf = inst.control.mubuf; const bool is_ring = mubuf.glc && mubuf.slc; const IR::VectorReg vaddr{inst.src[0].code}; @@ -195,66 +196,38 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) buffer_info.inst_offset.Assign(mubuf.offset); buffer_info.globally_coherent.Assign(mubuf.glc); buffer_info.system_coherent.Assign(mubuf.slc); - buffer_info.typed.Assign(is_typed); - if (is_typed) { + buffer_info.typed.Assign(is_inst_typed || is_buffer_typed); + if (is_inst_typed) { const auto& mtbuf = inst.control.mtbuf; - const auto dmft = static_cast(mtbuf.dfmt); - const auto nfmt = static_cast(mtbuf.nfmt); - ASSERT(nfmt == AmdGpu::NumberFormat::Float && - (dmft == AmdGpu::DataFormat::Format32_32_32_32 || - dmft == AmdGpu::DataFormat::Format32_32_32 || - dmft == AmdGpu::DataFormat::Format32_32 || dmft == AmdGpu::DataFormat::Format32)); + buffer_info.inst_data_fmt.Assign(static_cast(mtbuf.dfmt)); + buffer_info.inst_num_fmt.Assign(static_cast(mtbuf.nfmt)); + } else { + buffer_info.inst_data_fmt.Assign(AmdGpu::DataFormat::FormatInvalid); } const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); - const IR::Value value = ir.LoadBufferU32(num_dwords, handle, address, buffer_info); const IR::VectorReg dst_reg{inst.src[1].code}; - if (num_dwords == 1) { - ir.SetVectorReg(dst_reg, IR::U32{value}); - return; - } - for (u32 i = 0; i < num_dwords; i++) { - ir.SetVectorReg(dst_reg + i, IR::U32{ir.CompositeExtract(value, i)}); - } -} - -void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst) { - const auto& mubuf = inst.control.mubuf; - const IR::VectorReg vaddr{inst.src[0].code}; - const IR::ScalarReg sharp{inst.src[2].code * 4}; - const IR::Value address = [&] -> IR::Value { - if (mubuf.idxen && mubuf.offen) { - return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1)); + if (buffer_info.typed) { + const IR::Value value = ir.LoadBufferFormat(handle, address, buffer_info); + for (u32 i = 0; i < num_dwords; i++) { + ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)}); } - if (mubuf.idxen || mubuf.offen) { - return ir.GetVectorReg(vaddr); + } else { + const IR::Value value = ir.LoadBufferU32(num_dwords, handle, address, buffer_info); + if (num_dwords == 1) { + ir.SetVectorReg(dst_reg, IR::U32{value}); + return; + } + for (u32 i = 0; i < num_dwords; i++) { + ir.SetVectorReg(dst_reg + i, IR::U32{ir.CompositeExtract(value, i)}); } - return {}; - }(); - const IR::Value soffset{GetSrc(inst.src[3])}; - ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); - - IR::BufferInstInfo buffer_info{}; - buffer_info.index_enable.Assign(mubuf.idxen); - buffer_info.offset_enable.Assign(mubuf.offen); - buffer_info.inst_offset.Assign(mubuf.offset); - buffer_info.globally_coherent.Assign(mubuf.glc); - buffer_info.system_coherent.Assign(mubuf.slc); - buffer_info.typed.Assign(true); - - const IR::Value handle = - ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), - ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); - const IR::Value value = ir.LoadBufferFormat(handle, address, buffer_info); - const IR::VectorReg dst_reg{inst.src[1].code}; - for (u32 i = 0; i < num_dwords; i++) { - ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)}); } } -void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst) { +void Translator::BUFFER_STORE(u32 num_dwords, bool is_inst_typed, bool is_buffer_typed, + const GcnInst& inst) { const auto& mubuf = inst.control.mubuf; const bool is_ring = mubuf.glc && mubuf.slc; const IR::VectorReg vaddr{inst.src[0].code}; @@ -285,80 +258,38 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst buffer_info.inst_offset.Assign(mubuf.offset); buffer_info.globally_coherent.Assign(mubuf.glc); buffer_info.system_coherent.Assign(mubuf.slc); - buffer_info.typed.Assign(is_typed); - if (is_typed) { + buffer_info.typed.Assign(is_inst_typed || is_buffer_typed); + if (is_inst_typed) { const auto& mtbuf = inst.control.mtbuf; - const auto dmft = static_cast(mtbuf.dfmt); - const auto nfmt = static_cast(mtbuf.nfmt); - ASSERT(nfmt == AmdGpu::NumberFormat::Float && - (dmft == AmdGpu::DataFormat::Format32_32_32_32 || - dmft == AmdGpu::DataFormat::Format32_32_32 || - dmft == AmdGpu::DataFormat::Format32_32 || dmft == AmdGpu::DataFormat::Format32)); + buffer_info.inst_data_fmt.Assign(static_cast(mtbuf.dfmt)); + buffer_info.inst_num_fmt.Assign(static_cast(mtbuf.nfmt)); + } else { + buffer_info.inst_data_fmt.Assign(AmdGpu::DataFormat::FormatInvalid); } - IR::Value value{}; - const IR::VectorReg src_reg{inst.src[1].code}; - switch (num_dwords) { - case 1: - value = ir.GetVectorReg(src_reg); - break; - case 2: - value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1)); - break; - case 3: - value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1), - ir.GetVectorReg(src_reg + 2)); - break; - case 4: - value = ir.CompositeConstruct(ir.GetVectorReg(src_reg), ir.GetVectorReg(src_reg + 1), - ir.GetVectorReg(src_reg + 2), ir.GetVectorReg(src_reg + 3)); - break; - } const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); - ir.StoreBufferU32(num_dwords, handle, address, value, buffer_info); -} - -void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) { - const auto& mubuf = inst.control.mubuf; - const IR::VectorReg vaddr{inst.src[0].code}; - const IR::ScalarReg sharp{inst.src[2].code * 4}; - const IR::Value address = [&] -> IR::Value { - if (mubuf.idxen && mubuf.offen) { - return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1)); - } - if (mubuf.idxen || mubuf.offen) { - return ir.GetVectorReg(vaddr); - } - return {}; - }(); - const IR::Value soffset{GetSrc(inst.src[3])}; - ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported"); - - IR::BufferInstInfo buffer_info{}; - buffer_info.index_enable.Assign(mubuf.idxen); - buffer_info.offset_enable.Assign(mubuf.offen); - buffer_info.inst_offset.Assign(mubuf.offset); - buffer_info.globally_coherent.Assign(mubuf.glc); - buffer_info.system_coherent.Assign(mubuf.slc); - buffer_info.typed.Assign(true); - const IR::VectorReg src_reg{inst.src[1].code}; - std::array comps{}; + boost::container::static_vector comps; for (u32 i = 0; i < num_dwords; i++) { - comps[i] = ir.GetVectorReg(src_reg + i); + const auto src_reg_i = src_reg + i; + if (buffer_info.typed) { + comps.push_back(ir.GetVectorReg(src_reg_i)); + } else { + comps.push_back(ir.GetVectorReg(src_reg_i)); + } } - for (u32 i = num_dwords; i < 4; i++) { - comps[i] = ir.Imm32(0.f); + if (buffer_info.typed) { + for (u32 i = num_dwords; i < 4; i++) { + comps.push_back(ir.Imm32(0.f)); + } + ir.StoreBufferFormat(handle, address, ir.CompositeConstruct(comps), buffer_info); + } else { + const auto value = num_dwords == 1 ? comps[0] : ir.CompositeConstruct(comps); + ir.StoreBufferU32(num_dwords, handle, address, value, buffer_info); } - - const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]); - const IR::Value handle = - ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), - ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); - ir.StoreBufferFormat(handle, address, value, buffer_info); } void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) { diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 06c01878dc8..3615e8cbb66 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -638,7 +638,8 @@ Value IREmitter::CompositeConstruct(std::span elements) { case 4: return CompositeConstruct(elements[0], elements[1], elements[2], elements[3]); default: - UNREACHABLE_MSG("Composite construct with greater than 4 elements"); + UNREACHABLE_MSG("Composite construct with {} elements, only 2-4 are supported", + elements.size()); } } diff --git a/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp index b30b022f884..3fdc6f0cd31 100644 --- a/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp +++ b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp @@ -10,6 +10,14 @@ namespace Shader::Optimization { +struct FormatInfo { + AmdGpu::DataFormat data_format; + AmdGpu::NumberFormat num_format; + AmdGpu::CompMapping swizzle; + AmdGpu::NumberConversion num_conversion; + int num_components; +}; + static bool IsBufferFormatLoad(const IR::Inst& inst) { return inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32; } @@ -18,152 +26,151 @@ static bool IsBufferFormatStore(const IR::Inst& inst) { return inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32; } -static IR::Value LoadBufferFormat(IR::IREmitter& ir, const AmdGpu::Buffer& buffer, - const IR::Value handle, const IR::U32 address, - const IR::BufferInstInfo info) { - const auto data_fmt = buffer.GetDataFmt(); - const auto num_fmt = buffer.GetNumberFmt(); - const auto num_conv = buffer.GetNumberConversion(); - const auto num_components = AmdGpu::NumComponents(buffer.GetDataFmt()); - +static IR::Value LoadBufferFormat(IR::IREmitter& ir, const IR::Value handle, const IR::U32 address, + const IR::BufferInstInfo info, const FormatInfo& format_info) { IR::Value interpreted; - switch (data_fmt) { + switch (format_info.data_format) { case AmdGpu::DataFormat::FormatInvalid: interpreted = ir.Imm32(0.f); break; case AmdGpu::DataFormat::Format8: { - const auto unpacked = ir.Unpack4x8(num_fmt, ir.LoadBufferU8(handle, address, info)); + const auto unpacked = + ir.Unpack4x8(format_info.num_format, ir.LoadBufferU8(handle, address, info)); interpreted = ir.CompositeExtract(unpacked, 0); break; } case AmdGpu::DataFormat::Format8_8: { const auto raw = ir.LoadBufferU16(handle, address, info); - const auto unpacked = ir.Unpack4x8(num_fmt, raw); + const auto unpacked = ir.Unpack4x8(format_info.num_format, raw); interpreted = ir.CompositeConstruct(ir.CompositeExtract(unpacked, 0), ir.CompositeExtract(unpacked, 1)); break; } case AmdGpu::DataFormat::Format8_8_8_8: - interpreted = ir.Unpack4x8(num_fmt, IR::U32{ir.LoadBufferU32(1, handle, address, info)}); + interpreted = ir.Unpack4x8(format_info.num_format, + IR::U32{ir.LoadBufferU32(1, handle, address, info)}); break; case AmdGpu::DataFormat::Format16: { - const auto unpacked = ir.Unpack2x16(num_fmt, ir.LoadBufferU16(handle, address, info)); + const auto unpacked = + ir.Unpack2x16(format_info.num_format, ir.LoadBufferU16(handle, address, info)); interpreted = ir.CompositeExtract(unpacked, 0); break; } case AmdGpu::DataFormat::Format16_16: - interpreted = ir.Unpack2x16(num_fmt, IR::U32{ir.LoadBufferU32(1, handle, address, info)}); + interpreted = ir.Unpack2x16(format_info.num_format, + IR::U32{ir.LoadBufferU32(1, handle, address, info)}); break; case AmdGpu::DataFormat::Format10_11_11: - interpreted = - ir.Unpack10_11_11(num_fmt, IR::U32{ir.LoadBufferU32(1, handle, address, info)}); + interpreted = ir.Unpack10_11_11(format_info.num_format, + IR::U32{ir.LoadBufferU32(1, handle, address, info)}); break; case AmdGpu::DataFormat::Format2_10_10_10: - interpreted = - ir.Unpack2_10_10_10(num_fmt, IR::U32{ir.LoadBufferU32(1, handle, address, info)}); + interpreted = ir.Unpack2_10_10_10(format_info.num_format, + IR::U32{ir.LoadBufferU32(1, handle, address, info)}); break; case AmdGpu::DataFormat::Format16_16_16_16: { const auto raw = ir.LoadBufferU32(2, handle, address, info); - interpreted = - ir.CompositeConstruct(ir.Unpack2x16(num_fmt, IR::U32{ir.CompositeExtract(raw, 0)}), - ir.Unpack2x16(num_fmt, IR::U32{ir.CompositeExtract(raw, 1)})); + interpreted = ir.CompositeConstruct( + ir.Unpack2x16(format_info.num_format, IR::U32{ir.CompositeExtract(raw, 0)}), + ir.Unpack2x16(format_info.num_format, IR::U32{ir.CompositeExtract(raw, 1)})); break; } case AmdGpu::DataFormat::Format32: case AmdGpu::DataFormat::Format32_32: case AmdGpu::DataFormat::Format32_32_32: case AmdGpu::DataFormat::Format32_32_32_32: { - ASSERT(num_fmt == AmdGpu::NumberFormat::Uint || num_fmt == AmdGpu::NumberFormat::Sint || - num_fmt == AmdGpu::NumberFormat::Float); - interpreted = ir.LoadBufferF32(num_components, handle, address, info); + ASSERT(format_info.num_format == AmdGpu::NumberFormat::Uint || + format_info.num_format == AmdGpu::NumberFormat::Sint || + format_info.num_format == AmdGpu::NumberFormat::Float); + interpreted = ir.LoadBufferF32(format_info.num_components, handle, address, info); break; } default: - UNREACHABLE_MSG("Unsupported buffer data format: {}", data_fmt); + UNREACHABLE_MSG("Unsupported buffer data format: {}", format_info.data_format); } // Pad to 4 components and apply additional modifications. boost::container::static_vector components; for (u32 i = 0; i < 4; i++) { - if (i < num_components) { + if (i < format_info.num_components) { const auto component = - IR::F32{num_components == 1 ? interpreted : ir.CompositeExtract(interpreted, i)}; - components.push_back(ApplyReadNumberConversion(ir, component, num_conv)); + IR::F32{format_info.num_components == 1 ? interpreted + : ir.CompositeExtract(interpreted, i)}; + components.push_back( + ApplyReadNumberConversion(ir, component, format_info.num_conversion)); } else { components.push_back(ir.Imm32(0.f)); } } - const auto swizzled = ApplySwizzle(ir, ir.CompositeConstruct(components), buffer.DstSelect()); + const auto swizzled = ApplySwizzle(ir, ir.CompositeConstruct(components), format_info.swizzle); return swizzled; } -static void StoreBufferFormat(IR::IREmitter& ir, const AmdGpu::Buffer& buffer, - const IR::Value handle, const IR::U32 address, const IR::Value& value, - const IR::BufferInstInfo info) { - const auto data_fmt = buffer.GetDataFmt(); - const auto num_fmt = buffer.GetNumberFmt(); - const auto num_conv = buffer.GetNumberConversion(); - const auto num_components = AmdGpu::NumComponents(buffer.GetDataFmt()); - +static void StoreBufferFormat(IR::IREmitter& ir, const IR::Value handle, const IR::U32 address, + const IR::Value& value, const IR::BufferInstInfo info, + const FormatInfo& format_info) { // Extract actual number of components and apply additional modifications. - const auto swizzled = ApplySwizzle(ir, value, buffer.DstSelect().Inverse()); + const auto swizzled = ApplySwizzle(ir, value, format_info.swizzle.Inverse()); boost::container::static_vector components; - for (u32 i = 0; i < num_components; i++) { + for (u32 i = 0; i < format_info.num_components; i++) { const auto component = IR::F32{ir.CompositeExtract(swizzled, i)}; - components.push_back(ApplyWriteNumberConversion(ir, component, num_conv)); + components.push_back(ApplyWriteNumberConversion(ir, component, format_info.num_conversion)); } const auto real_value = components.size() == 1 ? components[0] : ir.CompositeConstruct(components); - switch (data_fmt) { + switch (format_info.data_format) { case AmdGpu::DataFormat::FormatInvalid: break; case AmdGpu::DataFormat::Format8: { const auto packed = - ir.Pack4x8(num_fmt, ir.CompositeConstruct(real_value, ir.Imm32(0.f), ir.Imm32(0.f), - ir.Imm32(0.f))); + ir.Pack4x8(format_info.num_format, ir.CompositeConstruct(real_value, ir.Imm32(0.f), + ir.Imm32(0.f), ir.Imm32(0.f))); ir.StoreBufferU8(handle, address, packed, info); break; } case AmdGpu::DataFormat::Format8_8: { - const auto packed = - ir.Pack4x8(num_fmt, ir.CompositeConstruct(ir.CompositeExtract(real_value, 0), - ir.CompositeExtract(real_value, 1), - ir.Imm32(0.f), ir.Imm32(0.f))); + const auto packed = ir.Pack4x8(format_info.num_format, + ir.CompositeConstruct(ir.CompositeExtract(real_value, 0), + ir.CompositeExtract(real_value, 1), + ir.Imm32(0.f), ir.Imm32(0.f))); ir.StoreBufferU16(handle, address, packed, info); break; } case AmdGpu::DataFormat::Format8_8_8_8: { - auto packed = ir.Pack4x8(num_fmt, real_value); + auto packed = ir.Pack4x8(format_info.num_format, real_value); ir.StoreBufferU32(1, handle, address, packed, info); break; } case AmdGpu::DataFormat::Format16: { - const auto packed = ir.Pack2x16(num_fmt, ir.CompositeConstruct(real_value, ir.Imm32(0.f))); + const auto packed = + ir.Pack2x16(format_info.num_format, ir.CompositeConstruct(real_value, ir.Imm32(0.f))); ir.StoreBufferU16(handle, address, packed, info); break; } case AmdGpu::DataFormat::Format16_16: { - const auto packed = ir.Pack2x16(num_fmt, real_value); + const auto packed = ir.Pack2x16(format_info.num_format, real_value); ir.StoreBufferU32(1, handle, address, packed, info); break; } case AmdGpu::DataFormat::Format10_11_11: { - const auto packed = ir.Pack10_11_11(num_fmt, real_value); + const auto packed = ir.Pack10_11_11(format_info.num_format, real_value); ir.StoreBufferU32(1, handle, address, packed, info); break; } case AmdGpu::DataFormat::Format2_10_10_10: { - const auto packed = ir.Pack2_10_10_10(num_fmt, real_value); + const auto packed = ir.Pack2_10_10_10(format_info.num_format, real_value); ir.StoreBufferU32(1, handle, address, packed, info); break; } case AmdGpu::DataFormat::Format16_16_16_16: { const auto packed = ir.CompositeConstruct( - ir.Pack2x16(num_fmt, ir.CompositeConstruct(ir.CompositeExtract(real_value, 0), - ir.CompositeExtract(real_value, 1))), - ir.Pack2x16(num_fmt, ir.CompositeConstruct(ir.CompositeExtract(real_value, 2), - ir.CompositeExtract(real_value, 3)))); + ir.Pack2x16(format_info.num_format, + ir.CompositeConstruct(ir.CompositeExtract(real_value, 0), + ir.CompositeExtract(real_value, 1))), + ir.Pack2x16(format_info.num_format, + ir.CompositeConstruct(ir.CompositeExtract(real_value, 2), + ir.CompositeExtract(real_value, 3)))); ir.StoreBufferU32(2, handle, address, packed, info); break; } @@ -171,28 +178,40 @@ static void StoreBufferFormat(IR::IREmitter& ir, const AmdGpu::Buffer& buffer, case AmdGpu::DataFormat::Format32_32: case AmdGpu::DataFormat::Format32_32_32: case AmdGpu::DataFormat::Format32_32_32_32: { - ASSERT(num_fmt == AmdGpu::NumberFormat::Uint || num_fmt == AmdGpu::NumberFormat::Sint || - num_fmt == AmdGpu::NumberFormat::Float); - ir.StoreBufferF32(num_components, handle, address, real_value, info); + ASSERT(format_info.num_format == AmdGpu::NumberFormat::Uint || + format_info.num_format == AmdGpu::NumberFormat::Sint || + format_info.num_format == AmdGpu::NumberFormat::Float); + ir.StoreBufferF32(format_info.num_components, handle, address, real_value, info); break; } default: - UNREACHABLE_MSG("Unsupported buffer data format: {}", data_fmt); + UNREACHABLE_MSG("Unsupported buffer data format: {}", format_info.data_format); } } static void LowerBufferFormatInst(IR::Block& block, IR::Inst& inst, Info& info) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + const auto flags = inst.Flags(); const auto desc{info.buffers[inst.Arg(0).U32()]}; const auto buffer{desc.GetSharp(info)}; + const auto is_inst_typed = flags.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid; + const auto data_format = is_inst_typed ? flags.inst_data_fmt.Value() : buffer.GetDataFmt(); + const auto num_format = is_inst_typed ? flags.inst_num_fmt.Value() : buffer.GetNumberFmt(); + const auto format_info = FormatInfo{ + .data_format = data_format, + .num_format = num_format, + .swizzle = is_inst_typed ? AmdGpu::IdentityMapping : buffer.DstSelect(), + .num_conversion = AmdGpu::MapNumberConversion(num_format), + .num_components = AmdGpu::NumComponents(data_format), + }; + if (IsBufferFormatLoad(inst)) { - const auto interpreted = LoadBufferFormat(ir, buffer, inst.Arg(0), IR::U32{inst.Arg(1)}, - inst.Flags()); + const auto interpreted = + LoadBufferFormat(ir, inst.Arg(0), IR::U32{inst.Arg(1)}, flags, format_info); inst.ReplaceUsesWithAndRemove(interpreted); } else if (IsBufferFormatStore(inst)) { - StoreBufferFormat(ir, buffer, inst.Arg(0), IR::U32{inst.Arg(1)}, inst.Arg(2), - inst.Flags()); + StoreBufferFormat(ir, inst.Arg(0), IR::U32{inst.Arg(1)}, inst.Arg(2), flags, format_info); inst.Invalidate(); } } diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index 3ee7c435563..40c4b61c341 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -7,6 +7,7 @@ #include "common/bit_field.h" #include "common/enum.h" #include "common/types.h" +#include "video_core/amdgpu/types.h" namespace Shader::IR { @@ -52,6 +53,8 @@ union BufferInstInfo { BitField<14, 1, u32> system_coherent; BitField<15, 1, u32> globally_coherent; BitField<16, 1, u32> typed; + BitField<17, 4, AmdGpu::DataFormat> inst_data_fmt; + BitField<21, 3, AmdGpu::NumberFormat> inst_num_fmt; }; enum class ScalarReg : u32 { diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h index d991e0abd09..d1cf1907635 100644 --- a/src/video_core/amdgpu/types.h +++ b/src/video_core/amdgpu/types.h @@ -262,6 +262,13 @@ struct CompMapping { } }; +static constexpr CompMapping IdentityMapping = { + .r = CompSwizzle::Red, + .g = CompSwizzle::Green, + .b = CompSwizzle::Blue, + .a = CompSwizzle::Alpha, +}; + inline DataFormat RemapDataFormat(const DataFormat format) { switch (format) { case DataFormat::Format11_11_10: