diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index c5d9f0ecd..88db554a9 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -20,14 +20,25 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst) { const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; - const IR::VectorReg dst_reg{inst.dst[0].code}; + IR::VectorReg dst_reg{inst.dst[0].code}; if (is_pair) { - // Pair loads are either 32 or 64-bit. We assume 32-bit for now. - ASSERT(bit_size == 32); + // Pair loads are either 32 or 64-bit const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); - ir.SetVectorReg(dst_reg, IR::U32{ir.LoadShared(32, is_signed, addr0)}); + const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0); + if (bit_size == 32) { + ir.SetVectorReg(dst_reg++, IR::U32{data0}); + } else { + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 0)}); + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 1)}); + } const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1))); - ir.SetVectorReg(dst_reg + 1, IR::U32{ir.LoadShared(32, is_signed, addr1)}); + const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1); + if (bit_size == 32) { + ir.SetVectorReg(dst_reg++, IR::U32{data1}); + } else { + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 0)}); + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)}); + } } else if (bit_size == 64) { const IR::Value data = ir.LoadShared(bit_size, is_signed, addr); ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)}); @@ -43,11 +54,20 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI const IR::VectorReg data0{inst.src[1].code}; const IR::VectorReg data1{inst.src[2].code}; if (is_pair) { - ASSERT(bit_size == 32); const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0))); - ir.WriteShared(32, ir.GetVectorReg(data0), addr0); + if (bit_size == 32) { + ir.WriteShared(32, ir.GetVectorReg(data0), addr0); + } else { + ir.WriteShared(64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)), + addr0); + } const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1))); - ir.WriteShared(32, ir.GetVectorReg(data1), addr1); + if (bit_size == 32) { + ir.WriteShared(32, ir.GetVectorReg(data1), addr1); + } else { + ir.WriteShared(64, ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1)), + addr1); + } } else if (bit_size == 64) { const IR::Value data = ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 4f94c7292..51e60eeb8 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -1142,15 +1142,27 @@ void Translate(IR::Block* block, u32 block_base, std::span inst_l case Opcode::DS_READ_B32: translator.DS_READ(32, false, false, inst); break; + case Opcode::DS_READ_B64: + translator.DS_READ(64, false, false, inst); + break; case Opcode::DS_READ2_B32: translator.DS_READ(32, false, true, inst); break; + case Opcode::DS_READ2_B64: + translator.DS_READ(64, false, true, inst); + break; case Opcode::DS_WRITE_B32: translator.DS_WRITE(32, false, false, inst); break; + case Opcode::DS_WRITE_B64: + translator.DS_WRITE(64, false, false, inst); + break; case Opcode::DS_WRITE2_B32: translator.DS_WRITE(32, false, true, inst); break; + case Opcode::DS_WRITE2_B64: + translator.DS_WRITE(64, false, true, inst); + break; case Opcode::V_READFIRSTLANE_B32: translator.V_READFIRSTLANE_B32(inst); break;