diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 497658ccd..7c98ddd5e 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -29,6 +29,8 @@ void Translator::EmitDataShare(const GcnInst& inst) { return DS_MAX_U32(inst); case Opcode::DS_MIN_U32: return DS_MIN_U32(inst); + case Opcode::DS_ADD_U32: + return DS_ADD_U32(inst); default: LogMissingOpcode(inst); } @@ -144,6 +146,21 @@ void Translator::DS_MIN_U32(const GcnInst& inst) { } } +void Translator::DS_ADD_U32(const GcnInst& inst) { /* Translates DS_ADD_U32 as a shared-memory read-modify-write: loads 32 bits at (src[0] + offset0) & ~3, adds src[1], and writes the sum back. NOTE(review): this is emitted as separate LoadShared / IAdd / WriteShared ops rather than a single atomic op -- confirm this is safe when several invocations target the same address. */ + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = ir.Imm32( + u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + const IR::U32 aligned_addr = ir.BitwiseAnd(addr_offset, ir.Imm32(~3)); /* mask off the low two bits of the address */ + const IR::U32 old_value = IR::U32(ir.LoadShared(32, false, aligned_addr)); + const IR::U32 new_value = ir.IAdd(old_value, data); + ir.WriteShared(32, new_value, aligned_addr); + if (inst.dst[0].type != ScalarType::Undefined) { + SetDst(inst.dst[0], new_value); /* NOTE(review): dst receives the post-add value; if a returning form of this instruction is expected to yield the pre-add value, this should be old_value -- confirm against the ISA reference. */ + } +} + void Translator::S_BARRIER() { ir.Barrier(); } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 08e7835a1..5f05d9761 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -187,7 +187,7 @@ public: // Vector Memory void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst); void BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst); - void BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst); + void BUFFER_ATOMIC(u32 num_dwords, AtomicOp op, const GcnInst& inst); // Vector interpolation void V_INTERP_P2_F32(const GcnInst& inst); @@ -199,6 +199,7 @@ public: void DS_WRITE(int bit_size, bool
is_signed, bool is_pair, const GcnInst& inst); void DS_MAX_U32(const GcnInst& inst); void DS_MIN_U32(const GcnInst& inst); + void DS_ADD_U32(const GcnInst& inst); void V_READFIRSTLANE_B32(const GcnInst& inst); void V_READLANE_B32(const GcnInst& inst); void V_WRITELANE_B32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 8fcbf2a27..da8ea525e 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -89,7 +89,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { case Opcode::BUFFER_STORE_DWORDX4: return BUFFER_STORE_FORMAT(4, false, inst); case Opcode::BUFFER_ATOMIC_ADD: - return BUFFER_ATOMIC(AtomicOp::Add, inst); + return BUFFER_ATOMIC(1, AtomicOp::Add, inst); default: LogMissingOpcode(inst); } @@ -416,21 +416,21 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns ir.StoreBuffer(num_dwords, handle, address, value, info); } -void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) { +void Translator::BUFFER_ATOMIC(u32 num_dwords, AtomicOp op, const GcnInst& inst) { const auto& mtbuf = inst.control.mtbuf; - IR::VectorReg src_reg{inst.src[1].code}; - IR::VectorReg addr_reg{inst.src[0].code}; + const IR::VectorReg vaddr{inst.src[0].code}; const IR::ScalarReg sharp{inst.src[2].code * 4}; - - const IR::Value address = [&]() -> IR::Value { + const IR::Value address = [&] -> IR::Value { /* NOTE(review): omitting the empty parameter list before a trailing return type is C++23 syntax (P1102R2) -- confirm the project's language standard allows it. */ if (mtbuf.idxen && mtbuf.offen) { - return ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1)); + return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1)); } if (mtbuf.idxen || mtbuf.offen) { - return ir.GetVectorReg(addr_reg); + return ir.GetVectorReg(vaddr); } - return IR::Value{}; + return {}; }(); + const IR::Value soffset{GetSrc(inst.src[3])}; + ASSERT_MSG(soffset.IsImmediate() && soffset.U32() ==
0, "Non immediate offset not supported"); IR::BufferInstInfo info{}; info.index_enable.Assign(mtbuf.idxen); @@ -440,7 +440,8 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) { const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3)); - const IR::Value value = ir.GetVectorReg(src_reg); + + const IR::Value value = ir.LoadBufferFormat(num_dwords, handle, address, info); /* NOTE(review): the operand is now loaded from the buffer itself; the removed code read it from the inst.src[1] vector register -- confirm this is intended. */ const IR::Value result = [&] { switch (op) { @@ -470,8 +471,10 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) { UNREACHABLE(); } }(); - const IR::U32F32 c_result = static_cast(result); - ir.SetVectorReg(src_reg, c_result); + + // TODO: Check if unused + // const IR::VectorReg dst_reg{inst.src[1].code}; + ir.StoreBuffer(num_dwords, handle, address, value, info); /* NOTE(review): this stores value (the loaded operand) back, not result; result is not used anywhere else in the visible lines -- confirm. */ } void Translator::IMAGE_GET_LOD(const GcnInst& inst) { diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index b7c9d66ad..f94299fb5 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -91,17 +91,17 @@ OPCODE(StoreBufferF32x4, Void, Opaq OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, ) // Buffer atomic operations -OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32, ) -OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32, ) -OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32, ) -OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32, ) -OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32, ) -OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, U32, ) -OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, U32, ) -OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, ) -OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, ) -OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, ) -OPCODE(BufferAtomicExchange32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicIAdd32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicSMin32, U32, Opaque, Opaque, U32, )
+OPCODE(BufferAtomicUMin32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicSMax32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicUMax32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicInc32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicDec32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicAnd32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicExchange32, U32, Opaque, Opaque, U32, ) // Vector utility OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )