From 6f6652a46ae81d013235a1a1d99ef218d0523ccc Mon Sep 17 00:00:00 2001
From: Lander Gallastegi
Date: Thu, 10 Apr 2025 01:35:24 +0200
Subject: [PATCH] General fixes

---
 .../asm_x64/emit_x64_context_get_set.cpp      | 52 +++++++++++++------
 .../backend/asm_x64/emit_x64_instructions.h   |  6 +--
 .../backend/asm_x64/emit_x64_logical.cpp      | 18 ++++---
 .../backend/asm_x64/x64_emit_context.cpp      |  6 ++-
 .../backend/asm_x64/x64_utils.cpp             | 12 +++--
 .../frontend/translate/scalar_memory.cpp      |  2 +-
 src/shader_recompiler/info.h                  |  3 +-
 .../ir/passes/resource_tracking_pass.cpp      |  3 +-
 src/shader_recompiler/ir/passes/srt.h         |  2 +-
 9 files changed, 67 insertions(+), 37 deletions(-)

diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
index f097d68ae..8d40a973b 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_context_get_set.cpp
@@ -12,16 +12,15 @@ using namespace Xbyak::util;
 
 void EmitGetUserData(EmitContext& ctx, const Operands& dest, IR::ScalarReg reg) {
     const u32 offset = static_cast<u32>(reg) << 2;
-    Reg& tmp = ctx.TempGPReg();
+    Reg tmp = ctx.TempGPReg();
     ctx.Code().lea(tmp, ptr[ctx.UserData() + offset]);
     MovGP(ctx, dest[0], dword[tmp]);
 }
 
 void EmitSetUserData(EmitContext& ctx, const Operands& offset, const Operands& value) {
-    Reg& tmp = ctx.TempGPReg();
+    Reg tmp = ctx.TempGPReg();
     MovGP(ctx, tmp, offset[0]);
-    ctx.Code().shl(tmp, 2);
-    ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp]);
+    ctx.Code().lea(tmp, ptr[ctx.UserData() + tmp * 4]);
     MovGP(ctx, dword[tmp], value[0]);
 }
 
@@ -58,32 +57,53 @@ void EmitGetGotoVariable(EmitContext&) {
 }
 
 void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset) {
-    Reg& tmp = ctx.TempGPReg();
+    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg().changeBit(64);
+    Reg off_tmp = offset[0].IsMem() ? ctx.TempGPReg() : offset[0].Reg().changeBit(64);
     MovGP(ctx, tmp, base[1]);
+    MovGP(ctx, off_tmp, offset[0]);
     ctx.Code().shl(tmp, 32);
     ctx.Code().or_(tmp, base[0].Op());
-    if (offset[0].IsMem()) {
-        ctx.Code().add(tmp, offset[0].Mem());
-    } else {
-        ctx.Code().lea(tmp, ptr[tmp + offset[0].Reg().cvt64()]);
-    }
+    ctx.Code().lea(tmp, ptr[tmp + off_tmp * 4]);
     MovGP(ctx, dest[0], dword[tmp]);
 }
 
-void EmitReadConstBuffer(EmitContext& ctx) {
-    throw NotImplementedException("ReadConstBuffer");
+void EmitReadConstBuffer(EmitContext& ctx, const Operands& dest, const Operands& handle, const Operands& offset) {
+    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg() : dest[0].Reg().changeBit(64);
+    // Reconstruct base address
+    Reg off_tmp = ctx.TempGPReg();
+    MovGP(ctx, tmp, handle[1]);
+    ctx.Code().and_(tmp, 0xFFF);
+    ctx.Code().shl(tmp, 32);
+    MovGP(ctx, off_tmp.cvt32(), handle[0]);
+    ctx.Code().and_(off_tmp.cvt32(), 0xFFFFFFFF);
+    ctx.Code().or_(tmp, off_tmp);
+    // TODO: we should correctly clamp the offset
+    MovGP(ctx, off_tmp, offset[0]);
+    ctx.Code().lea(tmp, ptr[tmp + off_tmp * 4]);
+    MovGP(ctx, dest[0], dword[tmp]);
+
 }
 
 void EmitReadStepRate(EmitContext& ctx) {
     throw NotImplementedException("ReadStepRate");
 }
 
-void EmitGetAttribute(EmitContext& ctx) {
-    throw NotImplementedException("GetAttribute");
+void EmitGetAttribute(EmitContext& ctx, const Operands& dest) {
+    LOG_WARNING(Render_Recompiler, "GetAttribute stubbed, setting to 0.0");
+    if (dest[0].IsMem()) {
+        ctx.Code().mov(dest[0].Mem(), 0);
+    } else {
+        ctx.Code().pxor(dest[0].Xmm(), dest[0].Xmm());
+    }
 }
 
-void EmitGetAttributeU32(EmitContext& ctx) {
-    throw NotImplementedException("GetAttributeU32");
+void EmitGetAttributeU32(EmitContext& ctx, const Operands& dest) {
+    LOG_WARNING(Render_Recompiler, "GetAttributeU32 stubbed, setting to 0");
+    if (dest[0].IsMem()) {
+        ctx.Code().mov(dest[0].Mem(), 0);
+    } else {
+        ctx.Code().xor_(dest[0].Reg(), dest[0].Reg());
+    }
 }
 
 void EmitSetAttribute(EmitContext& ctx) {
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
index d4a1c961c..4c109d1cf 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
@@ -61,7 +61,7 @@ void EmitSetGotoVariable(EmitContext& ctx);
 void EmitGetGotoVariable(EmitContext& ctx);
 void EmitSetScc(EmitContext& ctx);
 void EmitReadConst(EmitContext& ctx, const Operands& dest, const Operands& base, const Operands& offset);
-void EmitReadConstBuffer(EmitContext& ctx);
+void EmitReadConstBuffer(EmitContext& ctx, const Operands& dest, const Operands& handle, const Operands& offset);
 void EmitLoadBufferU8(EmitContext& ctx);
 void EmitLoadBufferU16(EmitContext& ctx);
 void EmitLoadBufferU32(EmitContext& ctx);
@@ -95,8 +95,8 @@ void EmitBufferAtomicAnd32(EmitContext& ctx);
 void EmitBufferAtomicOr32(EmitContext& ctx);
 void EmitBufferAtomicXor32(EmitContext& ctx);
 void EmitBufferAtomicSwap32(EmitContext& ctx);
-void EmitGetAttribute(EmitContext& ctx);
-void EmitGetAttributeU32(EmitContext& ctx);
+void EmitGetAttribute(EmitContext& ctx, const Operands& dest);
+void EmitGetAttributeU32(EmitContext& ctx, const Operands& dest);
 void EmitSetAttribute(EmitContext& ctx);
 void EmitGetTessGenericAttribute(EmitContext& ctx);
 void EmitSetTcsGenericAttribute(EmitContext& ctx);
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp
index d1d7cfb74..a7714e91b 100644
--- a/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_logical.cpp
@@ -10,29 +10,33 @@ using namespace Xbyak;
 using namespace Xbyak::util;
 
 void EmitLogicalOr(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
-    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0].Reg().cvt8();
+    OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0];
     MovGP(ctx, tmp, op1[0]);
-    ctx.Code().or_(tmp, op2[0].Op());
+    ctx.Code().or_(tmp.Op(), op2[0].Op());
+    ctx.Code().and_(tmp.Op(), 1);
     MovGP(ctx, dest[0], tmp);
 }
 
 void EmitLogicalAnd(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
-    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0].Reg().cvt8();
-    MovGP(ctx, tmp, op1[0]);
-    ctx.Code().and_(tmp, op2[0].Op());
+    OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0];
+    MovGP(ctx, tmp.Op(), op1[0]);
+    ctx.Code().and_(tmp.Op(), op2[0].Op());
+    ctx.Code().and_(tmp.Op(), 1);
     MovGP(ctx, dest[0], tmp);
 }
 
 void EmitLogicalXor(EmitContext& ctx, const Operands& dest, const Operands& op1, const Operands& op2) {
-    Reg tmp = dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0].Reg().cvt8();
+    OperandHolder tmp = op2[0].IsMem() && dest[0].IsMem() ? ctx.TempGPReg().cvt8() : dest[0];
     MovGP(ctx, tmp, op1[0]);
-    ctx.Code().xor_(tmp, op2[0].Op());
+    ctx.Code().xor_(tmp.Op(), op2[0].Op());
+    ctx.Code().and_(tmp.Op(), 1);
     MovGP(ctx, dest[0], tmp);
 }
 
 void EmitLogicalNot(EmitContext& ctx, const Operands& dest, const Operands& op) {
     MovGP(ctx, dest[0], op[0]);
     ctx.Code().not_(dest[0].Op());
+    ctx.Code().and_(dest[0].Op(), 1);
 }
 
 } // namespace Shader::Backend::X64
\ No newline at end of file
diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
index 608faed70..a37a697e4 100644
--- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
+++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp
@@ -27,7 +27,8 @@ Reg64& EmitContext::TempGPReg(bool reserve) {
     if (idx > num_scratch_gp_regs &&
         std::ranges::find(preserved_regs, reg) == preserved_regs.end()) {
         preserved_regs.push_back(reg);
-        code.push(reg);
+        code.sub(rsp, 8);
+        code.mov(ptr[rsp], reg);
     }
     return reg;
 }
@@ -154,7 +155,8 @@ void EmitContext::Epilogue() {
             code.movups(reg.cvt128(), ptr[rsp]);
             code.add(rsp, 16);
         } else {
-            code.pop(reg);
+            code.mov(reg, ptr[rsp]);
+            code.add(rsp, 8);
         }
     }
     preserved_regs.clear();
diff --git a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
index aedd12547..edbcb89c3 100644
--- a/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
+++ b/src/shader_recompiler/backend/asm_x64/x64_utils.cpp
@@ -157,13 +157,15 @@ void MovGP(EmitContext& ctx, const OperandHolder& dst, const OperandHolder& src)
     const u32 dst_bit = dst.Op().getBit();
     OperandHolder tmp = is_mem2mem ? ctx.TempGPReg(false).changeBit(dst_bit) : dst;
     if (src_bit < dst_bit) {
-        if (!dst.IsMem() && !src.Op().isBit(32)) {
+        if (!tmp.IsMem() && !src.Op().isBit(32)) {
             c.movzx(tmp.Reg(), src.Op());
+        } else if (tmp.IsMem()) {
+            Address addr = tmp.Mem();
+            c.mov(addr, 0);
+            addr.setBit(dst_bit);
+            c.mov(addr, src.Reg());
         } else {
-            if (dst.IsMem()) {
-                c.mov(tmp.Op(), 0);
-            }
-            c.mov(tmp.Op(), src.Op());
+            c.mov(tmp.Reg().cvt32(), src.Op());
         }
     } else if (src_bit > dst_bit) {
         OperandHolder src_tmp = src;
diff --git a/src/shader_recompiler/frontend/translate/scalar_memory.cpp b/src/shader_recompiler/frontend/translate/scalar_memory.cpp
index c2e91b328..47240df27 100644
--- a/src/shader_recompiler/frontend/translate/scalar_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/scalar_memory.cpp
@@ -46,7 +46,7 @@ void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
         if (smrd.offset == SQ_SRC_LITERAL) {
             return ir.Imm32(inst.src[1].code);
         }
-        return ir.GetScalarReg(IR::ScalarReg(smrd.offset));
+        return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
     }();
     const IR::ScalarReg sbase{inst.src[0].code * 2};
     const IR::Value base =
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index 8dcf9c5c4..6d57b6252 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -255,8 +255,9 @@ struct Info {
         std::memcpy(flattened_ud_buf.data(), user_data.data(), user_data.size_bytes());
         // Run the JIT program to walk the SRT and write the leaves to a flat buffer
         if (srt_info.walker_func) {
-            srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
+            srt_info.walker_func(flattened_ud_buf.data());
         }
+
     }
 
     void ReadTessConstantBuffer(TessellationDataConstantBuffer& tess_constants) const {
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index b9640fafc..e0910d60a 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -244,7 +244,8 @@ SharpLocation TrackSharp(const IR::Inst* inst, const Shader::Info& info) {
         }
         return std::nullopt;
     };
-    // We are not accounting for modifications to after the source.
+    // Value may be modified between the ReadConst/GetUserData and inst.
+    // We don't take this into account.
    const auto result = IR::BreadthFirstSearch(inst, pred);
     ASSERT_MSG(result, "Unable to track sharp source");
     inst = result.value();
diff --git a/src/shader_recompiler/ir/passes/srt.h b/src/shader_recompiler/ir/passes/srt.h
index 0ddc15ea6..7d01a2895 100644
--- a/src/shader_recompiler/ir/passes/srt.h
+++ b/src/shader_recompiler/ir/passes/srt.h
@@ -9,7 +9,7 @@
 
 namespace Shader {
 
-using PFN_SrtWalker = void PS4_SYSV_ABI (*)(const u32* /*user_data*/, u32* /*flat_dst*/);
+using PFN_SrtWalker = void PS4_SYSV_ABI (*)(u32* /*flat_dst*/);
 
 struct PersistentSrtInfo {
     // Special case when fetch shader uses step rates.
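
Note on the EmitReadConstBuffer hunk: the emitted x64 rebuilds the buffer base address from the descriptor ("handle" in the patch) by taking the low 32 bits from word 0 and the next 12 bits from word 1 (the 0xFFF mask), then indexes it with the dword offset scaled by 4 via the lea. Below is a minimal C++ sketch of that address computation, for illustration only; the function name and parameter names are hypothetical and not part of the patch.

#include <cstdint>

// Illustrative only: mirrors the address math emitted by EmitReadConstBuffer.
// sharp_word0/sharp_word1 stand in for handle[0]/handle[1] in the patch.
inline std::uint64_t ConstBufferDwordAddress(std::uint32_t sharp_word0,
                                             std::uint32_t sharp_word1,
                                             std::uint32_t dword_offset) {
    // Base address: upper bits from word 1 (masked with 0xFFF), low 32 bits from word 0.
    const std::uint64_t base =
        (static_cast<std::uint64_t>(sharp_word1 & 0xFFFu) << 32) | sharp_word0;
    // The offset is expressed in dwords, hence the scale by 4. As the TODO in
    // the patch notes, the offset is not yet clamped to the buffer size.
    return base + static_cast<std::uint64_t>(dword_offset) * 4u;
}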