diff --git a/src/shader_recompiler/frontend/copy_shader.cpp b/src/shader_recompiler/frontend/copy_shader.cpp index b2c795667..8750e2b18 100644 --- a/src/shader_recompiler/frontend/copy_shader.cpp +++ b/src/shader_recompiler/frontend/copy_shader.cpp @@ -29,6 +29,14 @@ CopyShaderData ParseCopyShader(std::span code) { sources[inst.dst[0].code] = inst.control.sopk.simm; break; } + case Gcn::Opcode::S_MOV_B32: { + sources[inst.dst[0].code] = inst.src[0].code; + break; + } + case Gcn::Opcode::S_ADDK_I32: { + sources[inst.dst[0].code] += inst.control.sopk.simm; + break; + } case Gcn::Opcode::EXP: { const auto& exp = inst.control.exp; const IR::Attribute semantic = static_cast(exp.target); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index be9c4d62c..79bc33f0c 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -226,6 +226,7 @@ public: void V_MED3_I32(const GcnInst& inst); void V_SAD(const GcnInst& inst); void V_SAD_U32(const GcnInst& inst); + void V_CVT_PK_U16_U32(const GcnInst& inst); void V_CVT_PK_U8_F32(const GcnInst& inst); void V_LSHL_B64(const GcnInst& inst); void V_MUL_F64(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 279695461..66ebb1ebb 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -157,6 +157,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_RCP_F64(inst); case Opcode::V_RCP_IFLAG_F32: return V_RCP_F32(inst); + case Opcode::V_RCP_CLAMP_F32: + return V_RCP_F32(inst); case Opcode::V_RSQ_CLAMP_F32: return V_RSQ_F32(inst); case Opcode::V_RSQ_LEGACY_F32: @@ -355,6 +357,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_MED3_I32(inst); case Opcode::V_SAD_U32: return V_SAD_U32(inst); + case Opcode::V_CVT_PK_U16_U32: + return V_CVT_PK_U16_U32(inst); case Opcode::V_CVT_PK_U8_F32: return V_CVT_PK_U8_F32(inst); case Opcode::V_LSHL_B64: @@ -1108,6 +1112,14 @@ void Translator::V_SAD_U32(const GcnInst& inst) { SetDst(inst.dst[0], ir.IAdd(result, src2)); } +void Translator::V_CVT_PK_U16_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 lo = ir.IMin(src0, ir.Imm32(0xFFFF), false); + const IR::U32 hi = ir.IMin(src1, ir.Imm32(0xFFFF), false); + SetDst(inst.dst[0], ir.BitFieldInsert(lo, hi, ir.Imm32(16), ir.Imm32(16))); +} + void Translator::V_CVT_PK_U8_F32(const GcnInst& inst) { const IR::F32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index bb9ea92d3..a956b030d 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -405,12 +405,12 @@ struct PM4DmaData { template T SrcAddress() const { - return reinterpret_cast(src_addr_lo | u64(src_addr_hi) << 32); + return std::bit_cast(src_addr_lo | u64(src_addr_hi) << 32); } template T DstAddress() const { - return reinterpret_cast(dst_addr_lo | u64(dst_addr_hi) << 32); + return std::bit_cast(dst_addr_lo | u64(dst_addr_hi) << 32); } u32 NumBytes() const noexcept {