From 20f7a7231ea942083df887673506c9d97f816dc6 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Thu, 3 Apr 2025 12:11:05 +0200 Subject: [PATCH] Implement some opcodes --- CMakeLists.txt | 9 +- .../backend/asm_x64/emit_x64.cpp | 94 +++- .../backend/asm_x64/emit_x64_atomic.cpp | 138 +++++ .../backend/asm_x64/emit_x64_barrier.cpp | 20 + .../asm_x64/emit_x64_bitwise_conversion.cpp | 228 ++++++++ .../backend/asm_x64/emit_x64_composite.cpp | 242 +++++++++ .../backend/asm_x64/emit_x64_condition.cpp | 6 - .../backend/asm_x64/emit_x64_condition.h | 8 - .../backend/asm_x64/emit_x64_image.cpp | 62 +++ .../backend/asm_x64/emit_x64_instructions.h | 485 ++++++++++++++++++ .../asm_x64/emit_x64_shared_memory.cpp | 24 + .../backend/asm_x64/x64_emit_context.cpp | 6 +- .../backend/asm_x64/x64_emit_context.h | 2 +- .../spirv/emit_spirv_context_get_set.cpp | 4 + .../backend/spirv/emit_spirv_instructions.h | 2 +- .../backend/spirv/emit_spirv_special.cpp | 4 - .../ir/compute_value/do_nop_functions.h | 2 +- src/shader_recompiler/ir/ir_emitter.cpp | 8 +- src/shader_recompiler/ir/ir_emitter.h | 2 +- src/shader_recompiler/ir/microinstruction.cpp | 2 +- src/shader_recompiler/ir/opcodes.inc | 3 +- .../passes/flatten_extended_userdata_pass.cpp | 2 +- 22 files changed, 1312 insertions(+), 41 deletions(-) create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_atomic.cpp create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp delete mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp delete mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_condition.h create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp create mode 100644 src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h create mode 100644 
src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index a550a7a88..14ca4ded6 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -899,8 +899,13 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h if (ARCHITECTURE STREQUAL "x86_64") set(SHADER_RECOMPILER ${SHADER_RECOMPILER} - src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp - src/shader_recompiler/backend/asm_x64/emit_x64_condition.h + src/shader_recompiler/backend/asm_x64/emit_x64_atomic.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp + src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h + src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp src/shader_recompiler/backend/asm_x64/emit_x64.cpp src/shader_recompiler/backend/asm_x64/emit_x64.h src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp index cc25ee27b..e128216fc 100644 --- a/src/shader_recompiler/backend/asm_x64/emit_x64.cpp +++ b/src/shader_recompiler/backend/asm_x64/emit_x64.cpp @@ -3,7 +3,7 @@ #include "common/func_traits.h" #include "shader_recompiler/backend/asm_x64/emit_x64.h" -#include "shader_recompiler/backend/asm_x64/emit_x64_condition.h" +#include "shader_recompiler/backend/asm_x64/emit_x64_instructions.h" #include "shader_recompiler/backend/asm_x64/x64_emit_context.h" #include "shader_recompiler/backend/asm_x64/x64_utils.h" @@ -12,6 +12,8 @@ namespace Shader::Backend::X64 { using namespace Xbyak; using namespace Xbyak::util; +namespace { + static void EmitCondition(EmitContext& ctx, const IR::Inst* ref, Label& label, bool invert) { IR::Value cond = ref->Arg(0); if (cond.IsImmediate()) { @@ -20,7 +22,7 @@ static void 
EmitCondition(EmitContext& ctx, const IR::Inst* ref, Label& label, b ctx.Code().jmp(label); } } else { - Operand& op = ctx.Def(cond)[0]; + const Operand& op = ctx.Def(cond.InstRecursive())[0]; if (op.isREG()) { Reg8 reg = op.getReg().cvt8(); ctx.Code().test(reg, reg); @@ -37,7 +39,7 @@ static void EmitCondition(EmitContext& ctx, const IR::Inst* ref, Label& label, b template ArgType Arg(EmitContext& ctx, const IR::Value& arg) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { return ctx.Def(arg); } else if constexpr (std::is_same_v) { return arg; @@ -62,7 +64,7 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) { } template -static void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { +void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) { using Traits = Common::FuncTraits; if constexpr (has_dest) { if constexpr (is_first_arg_inst) { @@ -82,7 +84,7 @@ static void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence) } template -static void Invoke(EmitContext& ctx, IR::Inst* inst) { +void Invoke(EmitContext& ctx, IR::Inst* inst) { using Traits = Common::FuncTraits; static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); if constexpr (Traits::NUM_ARGS == 1) { @@ -101,7 +103,7 @@ static void Invoke(EmitContext& ctx, IR::Inst* inst) { } } -static void EmitInst(EmitContext& ctx, IR::Inst* inst) { +void EmitInst(EmitContext& ctx, IR::Inst* inst) { switch (inst->GetOpcode()) { #define OPCODE(name, result_type, ...) 
\ case IR::Opcode::name: \ @@ -112,7 +114,7 @@ static void EmitInst(EmitContext& ctx, IR::Inst* inst) { UNREACHABLE_MSG("Invalid opcode {}", inst->GetOpcode()); } -static void Traverse(EmitContext& ctx, const IR::Program& program) { +void Traverse(EmitContext& ctx, const IR::Program& program) { CodeGenerator& c = ctx.Code(); for (const IR::AbstractSyntaxNode& node : program.syntax_list) { ctx.ResetTempRegs(); @@ -164,6 +166,8 @@ static void Traverse(EmitContext& ctx, const IR::Program& program) { } } +} // Anonymous namespace + void EmitX64(const IR::Program& program, Xbyak::CodeGenerator& c) { EmitContext context(program, c); Traverse(context, program); @@ -171,4 +175,80 @@ void EmitX64(const IR::Program& program, Xbyak::CodeGenerator& c) { context.Epilogue(); } +void EmitPhi(EmitContext& ctx) { + +} + +void EmitVoid(EmitContext&) {} + +void EmitIdentity(EmitContext& ctx) { + throw NotImplementedException("Forward identity declaration"); +} + +void EmitConditionRef(EmitContext& ctx, const IR::Value&) { // must match the declaration in emit_x64_instructions.h + +} + +void EmitReference(EmitContext&) {} + +void EmitPhiMove(EmitContext&) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetScc(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetExec(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetVcc(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetSccLo(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetVccLo(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetVccHi(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitGetM0(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetScc(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetExec(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetVcc(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable 
instruction"); +} + +void EmitSetSccLo(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetVccLo(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetVccHi(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + +void EmitSetM0(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + } // namespace Shader::Backend::X64 \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_atomic.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_atomic.cpp new file mode 100644 index 000000000..1b865fdf9 --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_atomic.cpp @@ -0,0 +1,138 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" + +namespace Shader::Backend::X64 { + + +void EmitSharedAtomicIAdd32(EmitContext& ctx) { + throw NotImplementedException("SharedAtomicIAdd32"); +} + +void EmitSharedAtomicUMax32(EmitContext& ctx) { + throw NotImplementedException("SharedAtomicUMax32"); +} + +void EmitSharedAtomicSMax32(EmitContext& ctx) { + throw NotImplementedException("SharedAtomicSMax32"); +} + +void EmitSharedAtomicUMin32(EmitContext& ctx) { + throw NotImplementedException("SharedAtomicUMin32"); +} + +void EmitSharedAtomicSMin32(EmitContext& ctx) { + throw NotImplementedException("SharedAtomicSMin32"); +} + +void EmitSharedAtomicAnd32(EmitContext& ctx) { + throw NotImplementedException("SharedAtomicAnd32"); +} + +void EmitSharedAtomicOr32(EmitContext& ctx) { + throw NotImplementedException("SharedAtomicOr32"); +} + +void EmitSharedAtomicXor32(EmitContext& ctx) { + throw NotImplementedException("SharedAtomicXor32"); +} + +void EmitBufferAtomicIAdd32(EmitContext& ctx) { + throw NotImplementedException("BufferAtomicIAdd32"); +} + +void EmitBufferAtomicSMin32(EmitContext& ctx) { + 
throw NotImplementedException("BufferAtomicSMin32"); +} + +void EmitBufferAtomicUMin32(EmitContext& ctx) { + throw NotImplementedException("BufferAtomicUMin32"); +} + +void EmitBufferAtomicSMax32(EmitContext& ctx) { + throw NotImplementedException("BufferAtomicSMax32"); +} + +void EmitBufferAtomicUMax32(EmitContext& ctx) { + throw NotImplementedException("BufferAtomicUMax32"); +} + +void EmitBufferAtomicInc32(EmitContext& ctx) { + throw NotImplementedException("BufferAtomicInc32"); +} + +void EmitBufferAtomicDec32(EmitContext& ctx) { + throw NotImplementedException("BufferAtomicDec32"); +} + +void EmitBufferAtomicAnd32(EmitContext& ctx) { + throw NotImplementedException("BufferAtomicAnd32"); +} + +void EmitBufferAtomicOr32(EmitContext& ctx) { + throw NotImplementedException("BufferAtomicOr32"); +} + +void EmitBufferAtomicXor32(EmitContext& ctx) { + throw NotImplementedException("BufferAtomicXor32"); +} + +void EmitBufferAtomicSwap32(EmitContext& ctx) { + throw NotImplementedException("BufferAtomicSwap32"); +} + +void EmitImageAtomicIAdd32(EmitContext& ctx) { + throw NotImplementedException("ImageAtomicIAdd32"); +} + +void EmitImageAtomicSMin32(EmitContext& ctx) { + throw NotImplementedException("ImageAtomicSMin32"); +} + +void EmitImageAtomicUMin32(EmitContext& ctx) { + throw NotImplementedException("ImageAtomicUMin32"); +} + +void EmitImageAtomicSMax32(EmitContext& ctx) { + throw NotImplementedException("ImageAtomicSMax32"); +} + +void EmitImageAtomicUMax32(EmitContext& ctx) { + throw NotImplementedException("ImageAtomicUMax32"); +} + +void EmitImageAtomicInc32(EmitContext& ctx) { + throw NotImplementedException("ImageAtomicInc32"); +} + +void EmitImageAtomicDec32(EmitContext& ctx) { + throw NotImplementedException("ImageAtomicDec32"); +} + +void EmitImageAtomicAnd32(EmitContext& ctx) { + throw NotImplementedException("ImageAtomicAnd32"); +} + +void EmitImageAtomicOr32(EmitContext& ctx) { + throw NotImplementedException("ImageAtomicOr32"); +} + +void 
EmitImageAtomicXor32(EmitContext& ctx) { + throw NotImplementedException("ImageAtomicXor32"); +} + +void EmitImageAtomicExchange32(EmitContext& ctx) { + throw NotImplementedException("ImageAtomicExchange32"); +} + +void EmitDataAppend(EmitContext& ctx) { + throw NotImplementedException("DataAppend"); +} + +void EmitDataConsume(EmitContext& ctx) { + throw NotImplementedException("DataConsume"); +} + +} // namespace Shader::Backend::X64 diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp new file mode 100644 index 000000000..62df58ae9 --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_barrier.cpp @@ -0,0 +1,20 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" + +namespace Shader::Backend::X64 { + +void EmitBarrier(EmitContext& ctx) { + +} + +void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { + +} + +void EmitDeviceMemoryBarrier(EmitContext& ctx) { + +} + +} \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp new file mode 100644 index 000000000..def2974e2 --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_bitwise_conversion.cpp @@ -0,0 +1,228 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" +#include "shader_recompiler/backend/asm_x64/x64_utils.h" + +using namespace Xbyak; +using namespace Xbyak::util; + +namespace Shader::Backend::X64 { + +void EmitBitCastU16F16(EmitContext& ctx, const Operands& dest, const Operands& src) { + // We handle 16-bit floats in general purpose registers + MovGP(ctx, dest[0], src[0]); +} + +void 
EmitBitCastU32F32(EmitContext& ctx, const Operands& dest, const Operands& src) { + if (dest[0].isMEM() && src[0].isMEM()) { + Reg32 tmp = ctx.TempGPReg(false).getReg().cvt32(); + ctx.Code().mov(tmp, src[0]); + ctx.Code().mov(dest[0], tmp); + } else if (src[0].isMEM()) { + ctx.Code().mov(dest[0], src[0]); + } else if (dest[0].isMEM()) { + ctx.Code().movd(dest[0].getAddress(), src[0].getReg().cvt128()); + } else { + ctx.Code().movd(dword[rsp - 4], src[0].getReg().cvt128()); + ctx.Code().mov(dest[0], dword[rsp - 4]); + } +} + +void EmitBitCastU64F64(EmitContext& ctx, const Operands& dest, const Operands& src) { + if (dest[0].isMEM() && src[0].isMEM()) { + Reg tmp = ctx.TempGPReg(false); + ctx.Code().mov(tmp, src[0]); + ctx.Code().mov(dest[0], tmp); + } else if (src[0].isMEM()) { + ctx.Code().mov(dest[0], src[0]); + } else if (dest[0].isMEM()) { + ctx.Code().movq(dest[0].getAddress(), src[0].getReg().cvt128()); + } else { + ctx.Code().movq(qword[rsp - 8], src[0].getReg().cvt128()); + ctx.Code().mov(dest[0], qword[rsp - 8]); + } +} + +void EmitBitCastF16U16(EmitContext& ctx, const Operands& dest, const Operands& src) { + MovGP(ctx, dest[0], src[0]); +} + +void EmitBitCastF32U32(EmitContext& ctx, const Operands& dest, const Operands& src) { + if (dest[0].isMEM() && src[0].isMEM()) { + Reg32 tmp = ctx.TempGPReg(false).getReg().cvt32(); + ctx.Code().mov(tmp, src[0]); + ctx.Code().mov(dest[0], tmp); + } else if (dest[0].isMEM()) { + ctx.Code().mov(dest[0], src[0]); + } else if (src[0].isMEM()) { + ctx.Code().movd(dest[0].getReg().cvt128(), src[0].getAddress()); + } else { + ctx.Code().mov(dword[rsp - 4], src[0]); + ctx.Code().movd(dest[0].getReg().cvt128(), dword[rsp - 4]); + } +} + +void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& src) { + if (dest[0].isMEM() && src[0].isMEM()) { + Reg tmp = ctx.TempGPReg(false); + ctx.Code().mov(tmp, src[0]); + ctx.Code().mov(dest[0], tmp); + } else if (dest[0].isMEM()) { + ctx.Code().mov(dest[0], src[0]); + 
} else if (src[0].isMEM()) { + ctx.Code().movq(dest[0].getReg().cvt128(), src[0].getAddress()); + } else { + ctx.Code().mov(qword[rsp - 8], src[0].getReg()); + ctx.Code().movq(dest[0].getReg().cvt128(), qword[rsp - 8]); // movq: plain mov cannot load an XMM register + } +} + +void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) { + const bool is_mem = dest[0].isMEM(); // a memory destination always needs a scratch register + Reg tmp = is_mem ? ctx.TempGPReg(false) : dest[0].getReg(); + ctx.Code().mov(tmp, src[1]); // second component becomes the high dword + ctx.Code().shl(tmp, 32); + ctx.Code().or_(tmp, src[0]); // first component fills the low dword + if (is_mem) { + ctx.Code().mov(dest[0], tmp); + } +} + +void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src) { + Reg src0 = src[0].isMEM() ? ctx.TempGPReg() : src[0].getReg(); + if (src[0].isMEM()) { + ctx.Code().mov(src0, src[0]); + } + Reg dest1 = dest[1].isMEM() ? ctx.TempGPReg(false) : dest[1].getReg().changeBit(64); + ctx.Code().mov(dest1, src0); + ctx.Code().shr(dest1, 32); + if (dest[1].isMEM()) { + ctx.Code().mov(dest[1], dest1.cvt32()); + } + ctx.Code().mov(dest[0], src0.cvt32()); +} + +void EmitPackFloat2x32(EmitContext& ctx, const Operands& dest, const Operands& src) { + Xmm tmp = dest[0].isMEM() ? 
ctx.TempXmmReg(false) : dest[0].getReg().cvt128(); + MovFloat(ctx, tmp, src[0]); + ctx.Code().pinsrd(tmp, src[1], 1); + if (dest[0].isMEM()) { + ctx.Code().movq(dest[0].getAddress(), tmp); // store both packed floats (64 bits); movss wrote only the first + } +} + +void EmitPackUnorm2x16(EmitContext& ctx) { + throw NotImplementedException("PackUnorm2x16"); +} + +void EmitUnpackUnorm2x16(EmitContext& ctx) { + throw NotImplementedException("UnpackUnorm2x16"); +} + +void EmitPackSnorm2x16(EmitContext& ctx) { + throw NotImplementedException("PackSnorm2x16"); +} + +void EmitUnpackSnorm2x16(EmitContext& ctx) { + throw NotImplementedException("UnpackSnorm2x16"); +} + +void EmitPackUint2x16(EmitContext& ctx) { + throw NotImplementedException("PackUint2x16"); +} + +void EmitUnpackUint2x16(EmitContext& ctx) { + throw NotImplementedException("UnpackUint2x16"); +} + +void EmitPackSint2x16(EmitContext& ctx) { + throw NotImplementedException("PackSint2x16"); +} + +void EmitUnpackSint2x16(EmitContext& ctx) { + throw NotImplementedException("UnpackSint2x16"); +} + +void EmitPackHalf2x16(EmitContext& ctx) { + throw NotImplementedException("PackHalf2x16"); +} + +void EmitUnpackHalf2x16(EmitContext& ctx) { + throw NotImplementedException("UnpackHalf2x16"); +} + +void EmitPackUnorm4x8(EmitContext& ctx) { + throw NotImplementedException("PackUnorm4x8"); +} + +void EmitUnpackUnorm4x8(EmitContext& ctx) { + throw NotImplementedException("UnpackUnorm4x8"); +} + +void EmitPackSnorm4x8(EmitContext& ctx) { + throw NotImplementedException("PackSnorm4x8"); +} + +void EmitUnpackSnorm4x8(EmitContext& ctx) { + throw NotImplementedException("UnpackSnorm4x8"); +} + +void EmitPackUint4x8(EmitContext& ctx) { + throw NotImplementedException("PackUint4x8"); +} + +void EmitUnpackUint4x8(EmitContext& ctx) { + throw NotImplementedException("UnpackUint4x8"); +} + +void EmitPackSint4x8(EmitContext& ctx) { + throw NotImplementedException("PackSint4x8"); +} + +void EmitUnpackSint4x8(EmitContext& ctx) { + throw NotImplementedException("UnpackSint4x8"); +} + +void 
EmitPackUfloat10_11_11(EmitContext& ctx) { + throw NotImplementedException("PackUfloat10_11_11"); +} + +void EmitUnpackUfloat10_11_11(EmitContext& ctx) { + throw NotImplementedException("UnpackUfloat10_11_11"); +} + +void EmitPackUnorm2_10_10_10(EmitContext& ctx) { + throw NotImplementedException("PackUnorm2_10_10_10"); +} + +void EmitUnpackUnorm2_10_10_10(EmitContext& ctx) { + throw NotImplementedException("UnpackUnorm2_10_10_10"); +} + +void EmitPackSnorm2_10_10_10(EmitContext& ctx) { + throw NotImplementedException("PackSnorm2_10_10_10"); +} + +void EmitUnpackSnorm2_10_10_10(EmitContext& ctx) { + throw NotImplementedException("UnpackSnorm2_10_10_10"); +} + +void EmitPackUint2_10_10_10(EmitContext& ctx) { + throw NotImplementedException("PackUint2_10_10_10"); +} + +void EmitUnpackUint2_10_10_10(EmitContext& ctx) { + throw NotImplementedException("UnpackUint2_10_10_10"); +} + +void EmitPackSint2_10_10_10(EmitContext& ctx) { + throw NotImplementedException("PackSint2_10_10_10"); +} + +void EmitUnpackSint2_10_10_10(EmitContext& ctx) { + throw NotImplementedException("UnpackSint2_10_10_10"); +} + +} \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp new file mode 100644 index 000000000..d03516dec --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_composite.cpp @@ -0,0 +1,242 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/backend/asm_x64/x64_utils.h" +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" + +namespace Shader::Backend::X64 { + +namespace { + +template +static const Operand& GetSuffleOperand(const Operands& comp1, const Operands& comp2, u32 index) { + if (index < N) { + return comp1[index]; + } else { + return comp2[index - N]; + } +} +} + +void EmitCompositeConstructU32x2(EmitContext& ctx, const Operands& dest, const 
Operands& src1, const Operands& src2) { + MovGP(ctx, dest[0], src1[0]); + MovGP(ctx, dest[1], src2[0]); +} + +void EmitCompositeConstructU32x3(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3) { + MovGP(ctx, dest[0], src1[0]); + MovGP(ctx, dest[1], src2[0]); + MovGP(ctx, dest[2], src3[0]); +} + +void EmitCompositeConstructU32x4(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2, const Operands& src3, const Operands& src4) { + MovGP(ctx, dest[0], src1[0]); + MovGP(ctx, dest[1], src2[0]); + MovGP(ctx, dest[2], src3[0]); + MovGP(ctx, dest[3], src4[0]); +} + +void EmitCompositeConstructU32x2x2(EmitContext& ctx, const Operands& dest, const Operands& src1, const Operands& src2) { + MovGP(ctx, dest[0], src1[0]); + MovGP(ctx, dest[1], src2[0]); + MovGP(ctx, dest[2], src1[1]); + MovGP(ctx, dest[3], src2[1]); +} + +void EmitCompositeExtractU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) { + MovGP(ctx, dest[0], composite[index]); +} + +void EmitCompositeExtractU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) { + MovGP(ctx, dest[0], composite[index]); +} + +void EmitCompositeExtractU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite, u32 index) { + MovGP(ctx, dest[0], composite[index]); +} + +void EmitCompositeInsertU32x2(EmitContext& ctx, const Operands& dest, const Operands& object, u32 index) { + MovGP(ctx, dest[index], object[0]); +} + +void EmitCompositeInsertU32x3(EmitContext& ctx, const Operands& dest, const Operands& object, u32 index) { + MovGP(ctx, dest[index], object[0]); +} + +void EmitCompositeInsertU32x4(EmitContext& ctx, const Operands& dest, const Operands& object, u32 index) { + MovGP(ctx, dest[index], object[0]); +} + +void EmitCompositeShuffleU32x2(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2) { + MovGP(ctx, 
dest[0], GetSuffleOperand<2>(composite1, composite2, idx1)); + MovGP(ctx, dest[1], GetSuffleOperand<2>(composite1, composite2, idx2)); +} + +void EmitCompositeShuffleU32x3(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3) { + MovGP(ctx, dest[0], GetSuffleOperand<3>(composite1, composite2, idx1)); + MovGP(ctx, dest[1], GetSuffleOperand<3>(composite1, composite2, idx2)); + MovGP(ctx, dest[2], GetSuffleOperand<3>(composite1, composite2, idx3)); +} + +void EmitCompositeShuffleU32x4(EmitContext& ctx, const Operands& dest, const Operands& composite1, const Operands& composite2, u32 idx1, u32 idx2, u32 idx3, u32 idx4) { + MovGP(ctx, dest[0], GetSuffleOperand<4>(composite1, composite2, idx1)); + MovGP(ctx, dest[1], GetSuffleOperand<4>(composite1, composite2, idx2)); + MovGP(ctx, dest[2], GetSuffleOperand<4>(composite1, composite2, idx3)); + MovGP(ctx, dest[3], GetSuffleOperand<4>(composite1, composite2, idx4)); +} + +Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + return EmitCompositeConstruct(ctx, inst, ctx.F16[2], e1, e2); +} + +Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) { + return EmitCompositeConstruct(ctx, inst, ctx.F16[3], e1, e2, e3); +} + +Id EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + return EmitCompositeConstruct(ctx, inst, ctx.F16[4], e1, e2, e3, e4); +} + +Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); +} + +Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); +} + +Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F16[1], composite, index); +} + +Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) { + 
return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index); +} + +Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index); +} + +Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); +} + +Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F16[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F16[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F16[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + +Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[2], e1, e2); +} + +Id EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[3], e1, e2, e3); +} + +Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[4], e1, e2, e3, e4); +} + +Id EmitCompositeConstructF32x2x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { + return EmitCompositeConstruct(ctx, inst, ctx.F32[4], e1, e2); +} + +Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, 
u32 index) { + return ctx.OpCompositeExtract(ctx.F32[1], composite, index); +} + +Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index); +} + +Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index); +} + +Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); +} + +Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F32[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F32[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F32[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + +void EmitCompositeConstructF64x2(EmitContext&) { + UNREACHABLE_MSG("SPIR-V Instruction"); +} + +void EmitCompositeConstructF64x3(EmitContext&) { + UNREACHABLE_MSG("SPIR-V Instruction"); +} + +void EmitCompositeConstructF64x4(EmitContext&) { + UNREACHABLE_MSG("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x2(EmitContext&) { + UNREACHABLE_MSG("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x3(EmitContext&) { + UNREACHABLE_MSG("SPIR-V Instruction"); +} + +void EmitCompositeExtractF64x4(EmitContext&) { + UNREACHABLE_MSG("SPIR-V Instruction"); +} + +Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index); +} + +Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) { + 
return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index); +} + +Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) { + return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); +} + +Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F64[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F64[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F64[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + +} \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp deleted file mode 100644 index 046454b6f..000000000 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_condition.cpp +++ /dev/null @@ -1,6 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "shader_recompiler/backend/asm_x64/emit_x64_condition.h" - -namespace Shader::Backend::X64 {} \ No newline at end of file diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_condition.h b/src/shader_recompiler/backend/asm_x64/emit_x64_condition.h deleted file mode 100644 index 16d6093ea..000000000 --- a/src/shader_recompiler/backend/asm_x64/emit_x64_condition.h +++ /dev/null @@ -1,8 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" - -namespace Shader::Backend::X64 {} \ No newline at end of file diff --git 
a/src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp new file mode 100644 index 000000000..bc0e436e1 --- /dev/null +++ b/src/shader_recompiler/backend/asm_x64/emit_x64_image.cpp @@ -0,0 +1,62 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/exception.h" +#include "shader_recompiler/backend/asm_x64/x64_emit_context.h" + +namespace Shader::Backend::X64 { + +void EmitImageSampleRaw(EmitContext& ctx) { + // We can reach this here: the resource tracking pass has not run yet. + throw NotImplementedException("ImageSampleRaw"); +} + +void EmitImageSampleImplicitLod(EmitContext& ctx) { + throw NotImplementedException("ImageSampleImplicitLod"); +} + +void EmitImageSampleExplicitLod(EmitContext& ctx) { + throw NotImplementedException("ImageSampleExplicitLod"); +} + +void EmitImageSampleDrefImplicitLod(EmitContext& ctx) { + throw NotImplementedException("ImageSampleDrefImplicitLod"); +} + +void EmitImageSampleDrefExplicitLod(EmitContext& ctx) { + throw NotImplementedException("ImageSampleDrefExplicitLod"); +} + +void EmitImageGather(EmitContext& ctx) { + throw NotImplementedException("ImageGather"); +} + +void EmitImageGatherDref(EmitContext& ctx) { + throw NotImplementedException("ImageGatherDref"); +} + +void EmitImageQueryDimensions(EmitContext& ctx) { + throw NotImplementedException("ImageQueryDimensions"); +} + +void EmitImageQueryLod(EmitContext& ctx) { + throw NotImplementedException("ImageQueryLod"); +} + +void EmitImageGradient(EmitContext& ctx) { + throw NotImplementedException("ImageGradient"); +} + +void EmitImageRead(EmitContext& ctx) { + throw NotImplementedException("ImageRead"); +} + +void EmitImageWrite(EmitContext& ctx) { + throw NotImplementedException("ImageWrite"); +} + +void EmitCubeFaceIndex(EmitContext& ctx) { + throw NotImplementedException("CubeFaceIndex"); +} + +} \ No newline at end of file diff 
--git a/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
new file mode 100644
index 000000000..51970986d
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_instructions.h
@@ -0,0 +1,485 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include
+#include
+#include "common/types.h"
+
+namespace Shader::IR {
+enum class Attribute : u64;
+enum class ScalarReg : u32;
+enum class Patch : u64;
+class Inst;
+class Value;
+} // namespace Shader::IR
+
+namespace Shader::Backend::X64 {
+
+// Operand list for an IR value, as returned by EmitContext::Def().
+using Operands = boost::container::static_vector;
+
+class EmitContext;
+
+// Microinstruction emitters
+void EmitPhi(EmitContext& ctx);
+void EmitVoid(EmitContext& ctx);
+void EmitIdentity(EmitContext& ctx);
+void EmitConditionRef(EmitContext& ctx, const IR::Value& value);
+void EmitReference(EmitContext&);
+void EmitPhiMove(EmitContext&);
+void EmitJoin(EmitContext& ctx);
+void EmitGetScc(EmitContext& ctx);
+void EmitGetExec(EmitContext& ctx);
+void EmitGetVcc(EmitContext& ctx);
+void EmitGetSccLo(EmitContext& ctx);
+void EmitGetVccLo(EmitContext& ctx);
+void EmitGetVccHi(EmitContext& ctx);
+void EmitGetM0(EmitContext& ctx);
+void EmitSetScc(EmitContext& ctx);
+void EmitSetExec(EmitContext& ctx);
+void EmitSetVcc(EmitContext& ctx);
+void EmitSetSccLo(EmitContext& ctx);
+void EmitSetVccLo(EmitContext& ctx);
+void EmitSetVccHi(EmitContext& ctx);
+void EmitSetM0(EmitContext& ctx);
+void EmitFPCmpClass32(EmitContext& ctx);
+void EmitPrologue(EmitContext& ctx);
+void EmitEpilogue(EmitContext& ctx);
+void EmitDiscard(EmitContext& ctx);
+void EmitDiscardCond(EmitContext& ctx, Id condition);
+void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2, Id arg3, Id arg4);
+void EmitBarrier(EmitContext& ctx);
+void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
+void EmitDeviceMemoryBarrier(EmitContext& ctx);
+Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg);
+void EmitSetUserData(EmitContext& ctx, const IR::Value& offset, const IR::Value& data);
+void EmitGetThreadBitScalarReg(EmitContext& ctx);
+void EmitSetThreadBitScalarReg(EmitContext& ctx);
+void EmitGetScalarRegister(EmitContext& ctx);
+void EmitSetScalarRegister(EmitContext& ctx);
+void EmitGetVectorRegister(EmitContext& ctx);
+void EmitSetVectorRegister(EmitContext& ctx);
+void EmitSetGotoVariable(EmitContext& ctx);
+void EmitGetGotoVariable(EmitContext& ctx);
+Id EmitReadConst(EmitContext& ctx, IR::Inst* inst);
+Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
+Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+void EmitStoreBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitStoreBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitBufferAtomicIAdd32(EmitContext& ctx);
+void EmitBufferAtomicSMin32(EmitContext& ctx);
+void EmitBufferAtomicUMin32(EmitContext& ctx);
+void EmitBufferAtomicSMax32(EmitContext& ctx);
+void EmitBufferAtomicUMax32(EmitContext& ctx);
+void EmitBufferAtomicInc32(EmitContext& ctx);
+void EmitBufferAtomicDec32(EmitContext& ctx);
+void EmitBufferAtomicAnd32(EmitContext& ctx);
+void EmitBufferAtomicOr32(EmitContext& ctx);
+void EmitBufferAtomicXor32(EmitContext& ctx);
+void EmitBufferAtomicSwap32(EmitContext& ctx);
+Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
+Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
+void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
+Id EmitGetTessGenericAttribute(EmitContext& ctx, Id vertex_index, Id attr_index, Id comp_index);
+void EmitSetTcsGenericAttribute(EmitContext& ctx, Id value, Id attr_index, Id comp_index);
+Id EmitReadTcsGenericOuputAttribute(EmitContext& ctx, Id vertex_index, Id attr_index,
+                                    Id comp_index);
+Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
+void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
+void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
+void EmitSetSampleMask(EmitContext& ctx, Id value);
+void EmitSetFragDepth(EmitContext& ctx, Id value);
+Id EmitWorkgroupId(EmitContext& ctx);
+Id EmitLocalInvocationId(EmitContext& ctx);
+Id EmitInvocationId(EmitContext& ctx);
+Id EmitInvocationInfo(EmitContext& ctx);
+Id EmitSampleId(EmitContext& ctx);
+Id EmitUndefU1(EmitContext& ctx);
+Id EmitUndefU8(EmitContext& ctx);
+Id EmitUndefU16(EmitContext& ctx);
+Id EmitUndefU32(EmitContext& ctx);
+Id EmitUndefU64(EmitContext& ctx);
+void EmitLoadSharedU32(EmitContext& ctx);
+void EmitLoadSharedU64(EmitContext& ctx);
+void EmitWriteSharedU32(EmitContext& ctx);
+void EmitWriteSharedU64(EmitContext& ctx);
+void EmitSharedAtomicIAdd32(EmitContext& ctx);
+void EmitSharedAtomicUMax32(EmitContext& ctx);
+void EmitSharedAtomicSMax32(EmitContext& ctx);
+void EmitSharedAtomicUMin32(EmitContext& ctx);
+void EmitSharedAtomicSMin32(EmitContext& ctx);
+void EmitSharedAtomicAnd32(EmitContext& ctx);
+void EmitSharedAtomicOr32(EmitContext& ctx);
+void EmitSharedAtomicXor32(EmitContext& ctx);
+Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
+Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);
+Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4);
+Id EmitCompositeConstructU32x2x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
+Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
+Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
+                             u32 comp2);
+Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
+                             u32 comp2, u32 comp3);
+Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
+Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);
+Id EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4);
+Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
+Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
+                             u32 comp2);
+Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
+                             u32 comp2, u32 comp3);
+Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
+Id EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);
+Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4);
+Id EmitCompositeConstructF32x2x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
+Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
+Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
+Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
+                             u32 comp2);
+Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
+                             u32 comp2, u32 comp3);
+void EmitCompositeConstructF64x2(EmitContext& ctx);
+void EmitCompositeConstructF64x3(EmitContext& ctx);
+void EmitCompositeConstructF64x4(EmitContext& ctx);
+void EmitCompositeExtractF64x2(EmitContext& ctx);
+void EmitCompositeExtractF64x3(EmitContext& ctx);
+void EmitCompositeExtractF64x4(EmitContext& ctx);
+Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index);
+Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
+Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
+                             u32 comp2);
+Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
+                             u32 comp2, u32 comp3);
+Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+void EmitBitCastU16F16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitBitCastU32F32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitBitCastU64F64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitBitCastF16U16(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitBitCastF32U32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitBitCastF64U64(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitPackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitUnpackUint2x32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitPackFloat2x32(EmitContext& ctx, const Operands& dest, const Operands& src);
+void EmitPackUnorm2x16(EmitContext& ctx);
+void EmitUnpackUnorm2x16(EmitContext& ctx);
+void EmitPackSnorm2x16(EmitContext& ctx);
+void EmitUnpackSnorm2x16(EmitContext& ctx);
+void EmitPackUint2x16(EmitContext& ctx);
+void EmitUnpackUint2x16(EmitContext& ctx);
+void EmitPackSint2x16(EmitContext& ctx);
+void EmitUnpackSint2x16(EmitContext& ctx);
+void EmitPackHalf2x16(EmitContext& ctx);
+void EmitUnpackHalf2x16(EmitContext& ctx);
+void EmitPackUnorm4x8(EmitContext& ctx);
+void EmitUnpackUnorm4x8(EmitContext& ctx);
+void EmitPackSnorm4x8(EmitContext& ctx);
+void EmitUnpackSnorm4x8(EmitContext& ctx);
+void EmitPackUint4x8(EmitContext& ctx);
+void EmitUnpackUint4x8(EmitContext& ctx);
+void EmitPackSint4x8(EmitContext& ctx);
+void EmitUnpackSint4x8(EmitContext& ctx);
+void EmitPackUfloat10_11_11(EmitContext& ctx);
+void EmitUnpackUfloat10_11_11(EmitContext& ctx);
+void EmitPackUnorm2_10_10_10(EmitContext& ctx);
+void EmitUnpackUnorm2_10_10_10(EmitContext& ctx);
+void EmitPackSnorm2_10_10_10(EmitContext& ctx);
+void EmitUnpackSnorm2_10_10_10(EmitContext& ctx);
+void EmitPackUint2_10_10_10(EmitContext& ctx);
+void EmitUnpackUint2_10_10_10(EmitContext& ctx);
+void EmitPackSint2_10_10_10(EmitContext& ctx);
+void EmitUnpackSint2_10_10_10(EmitContext& ctx);
+Id EmitFPAbs16(EmitContext& ctx, Id value);
+Id EmitFPAbs32(EmitContext& ctx, Id value);
+Id EmitFPAbs64(EmitContext& ctx, Id value);
+Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
+Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
+Id EmitFPMax64(EmitContext& ctx, Id a, Id b);
+Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
+Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
+Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitFPNeg16(EmitContext& ctx, Id value);
+Id EmitFPNeg32(EmitContext& ctx, Id value);
+Id EmitFPNeg64(EmitContext& ctx, Id value);
+Id EmitFPSin(EmitContext& ctx, Id value);
+Id EmitFPCos(EmitContext& ctx, Id value);
+Id EmitFPExp2(EmitContext& ctx, Id value);
+Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp);
+Id EmitFPLog2(EmitContext& ctx, Id value);
+Id EmitFPRecip32(EmitContext& ctx, Id value);
+Id EmitFPRecip64(EmitContext& ctx, Id value);
+Id EmitFPRecipSqrt32(EmitContext& ctx, Id value);
+Id EmitFPRecipSqrt64(EmitContext& ctx, Id value);
+Id EmitFPSqrt(EmitContext& ctx, Id value);
+Id EmitFPSaturate16(EmitContext& ctx, Id value);
+Id EmitFPSaturate32(EmitContext& ctx, Id value);
+Id EmitFPSaturate64(EmitContext& ctx, Id value);
+Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value);
+Id EmitFPRoundEven16(EmitContext& ctx, Id value);
+Id EmitFPRoundEven32(EmitContext& ctx, Id value);
+Id EmitFPRoundEven64(EmitContext& ctx, Id value);
+Id EmitFPFloor16(EmitContext& ctx, Id value);
+Id EmitFPFloor32(EmitContext& ctx, Id value);
+Id EmitFPFloor64(EmitContext& ctx, Id value);
+Id EmitFPCeil16(EmitContext& ctx, Id value);
+Id EmitFPCeil32(EmitContext& ctx, Id value);
+Id EmitFPCeil64(EmitContext& ctx, Id value);
+Id EmitFPTrunc16(EmitContext& ctx, Id value);
+Id EmitFPTrunc32(EmitContext& ctx, Id value);
+Id EmitFPTrunc64(EmitContext& ctx, Id value);
+Id EmitFPFract32(EmitContext& ctx, Id value);
+Id EmitFPFract64(EmitContext& ctx, Id value);
+Id EmitFPFrexpSig32(EmitContext& ctx, Id value);
+Id EmitFPFrexpSig64(EmitContext& ctx, Id value);
+Id EmitFPFrexpExp32(EmitContext& ctx, Id value);
+Id EmitFPFrexpExp64(EmitContext& ctx, Id value);
+Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitFPIsNan16(EmitContext& ctx, Id value);
+Id EmitFPIsNan32(EmitContext& ctx, Id value);
+Id EmitFPIsNan64(EmitContext& ctx, Id value);
+Id EmitFPIsInf32(EmitContext& ctx, Id value);
+Id EmitFPIsInf64(EmitContext& ctx, Id value);
+Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
+Id EmitIAddCary32(EmitContext& ctx, Id a, Id b);
+Id EmitISub32(EmitContext& ctx, Id a, Id b);
+Id EmitISub64(EmitContext& ctx, Id a, Id b);
+Id EmitSMulExt(EmitContext& ctx, Id a, Id b);
+Id EmitUMulExt(EmitContext& ctx, Id a, Id b);
+Id EmitIMul32(EmitContext& ctx, Id a, Id b);
+Id EmitIMul64(EmitContext& ctx, Id a, Id b);
+Id EmitSDiv32(EmitContext& ctx, Id a, Id b);
+Id EmitUDiv32(EmitContext& ctx, Id a, Id b);
+Id EmitSMod32(EmitContext& ctx, Id a, Id b);
+Id EmitUMod32(EmitContext& ctx, Id a, Id b);
+Id EmitINeg32(EmitContext& ctx, Id value);
+Id EmitINeg64(EmitContext& ctx, Id value);
+Id EmitIAbs32(EmitContext& ctx, Id value);
+Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift);
+Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift);
+Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseAnd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseOr64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
+Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count);
+Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
+Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count);
+Id EmitBitReverse32(EmitContext& ctx, Id value);
+Id EmitBitCount32(EmitContext& ctx, Id value);
+Id EmitBitCount64(EmitContext& ctx, Id value);
+Id EmitBitwiseNot32(EmitContext& ctx, Id value);
+Id EmitFindSMsb32(EmitContext& ctx, Id value);
+Id EmitFindUMsb32(EmitContext& ctx, Id value);
+Id EmitFindILsb32(EmitContext& ctx, Id value);
+Id EmitFindILsb64(EmitContext& ctx, Id value);
+Id EmitSMin32(EmitContext& ctx, Id a, Id b);
+Id EmitUMin32(EmitContext& ctx, Id a, Id b);
+Id EmitSMax32(EmitContext& ctx, Id a, Id b);
+Id EmitUMax32(EmitContext& ctx, Id a, Id b);
+Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
+Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
+Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitIEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitIEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitINotEqual32(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitINotEqual64(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
+Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
+Id EmitLogicalNot(EmitContext& ctx, Id value);
+Id EmitConvertS16F16(EmitContext& ctx, Id value);
+Id EmitConvertS16F32(EmitContext& ctx, Id value);
+Id EmitConvertS16F64(EmitContext& ctx, Id value);
+Id EmitConvertS32F16(EmitContext& ctx, Id value);
+Id EmitConvertS32F32(EmitContext& ctx, Id value);
+Id EmitConvertS32F64(EmitContext& ctx, Id value);
+Id EmitConvertS64F16(EmitContext& ctx, Id value);
+Id EmitConvertS64F32(EmitContext& ctx, Id value);
+Id EmitConvertS64F64(EmitContext& ctx, Id value);
+Id EmitConvertU16F16(EmitContext& ctx, Id value);
+Id EmitConvertU16F32(EmitContext& ctx, Id value);
+Id EmitConvertU16F64(EmitContext& ctx, Id value);
+Id EmitConvertU32F16(EmitContext& ctx, Id value);
+Id EmitConvertU32F32(EmitContext& ctx, Id value);
+Id EmitConvertU32F64(EmitContext& ctx, Id value);
+Id EmitConvertU64F16(EmitContext& ctx, Id value);
+Id EmitConvertU64F32(EmitContext& ctx, Id value);
+Id EmitConvertU64F64(EmitContext& ctx, Id value);
+Id EmitConvertU64U32(EmitContext& ctx, Id value);
+Id EmitConvertU32U64(EmitContext& ctx, Id value);
+Id EmitConvertF16F32(EmitContext& ctx, Id value);
+Id EmitConvertF32F16(EmitContext& ctx, Id value);
+Id EmitConvertF32F64(EmitContext& ctx, Id value);
+Id EmitConvertF64F32(EmitContext& ctx, Id value);
+Id EmitConvertF16S8(EmitContext& ctx, Id value);
+Id EmitConvertF16S16(EmitContext& ctx, Id value);
+Id EmitConvertF16S32(EmitContext& ctx, Id value);
+Id EmitConvertF16S64(EmitContext& ctx, Id value);
+Id EmitConvertF16U8(EmitContext& ctx, Id value);
+Id EmitConvertF16U16(EmitContext& ctx, Id value);
+Id EmitConvertF16U32(EmitContext& ctx, Id value);
+Id EmitConvertF16U64(EmitContext& ctx, Id value);
+Id EmitConvertF32S8(EmitContext& ctx, Id value);
+Id EmitConvertF32S16(EmitContext& ctx, Id value);
+Id EmitConvertF32S32(EmitContext& ctx, Id value);
+Id EmitConvertF32S64(EmitContext& ctx, Id value);
+Id EmitConvertF32U8(EmitContext& ctx, Id value);
+Id EmitConvertF32U16(EmitContext& ctx, Id value);
+Id EmitConvertF32U32(EmitContext& ctx, Id value);
+Id EmitConvertF32U64(EmitContext& ctx, Id value);
+Id EmitConvertF64S8(EmitContext& ctx, Id value);
+Id EmitConvertF64S16(EmitContext& ctx, Id value);
+Id EmitConvertF64S32(EmitContext& ctx, Id value);
+Id EmitConvertF64S64(EmitContext& ctx, Id value);
+Id EmitConvertF64U8(EmitContext& ctx, Id value);
+Id EmitConvertF64U16(EmitContext& ctx, Id value);
+Id EmitConvertF64U32(EmitContext& ctx, Id value);
+Id EmitConvertF64U64(EmitContext& ctx, Id value);
+Id EmitConvertU16U32(EmitContext& ctx, Id value);
+Id EmitConvertU32U16(EmitContext& ctx, Id value);
+
+void EmitImageSampleRaw(EmitContext& ctx);
+void EmitImageSampleImplicitLod(EmitContext& ctx);
+void EmitImageSampleExplicitLod(EmitContext& ctx);
+void EmitImageSampleDrefImplicitLod(EmitContext& ctx);
+void EmitImageSampleDrefExplicitLod(EmitContext& ctx);
+void EmitImageGather(EmitContext& ctx);
+void EmitImageGatherDref(EmitContext& ctx);
+void EmitImageQueryDimensions(EmitContext& ctx);
+void EmitImageQueryLod(EmitContext& ctx);
+void EmitImageGradient(EmitContext& ctx);
+void EmitImageRead(EmitContext& ctx);
+void EmitImageWrite(EmitContext& ctx);
+
+void EmitImageAtomicIAdd32(EmitContext& ctx);
+void EmitImageAtomicSMin32(EmitContext& ctx);
+void EmitImageAtomicUMin32(EmitContext& ctx);
+void EmitImageAtomicSMax32(EmitContext& ctx);
+void EmitImageAtomicUMax32(EmitContext& ctx);
+void EmitImageAtomicInc32(EmitContext& ctx);
+void EmitImageAtomicDec32(EmitContext& ctx);
+void EmitImageAtomicAnd32(EmitContext& ctx);
+void EmitImageAtomicOr32(EmitContext& ctx);
+void EmitImageAtomicXor32(EmitContext& ctx);
+void EmitImageAtomicExchange32(EmitContext& ctx);
+void EmitCubeFaceIndex(EmitContext& ctx);
+Id EmitLaneId(EmitContext& ctx);
+Id EmitWarpId(EmitContext& ctx);
+Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
+Id EmitReadFirstLane(EmitContext& ctx, Id value);
+Id EmitReadLane(EmitContext& ctx, Id value, u32 lane);
+Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane);
+void EmitDataAppend(EmitContext& ctx);
+void EmitDataConsume(EmitContext& ctx);
+
+void EmitEmitVertex(EmitContext& ctx);
+void EmitEmitPrimitive(EmitContext& ctx);
+
+} // namespace Shader::Backend::X64
diff --git a/src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp b/src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp
new file mode 100644
index 000000000..5957afd33
--- /dev/null
+++ b/src/shader_recompiler/backend/asm_x64/emit_x64_shared_memory.cpp
@@ -0,0 +1,24 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/exception.h"
+#include "shader_recompiler/backend/asm_x64/x64_emit_context.h"
+
+namespace Shader::Backend::X64 {
+
+void EmitLoadSharedU32(EmitContext& ctx) {
+    throw NotImplementedException("LoadSharedU32");
+}
+
+void EmitLoadSharedU64(EmitContext& ctx) {
+    throw NotImplementedException("LoadSharedU64");
+}
+
+void EmitWriteSharedU32(EmitContext& ctx) {
+    throw NotImplementedException("WriteSharedU32");
+}
+
+void EmitWriteSharedU64(EmitContext& ctx) {
+    throw NotImplementedException("WriteSharedU64");
+}
+} // namespace Shader::Backend::X64
diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp index 1c5d5c103..1b706eeeb 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.cpp @@ -43,12 +43,12 @@ Xmm& EmitContext::TempXmmReg(bool reserve) { std::ranges::find(preserved_regs, reg) == preserved_regs.end()) { preserved_regs.push_back(reg); code.sub(rsp, 16); - code.movdqu(ptr[rsp], reg); + code.movups(ptr[rsp], reg); } return reg; } -Operands EmitContext::Def(IR::Inst* inst) { +const Operands& EmitContext::Def(IR::Inst* inst) { return inst_to_operands.at(inst); } @@ -141,7 +141,7 @@ void EmitContext::Epilogue() { for (auto it = preserved_regs.rbegin(); it != preserved_regs.rend(); ++it) { Reg& reg = *it; if (reg.isMMX()) { - code.movdqu(reg.cvt128(), ptr[rsp]); + code.movups(reg.cvt128(), ptr[rsp]); code.add(rsp, 16); } else { code.pop(reg); diff --git a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h index 59e0f2822..c967f9295 100644 --- a/src/shader_recompiler/backend/asm_x64/x64_emit_context.h +++ b/src/shader_recompiler/backend/asm_x64/x64_emit_context.h @@ -41,7 +41,7 @@ public: [[nodiscard]] Xbyak::Reg64& TempGPReg(bool reserve = true); [[nodiscard]] Xbyak::Xmm& TempXmmReg(bool reserve = true); - [[nodiscard]] Operands Def(IR::Inst* inst); + [[nodiscard]] const Operands& Def(IR::Inst* inst); [[nodiscard]] Operands Def(const IR::Value& value); [[nodiscard]] std::optional> PhiAssignments(IR::Block* block) const; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 5b4f3c3c5..fa6d45b0d 100644 ---
a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -128,6 +128,10 @@ Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg) { return ud_reg; } +void EmitSetUserData(EmitContext& ctx) { + UNREACHABLE_MSG("Unreachable instruction"); +} + void EmitGetThreadBitScalarReg(EmitContext& ctx) { UNREACHABLE_MSG("Unreachable instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index a8901d8f6..41f2d4514 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -47,12 +47,12 @@ void EmitPrologue(EmitContext& ctx); void EmitEpilogue(EmitContext& ctx); void EmitDiscard(EmitContext& ctx); void EmitDiscardCond(EmitContext& ctx, Id condition); -void EmitStoreFlatbuf(EmitContext& ctx, const IR::Value& data, const IR::Value& offset); void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2, Id arg3, Id arg4); void EmitBarrier(EmitContext& ctx); void EmitWorkgroupMemoryBarrier(EmitContext& ctx); void EmitDeviceMemoryBarrier(EmitContext& ctx); Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg); +void EmitSetUserData(EmitContext& ctx); void EmitGetThreadBitScalarReg(EmitContext& ctx); void EmitSetThreadBitScalarReg(EmitContext& ctx); void EmitGetScalarRegister(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index f48c76395..fe7bd3356 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -102,10 +102,6 @@ void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { throw NotImplementedException("Geometry streams"); } -void EmitStoreFlatbuf(EmitContext& ctx, const IR::Value& data, const 
IR::Value& offset) { - UNREACHABLE_MSG("StoreFlatbuf not intended for SPIR-V"); -} - void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id fmt, Id arg0, Id arg1, Id arg2, Id arg3) { IR::DebugPrintFlags flags = inst->Flags(); std::array fmt_args = {arg0, arg1, arg2, arg3}; diff --git a/src/shader_recompiler/ir/compute_value/do_nop_functions.h b/src/shader_recompiler/ir/compute_value/do_nop_functions.h index 8b88742a1..66b9833e9 100644 --- a/src/shader_recompiler/ir/compute_value/do_nop_functions.h +++ b/src/shader_recompiler/ir/compute_value/do_nop_functions.h @@ -17,7 +17,6 @@ NOP_FUNCTION(Prologue) NOP_FUNCTION(Epilogue) NOP_FUNCTION(Discard) NOP_FUNCTION(DiscardCond) -NOP_FUNCTION(StoreFlatbuf) NOP_FUNCTION(DebugPrint) NOP_FUNCTION(ReadConst) @@ -45,6 +44,7 @@ NOP_FUNCTION(SharedAtomicOr32) NOP_FUNCTION(SharedAtomicXor32) NOP_FUNCTION(GetUserData) +NOP_FUNCTION(SetUserData) NOP_FUNCTION(GetThreadBitScalarReg) NOP_FUNCTION(SetThreadBitScalarReg) NOP_FUNCTION(GetScalarRegister) diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 77e12c30c..c696a4af5 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -143,6 +143,10 @@ U32 IREmitter::GetUserData(IR::ScalarReg reg) { return Inst(Opcode::GetUserData, reg); } +void IREmitter::SetUserData(const U32& offset, const U32& data) { + Inst(Opcode::SetUserData, offset, data); +} + U1 IREmitter::GetThreadBitScalarReg(IR::ScalarReg reg) { ASSERT(static_cast(reg) < IR::NumScalarRegs); return Inst(Opcode::GetThreadBitScalarReg, reg); @@ -1974,10 +1978,6 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32& return Inst(Opcode::CubeFaceIndex, cube_coords); } -void IREmitter::StoreFlatbuf(const U32& data, const U32& offset) { - Inst(Opcode::StoreFlatbuf, data, offset); -} - // Debug print maps to SPIRV's NonSemantic DebugPrintf instruction // Renderdoc will hook in its own implementation of the SPIRV 
instruction // Renderdoc accepts format specifiers, e.g. %u, listed here: diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 14f06eef8..a95fbde25 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -48,7 +48,6 @@ public: void Epilogue(); void Discard(); void Discard(const U1& cond); - void StoreFlatbuf(const U32& data, const U32& offset); void DebugPrint(const char* fmt, boost::container::small_vector args); void Barrier(); @@ -56,6 +55,7 @@ public: void DeviceMemoryBarrier(); [[nodiscard]] U32 GetUserData(IR::ScalarReg reg); + void SetUserData(const U32& offset, const U32& data); [[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg); void SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value); diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 45b0f3de0..9ff76e2ed 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -100,10 +100,10 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::ImageAtomicOr32: case Opcode::ImageAtomicXor32: case Opcode::ImageAtomicExchange32: - case Opcode::StoreFlatbuf: case Opcode::DebugPrint: case Opcode::EmitVertex: case Opcode::EmitPrimitive: + case Opcode::SetUserData: return true; default: return false; diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index f30c1ee67..ac9ff4196 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -14,7 +14,6 @@ OPCODE(Prologue, Void, OPCODE(Epilogue, Void, ) OPCODE(Discard, Void, ) OPCODE(DiscardCond, Void, U1, ) -OPCODE(StoreFlatbuf, Void, U32, U32 ) OPCODE(DebugPrint, Void, StringLiteral, Opaque, Opaque, Opaque, Opaque, ) // Constant memory operations @@ -48,6 +47,8 @@ OPCODE(SharedAtomicXor32, U32, U32, // Context getters/setters OPCODE(GetUserData, U32, ScalarReg, ) +// We don't use 
ScalarReg since we do arithmetic on the register index +OPCODE(SetUserData, Void, U32, U32 ) OPCODE(GetThreadBitScalarReg, U1, ScalarReg, ) OPCODE(SetThreadBitScalarReg, Void, ScalarReg, U1, ) OPCODE(GetScalarRegister, U32, ScalarReg, ) diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index a2b9a0056..307e72b45 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -159,7 +159,7 @@ static IR::Program GenerateSrtReadConstsSubProgram(IR::Program& program, PassInf } IR::IREmitter ir(*inst->GetParent(), ++IR::Block::InstructionList::s_iterator_to(*inst)); - ir.StoreFlatbuf(IR::U32(inst), save_offset); + ir.SetUserData(save_offset, IR::U32(inst)); } data.original_inst->SetFlags(1); IR::IREmitter ir(*data.original_inst);