From 3bc876ca78a680d76a346d175febe0f8ac4003dd Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 15 Apr 2025 22:31:58 -0700 Subject: [PATCH 1/8] renderer_vulkan: Improve handling of required vs optional extensions. (#2792) * renderer_vulkan: Improve handling of required vs optional extensions. * documents: Update quickstart Vulkan requirements. --- documents/Quickstart/Quickstart.md | 4 +- .../renderer_vulkan/vk_instance.cpp | 25 +++++++------ src/video_core/renderer_vulkan/vk_instance.h | 6 +++ .../renderer_vulkan/vk_rasterizer.cpp | 37 +++++++++++-------- 4 files changed, 43 insertions(+), 29 deletions(-) diff --git a/documents/Quickstart/Quickstart.md b/documents/Quickstart/Quickstart.md index 55825ac7d..62df95e71 100644 --- a/documents/Quickstart/Quickstart.md +++ b/documents/Quickstart/Quickstart.md @@ -29,8 +29,8 @@ SPDX-License-Identifier: GPL-2.0-or-later ### GPU - A graphics card with at least 1GB of VRAM -- Keep your graphics drivers up to date -- Vulkan 1.3 support (required) +- Up-to-date graphics drivers +- Vulkan 1.3 with the `VK_KHR_swapchain` and `VK_KHR_push_descriptor` extensions ### RAM diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index f83f63036..f20e91ec8 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -242,18 +242,21 @@ bool Instance::CreateDevice() { // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2 // with extensions. - add_extension(VK_KHR_FORMAT_FEATURE_FLAGS_2_EXTENSION_NAME); - add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME); - add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME); - add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME); - add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); - add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME); - add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME); - add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME); + ASSERT(add_extension(VK_KHR_FORMAT_FEATURE_FLAGS_2_EXTENSION_NAME)); + ASSERT(add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME)); + ASSERT(add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME)); + ASSERT(add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME)); + ASSERT(add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME)); + ASSERT(add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME)); + ASSERT(add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME)); + ASSERT(add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME)); - add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME); - add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); - add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME); + // Required + ASSERT(add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME)); + ASSERT(add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME)); + + // Optional + depth_range_unrestricted = add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME); dynamic_state_3 = add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); if (dynamic_state_3) { dynamic_state_3_features = diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 6de419041..a9de01f84 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -104,6 +104,11 @@ public: return depth_clip_control; } + /// Returns true when VK_EXT_depth_range_unrestricted is supported + bool IsDepthRangeUnrestrictedSupported() const { + return depth_range_unrestricted; + } + /// Returns true when the extendedDynamicState3ColorWriteMask feature of /// VK_EXT_extended_dynamic_state3 is supported. bool IsDynamicColorWriteMaskSupported() const { @@ -340,6 +345,7 @@ private: bool custom_border_color{}; bool fragment_shader_barycentric{}; bool depth_clip_control{}; + bool depth_range_unrestricted{}; bool dynamic_state_3{}; bool vertex_input_dynamic_state{}; bool robustness2{}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 13779977d..f8d09f011 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -1018,32 +1018,37 @@ void Rasterizer::UpdateViewportScissorState() const { const auto zoffset = vp_ctl.zoffset_enable ? vp.zoffset : 0.f; const auto zscale = vp_ctl.zscale_enable ? vp.zscale : 1.f; + + vk::Viewport viewport = { + .minDepth = zoffset - zscale * reduce_z, + .maxDepth = zscale + zoffset, + }; + if (!instance.IsDepthRangeUnrestrictedSupported()) { + // Unrestricted depth range not supported by device. Make best attempt + // by restricting to valid range. + viewport.minDepth = std::max(viewport.minDepth, 0.f); + viewport.maxDepth = std::min(viewport.maxDepth, 1.f); + } if (regs.IsClipDisabled()) { // In case if clipping is disabled we patch the shader to convert vertex position // from screen space coordinates to NDC by defining a render space as full hardware // window range [0..16383, 0..16383] and setting the viewport to its size. - viewports.push_back({ - .x = 0.f, - .y = 0.f, - .width = float(std::min(instance.GetMaxViewportWidth(), 16_KB)), - .height = float(std::min(instance.GetMaxViewportHeight(), 16_KB)), - .minDepth = zoffset - zscale * reduce_z, - .maxDepth = zscale + zoffset, - }); + viewport.x = 0.f; + viewport.y = 0.f; + viewport.width = float(std::min(instance.GetMaxViewportWidth(), 16_KB)); + viewport.height = float(std::min(instance.GetMaxViewportHeight(), 16_KB)); } else { const auto xoffset = vp_ctl.xoffset_enable ? vp.xoffset : 0.f; const auto xscale = vp_ctl.xscale_enable ? vp.xscale : 1.f; const auto yoffset = vp_ctl.yoffset_enable ? vp.yoffset : 0.f; const auto yscale = vp_ctl.yscale_enable ? vp.yscale : 1.f; - viewports.push_back({ - .x = xoffset - xscale, - .y = yoffset - yscale, - .width = xscale * 2.0f, - .height = yscale * 2.0f, - .minDepth = zoffset - zscale * reduce_z, - .maxDepth = zscale + zoffset, - }); + + viewport.x = xoffset - xscale; + viewport.y = yoffset - yscale; + viewport.width = xscale * 2.0f; + viewport.height = yscale * 2.0f; } + viewports.push_back(viewport); auto vp_scsr = scsr; if (regs.mode_control.vport_scissor_enable) { From 52ab1ed04b900206c989a3cb78932217421d545a Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Wed, 16 Apr 2025 08:08:09 -0700 Subject: [PATCH 2/8] shader_recompiler: Implement S_FLBIT_I32_B32 and V_MUL_HI_I32. (#2793) --- .../backend/spirv/emit_spirv_instructions.h | 4 ++-- .../backend/spirv/emit_spirv_integer.cpp | 13 +++++++++---- .../frontend/translate/scalar_alu.cpp | 13 +++++++++++++ .../frontend/translate/translate.h | 1 + .../frontend/translate/vector_alu.cpp | 4 +++- src/shader_recompiler/ir/ir_emitter.cpp | 4 ++-- src/shader_recompiler/ir/ir_emitter.h | 2 +- src/shader_recompiler/ir/opcodes.inc | 4 ++-- 8 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index fb37799f5..68438fbba 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -338,8 +338,8 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b); Id EmitIAddCary32(EmitContext& ctx, Id a, Id b); Id EmitISub32(EmitContext& ctx, Id a, Id b); Id EmitISub64(EmitContext& ctx, Id a, Id b); -Id EmitSMulExt(EmitContext& ctx, Id a, Id b); -Id EmitUMulExt(EmitContext& ctx, Id a, Id b); +Id EmitSMulHi(EmitContext& ctx, Id a, Id b); +Id EmitUMulHi(EmitContext& ctx, Id a, Id b); Id EmitIMul32(EmitContext& ctx, Id a, Id b); Id EmitIMul64(EmitContext& ctx, Id a, Id b); Id EmitSDiv32(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index 36726b6df..10bfbb2ab 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -72,12 +72,17 @@ Id EmitISub64(EmitContext& ctx, Id a, Id b) { return ctx.OpISub(ctx.U64, a, b); } -Id EmitSMulExt(EmitContext& ctx, Id a, Id b) { - return ctx.OpSMulExtended(ctx.full_result_i32x2, a, b); +Id EmitSMulHi(EmitContext& ctx, Id a, Id b) { + const auto signed_a{ctx.OpBitcast(ctx.S32[1], a)}; + const auto signed_b{ctx.OpBitcast(ctx.S32[1], b)}; + const auto mul_ext{ctx.OpSMulExtended(ctx.full_result_i32x2, signed_a, signed_b)}; + const auto signed_hi{ctx.OpCompositeExtract(ctx.S32[1], mul_ext, 1)}; + return ctx.OpBitcast(ctx.U32[1], signed_hi); } -Id EmitUMulExt(EmitContext& ctx, Id a, Id b) { - return ctx.OpUMulExtended(ctx.full_result_u32x2, a, b); +Id EmitUMulHi(EmitContext& ctx, Id a, Id b) { + const auto mul_ext{ctx.OpUMulExtended(ctx.full_result_u32x2, a, b)}; + return ctx.OpCompositeExtract(ctx.U32[1], mul_ext, 1); } Id EmitIMul32(EmitContext& ctx, Id a, Id b) { diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 39f972848..c21c9b611 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -110,6 +110,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_FF1_I32_B32(inst); case Opcode::S_FF1_I32_B64: return S_FF1_I32_B64(inst); + case Opcode::S_FLBIT_I32_B32: + return S_FLBIT_I32_B32(inst); case Opcode::S_BITSET0_B32: return S_BITSET_B32(inst, 0); case Opcode::S_BITSET1_B32: @@ -660,6 +662,17 @@ void Translator::S_FF1_I32_B64(const GcnInst& inst) { SetDst(inst.dst[0], result); } +void Translator::S_FLBIT_I32_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + // Gcn wants the MSB position counting from the left, but SPIR-V counts from the rightmost (LSB) + // position + const IR::U32 msb_pos = ir.FindUMsb(src0); + const IR::U32 pos_from_left = ir.ISub(ir.Imm32(31), msb_pos); + // Select 0xFFFFFFFF if src0 was 0 + const IR::U1 cond = ir.INotEqual(src0, ir.Imm32(0)); + SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))}); +} + void Translator::S_BITSET_B32(const GcnInst& inst, u32 bit_value) { const IR::U32 old_value{GetSrc(inst.dst[0])}; const IR::U32 offset{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0U), ir.Imm32(5U))}; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 6803cda25..520720b0f 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -119,6 +119,7 @@ public: void S_BCNT1_I32_B64(const GcnInst& inst); void S_FF1_I32_B32(const GcnInst& inst); void S_FF1_I32_B64(const GcnInst& inst); + void S_FLBIT_I32_B32(const GcnInst& inst); void S_BITSET_B32(const GcnInst& inst, u32 bit_value); void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index da25f5434..787cf6ad3 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -394,6 +394,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { return V_MUL_HI_U32(false, inst); case Opcode::V_MUL_LO_I32: return V_MUL_LO_U32(inst); + case Opcode::V_MUL_HI_I32: + return V_MUL_HI_U32(true, inst); case Opcode::V_MAD_U64_U32: return V_MAD_U64_U32(inst); case Opcode::V_NOP: @@ -1279,7 +1281,7 @@ void Translator::V_MUL_LO_U32(const GcnInst& inst) { void Translator::V_MUL_HI_U32(bool is_signed, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; - const IR::U32 hi{ir.CompositeExtract(ir.IMulExt(src0, src1, is_signed), 1)}; + const IR::U32 hi{ir.IMulHi(src0, src1, is_signed)}; SetDst(inst.dst[0], hi); } diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index a171d32a2..a51d126c7 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1388,8 +1388,8 @@ U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) { } } -IR::Value IREmitter::IMulExt(const U32& a, const U32& b, bool is_signed) { - return Inst(is_signed ? Opcode::SMulExt : Opcode::UMulExt, a, b); +U32 IREmitter::IMulHi(const U32& a, const U32& b, bool is_signed) { + return Inst(is_signed ? Opcode::SMulHi : Opcode::UMulHi, a, b); } U32U64 IREmitter::IMul(const U32U64& a, const U32U64& b) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 48cc02725..f1d564b80 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -240,7 +240,7 @@ public: [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] Value IAddCary(const U32& a, const U32& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); - [[nodiscard]] Value IMulExt(const U32& a, const U32& b, bool is_signed = false); + [[nodiscard]] U32 IMulHi(const U32& a, const U32& b, bool is_signed = false); [[nodiscard]] U32U64 IMul(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false); [[nodiscard]] U32 IMod(const U32& a, const U32& b, bool is_signed = false); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 93d759b74..10819f898 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -317,8 +317,8 @@ OPCODE(ISub32, U32, U32, OPCODE(ISub64, U64, U64, U64, ) OPCODE(IMul32, U32, U32, U32, ) OPCODE(IMul64, U64, U64, U64, ) -OPCODE(SMulExt, U32x2, U32, U32, ) -OPCODE(UMulExt, U32x2, U32, U32, ) +OPCODE(SMulHi, U32, U32, U32, ) +OPCODE(UMulHi, U32, U32, U32, ) OPCODE(SDiv32, U32, U32, U32, ) OPCODE(UDiv32, U32, U32, U32, ) OPCODE(SMod32, U32, U32, U32, ) From 04e4ce0775a6a9aa4aa33456a92b68860c1c129f Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Wed, 16 Apr 2025 09:52:18 -0700 Subject: [PATCH 3/8] fix: Bad Intel drivers. --- src/video_core/renderer_vulkan/vk_instance.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index f20e91ec8..c28e22985 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -248,7 +248,7 @@ bool Instance::CreateDevice() { ASSERT(add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME)); ASSERT(add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME)); ASSERT(add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME)); - ASSERT(add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME)); + ASSERT(add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME) || driver_id == vk::DriverId::eIntelProprietaryWindows); ASSERT(add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME)); // Required From 243ee04b1c37b319a3bcf6d3bbeccaff3eb041a8 Mon Sep 17 00:00:00 2001 From: Missake212 Date: Wed, 16 Apr 2025 17:54:05 +0100 Subject: [PATCH 4/8] Implement DS_READ2ST64_B64 (#2795) --- src/shader_recompiler/frontend/translate/data_share.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 22f5b8644..acffae14b 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -67,6 +67,8 @@ void Translator::EmitDataShare(const GcnInst& inst) { return DS_READ(64, false, false, false, inst); case Opcode::DS_READ2_B64: return DS_READ(64, false, true, false, inst); + case Opcode::DS_READ2ST64_B64: + return DS_READ(64, false, true, true, inst); default: LogMissingOpcode(inst); } From ddf3df7f564baf6de5e472b2a060e53acbb467ca Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Wed, 16 Apr 2025 10:48:17 -0700 Subject: [PATCH 5/8] fix: clang-format --- src/video_core/renderer_vulkan/vk_instance.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index c28e22985..0df020116 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -248,7 +248,8 @@ bool Instance::CreateDevice() { ASSERT(add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME)); ASSERT(add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME)); ASSERT(add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME)); - ASSERT(add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME) || driver_id == vk::DriverId::eIntelProprietaryWindows); + ASSERT(add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME) || + driver_id == vk::DriverId::eIntelProprietaryWindows); ASSERT(add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME)); // Required From aa8dab5371777105a3112498faa821d79aa3cab4 Mon Sep 17 00:00:00 2001 From: Fire Cube Date: Wed, 16 Apr 2025 23:24:18 +0200 Subject: [PATCH 6/8] shader_recompiler: Implement S_SUBB_U32 instruction (#2796) * add S_SUBB_U32 instruction * add missing case * move case to match enum --- .../frontend/translate/scalar_alu.cpp | 13 +++++++++++++ .../frontend/translate/translate.h | 1 + 2 files changed, 14 insertions(+) diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index c21c9b611..3a8e894ae 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -30,6 +30,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_SUB_I32(inst); case Opcode::S_ADDC_U32: return S_ADDC_U32(inst); + case Opcode::S_SUBB_U32: + return S_SUBB_U32(inst); case Opcode::S_MIN_I32: return S_MIN_U32(true, inst); case Opcode::S_MIN_U32: @@ -238,6 +240,17 @@ void Translator::S_SUB_U32(const GcnInst& inst) { ir.SetScc(ir.IGreaterThan(src1, src0, false)); } +void Translator::S_SUBB_U32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 src1{GetSrc(inst.src[1])}; + const IR::U32 borrow{ir.Select(ir.GetScc(), ir.Imm32(1U), ir.Imm32(0U))}; + const IR::U32 result{ir.ISub(ir.ISub(src0, src1), borrow)}; + SetDst(inst.dst[0], result); + + const IR::U32 sum_with_borrow{ir.IAdd(src1, borrow)}; + ir.SetScc(ir.ILessThan(src0, sum_with_borrow, false)); +} + void Translator::S_ADD_I32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 520720b0f..32185a21f 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -80,6 +80,7 @@ public: // SOP2 void S_ADD_U32(const GcnInst& inst); void S_SUB_U32(const GcnInst& inst); + void S_SUBB_U32(const GcnInst& inst); void S_ADD_I32(const GcnInst& inst); void S_SUB_I32(const GcnInst& inst); void S_ADDC_U32(const GcnInst& inst); From 62a4182aca6a33b1899e871c8ff35410708d3752 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Wed, 16 Apr 2025 17:35:14 -0700 Subject: [PATCH 7/8] shader_recompiler: Fill in IMAGE_GATHER4_* variants in table. (#2798) --- src/shader_recompiler/frontend/format.cpp | 64 +++++++++++------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/src/shader_recompiler/frontend/format.cpp b/src/shader_recompiler/frontend/format.cpp index f89f0a582..52c8c733e 100644 --- a/src/shader_recompiler/frontend/format.cpp +++ b/src/shader_recompiler/frontend/format.cpp @@ -3569,19 +3569,19 @@ constexpr std::array InstructionFormatMIMG = {{ {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, ScalarType::Float32}, // 65 = IMAGE_GATHER4_CL - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, {}, {}, // 68 = IMAGE_GATHER4_L - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, // 69 = IMAGE_GATHER4_B - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, // 70 = IMAGE_GATHER4_B_CL - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, // 71 = IMAGE_GATHER4_LZ {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, ScalarType::Float32}, @@ -3589,19 +3589,19 @@ constexpr std::array InstructionFormatMIMG = {{ {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, ScalarType::Float32}, // 73 = IMAGE_GATHER4_C_CL - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, {}, {}, // 76 = IMAGE_GATHER4_C_L - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, // 77 = IMAGE_GATHER4_C_B - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, // 78 = IMAGE_GATHER4_C_B_CL - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, // 79 = IMAGE_GATHER4_C_LZ {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, ScalarType::Float32}, @@ -3609,19 +3609,19 @@ constexpr std::array InstructionFormatMIMG = {{ {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, ScalarType::Float32}, // 81 = IMAGE_GATHER4_CL_O - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, {}, {}, // 84 = IMAGE_GATHER4_L_O - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, // 85 = IMAGE_GATHER4_B_O - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, // 86 = IMAGE_GATHER4_B_CL_O - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, // 87 = IMAGE_GATHER4_LZ_O {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, ScalarType::Float32}, @@ -3629,19 +3629,19 @@ constexpr std::array InstructionFormatMIMG = {{ {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, ScalarType::Float32}, // 89 = IMAGE_GATHER4_C_CL_O - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, {}, {}, // 92 = IMAGE_GATHER4_C_L_O - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, // 93 = IMAGE_GATHER4_C_B_O - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, // 94 = IMAGE_GATHER4_C_B_CL_O - {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Undefined, - ScalarType::Undefined}, + {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, + ScalarType::Float32}, // 95 = IMAGE_GATHER4_C_LZ_O {InstClass::VectorMemImgSmp, InstCategory::VectorMemory, 4, 1, ScalarType::Uint32, ScalarType::Float32}, From ddc05e8a5f43819ce76340552cfcd2b9db6641ff Mon Sep 17 00:00:00 2001 From: Dmugetsu <168934208+diegolix29@users.noreply.github.com> Date: Wed, 16 Apr 2025 18:56:27 -0600 Subject: [PATCH 8/8] Implementing DS_SUB_U32, DS_INC_U32, DS_DEC_U32. (#2797) * Implementing DS_SUB_U32, DS_INC_U32, DS_DEC_U32, DS_WRITE_SRC2_B32, DS_WRITE_SRC2_B64. * Added ir instructions for new opcodes. Removing Write implementations. Maping operation S_BFE_I32 as it was added in translate but wasnt pointing to anything. * Suggestions --- .../backend/spirv/emit_spirv_atomic.cpp | 21 ++++++++++ .../backend/spirv/emit_spirv_instructions.h | 4 ++ .../frontend/translate/data_share.cpp | 42 +++++++++++++++++++ .../frontend/translate/translate.h | 3 ++ src/shader_recompiler/ir/ir_emitter.cpp | 12 ++++++ src/shader_recompiler/ir/ir_emitter.h | 4 ++ src/shader_recompiler/ir/opcodes.inc | 3 ++ 7 files changed, 89 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 4faa99fe8..c6ec65606 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -21,6 +21,15 @@ Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value, return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); } +Id SharedAtomicU32_IncDec(EmitContext& ctx, Id offset, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) { + const Id shift_id{ctx.ConstU32(2U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics); +} + Id BufferAtomicU32BoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) { if (Sirit::ValidId(buffer_size)) { // Bounds checking enabled, wrap in a conditional branch to make sure that @@ -99,6 +108,18 @@ Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) { return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor); } +Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicISub); +} + +Id EmitSharedAtomicIIncrement32(EmitContext& ctx, Id offset) { + return SharedAtomicU32_IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement); +} + +Id EmitSharedAtomicIDecrement32(EmitContext& ctx, Id offset) { + return SharedAtomicU32_IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement); +} + Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 68438fbba..9b7528be8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -130,6 +130,10 @@ Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicIIncrement32(EmitContext& ctx, Id offset); +Id EmitSharedAtomicIDecrement32(EmitContext& ctx, Id offset); +Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value); + Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index acffae14b..c29497ada 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -13,6 +13,12 @@ void Translator::EmitDataShare(const GcnInst& inst) { // DS case Opcode::DS_ADD_U32: return DS_ADD_U32(inst, false); + case Opcode::DS_SUB_U32: + return DS_SUB_U32(inst, false); + case Opcode::DS_INC_U32: + return DS_INC_U32(inst, false); + case Opcode::DS_DEC_U32: + return DS_DEC_U32(inst, false); case Opcode::DS_MIN_I32: return DS_MIN_U32(inst, true, false); case Opcode::DS_MAX_I32: @@ -35,6 +41,8 @@ void Translator::EmitDataShare(const GcnInst& inst) { return DS_WRITE(32, false, true, true, inst); case Opcode::DS_ADD_RTN_U32: return DS_ADD_U32(inst, true); + case Opcode::DS_SUB_RTN_U32: + return DS_SUB_U32(inst, true); case Opcode::DS_MIN_RTN_U32: return DS_MIN_U32(inst, false, true); case Opcode::DS_MAX_RTN_U32: @@ -228,6 +236,40 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.QuadShuffle(src, index)); } +void Translator::DS_INC_U32(const GcnInst& inst, bool rtn) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 offset = + ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + const IR::Value original_val = ir.SharedAtomicIIncrement(addr_offset); + if (rtn) { + SetDst(inst.dst[0], IR::U32{original_val}); + } +} + +void Translator::DS_DEC_U32(const GcnInst& inst, bool rtn) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 offset = + ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + const IR::Value original_val = ir.SharedAtomicIDecrement(addr_offset); + if (rtn) { + SetDst(inst.dst[0], IR::U32{original_val}); + } +} + +void Translator::DS_SUB_U32(const GcnInst& inst, bool rtn) { + const IR::U32 addr{GetSrc(inst.src[0])}; + const IR::U32 data{GetSrc(inst.src[1])}; + const IR::U32 offset = + ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); + const IR::U32 addr_offset = ir.IAdd(addr, offset); + const IR::Value original_val = ir.SharedAtomicISub(addr_offset, data); + if (rtn) { + SetDst(inst.dst[0], IR::U32{original_val}); + } +} + void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst) { const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 32185a21f..68d5e8dc8 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -275,6 +275,9 @@ public: void DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst); void DS_APPEND(const GcnInst& inst); void DS_CONSUME(const GcnInst& inst); + void DS_SUB_U32(const GcnInst& inst, bool rtn); + void DS_INC_U32(const GcnInst& inst, bool rtn); + void DS_DEC_U32(const GcnInst& inst, bool rtn); // Buffer Memory // MUBUF / MTBUF diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index a51d126c7..e8836bb4c 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -357,6 +357,18 @@ U32 IREmitter::SharedAtomicXor(const U32& address, const U32& data) { return Inst(Opcode::SharedAtomicXor32, address, data); } +U32 IREmitter::SharedAtomicIIncrement(const U32& address) { + return Inst(Opcode::SharedAtomicIIncrement32, address); +} + +U32 IREmitter::SharedAtomicIDecrement(const U32& address) { + return Inst(Opcode::SharedAtomicIDecrement32, address); +} + +U32 IREmitter::SharedAtomicISub(const U32& address, const U32& data) { + return Inst(Opcode::SharedAtomicISub32, address, data); +} + U32 IREmitter::ReadConst(const Value& base, const U32& offset) { return Inst(Opcode::ReadConst, base, offset); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index f1d564b80..186d83a07 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -106,6 +106,10 @@ public: [[nodiscard]] U32 SharedAtomicOr(const U32& address, const U32& data); [[nodiscard]] U32 SharedAtomicXor(const U32& address, const U32& data); + [[nodiscard]] U32 SharedAtomicIIncrement(const U32& address); + [[nodiscard]] U32 SharedAtomicIDecrement(const U32& address); + [[nodiscard]] U32 SharedAtomicISub(const U32& address, const U32& data); + [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset); [[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 10819f898..4932ff9a0 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -44,6 +44,9 @@ OPCODE(SharedAtomicUMax32, U32, U32, OPCODE(SharedAtomicAnd32, U32, U32, U32, ) OPCODE(SharedAtomicOr32, U32, U32, U32, ) OPCODE(SharedAtomicXor32, U32, U32, U32, ) +OPCODE(SharedAtomicISub32, U32, U32, U32, ) +OPCODE(SharedAtomicIIncrement32, U32, U32, ) +OPCODE(SharedAtomicIDecrement32, U32, U32, ) // Context getters/setters OPCODE(GetUserData, U32, ScalarReg, )