From 3a7b2bf948a74d37c166d9c5af7084f28c5ce7dd Mon Sep 17 00:00:00 2001
From: squidbus <175574877+squidbus@users.noreply.github.com>
Date: Tue, 7 Jan 2025 14:36:16 -0800
Subject: [PATCH] shader_recompiler: Use native AMD cube instructions when
 possible.

---
 externals/sirit                                  |  2 +-
 .../backend/spirv/emit_spirv_image.cpp           | 16 ++++++
 .../backend/spirv/emit_spirv_instructions.h      |  2 +
 .../frontend/translate/vector_alu.cpp            | 50 +++++++++++++------
 src/shader_recompiler/ir/ir_emitter.cpp          |  8 +++
 src/shader_recompiler/ir/ir_emitter.h            |  3 ++
 src/shader_recompiler/ir/opcodes.inc             |  4 ++
 src/shader_recompiler/profile.h                  |  1 +
 .../renderer_vulkan/vk_instance.cpp              |  1 +
 src/video_core/renderer_vulkan/vk_instance.h     |  6 +++
 .../renderer_vulkan/vk_pipeline_cache.cpp        |  1 +
 11 files changed, 77 insertions(+), 17 deletions(-)

diff --git a/externals/sirit b/externals/sirit
index 1e74f4ef8..26ad5a9d0 160000
--- a/externals/sirit
+++ b/externals/sirit
@@ -1 +1 @@
-Subproject commit 1e74f4ef8d2a0e3221a4de51977663f342b53c35
+Subproject commit 26ad5a9d0fe13260b0d7d6c64419d01a196b2e32
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 0831b25b2..182437b63 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -256,4 +256,20 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
     ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands);
 }
 
+Id EmitCubeFaceCoord(EmitContext& ctx, IR::Inst* inst, Id cube_coords) {
+    if (ctx.profile.supports_native_cube_calc) {
+        return ctx.OpCubeFaceCoordAMD(ctx.F32[2], cube_coords);
+    } else {
+        UNREACHABLE_MSG("SPIR-V Instruction");
+    }
+}
+
+Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords) {
+    if (ctx.profile.supports_native_cube_calc) {
+        return ctx.OpCubeFaceIndexAMD(ctx.F32[1], cube_coords);
+    } else {
+        UNREACHABLE_MSG("SPIR-V Instruction");
+    }
+}
+
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 0d9fcff46..37b6f7786 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -439,6 +439,8 @@ Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
 Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
 Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
 Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
+Id EmitCubeFaceCoord(EmitContext& ctx, IR::Inst* inst, Id cube_coords);
+Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords);
 Id EmitLaneId(EmitContext& ctx);
 Id EmitWarpId(EmitContext& ctx);
 Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
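The two emitters above map directly onto SPV_AMD_gcn_shader's OpCubeFaceCoordAMD and OpCubeFaceIndexAMD. For reference, here is a plain C++ sketch of the face-index rule those instructions implement, assuming the conventional cube map numbering (+X=0, -X=1, +Y=2, -Y=3, +Z=4, -Z=5); the Z-over-Y-over-X tie-break is an assumption, not something the extension pins down:

    #include <cmath>

    // Pick the major axis of the direction, then encode its sign.
    // Assumed numbering: +X=0, -X=1, +Y=2, -Y=3, +Z=4, -Z=5.
    float CubeFaceIndexRef(float x, float y, float z) {
        const float ax = std::fabs(x);
        const float ay = std::fabs(y);
        const float az = std::fabs(z);
        if (az >= ax && az >= ay) {
            return z < 0.f ? 5.f : 4.f;
        }
        if (ay >= ax) {
            return y < 0.f ? 3.f : 2.f;
        }
        return x < 0.f ? 1.f : 0.f;
    }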
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index 8dc7feed8..375c5f078 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -3,6 +3,7 @@
 
 #include "shader_recompiler/frontend/opcodes.h"
 #include "shader_recompiler/frontend/translate/translate.h"
+#include "shader_recompiler/profile.h"
 
 namespace Shader::Gcn {
 
@@ -1061,14 +1062,19 @@ void Translator::V_CUBEID_F32(const GcnInst& inst) {
     const auto y = GetSrc<IR::F32>(inst.src[1]);
     const auto z = GetSrc<IR::F32>(inst.src[2]);
 
-    const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
-    const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
-    const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
-    const IR::F32 x_face{ir.Select(x_neg_cond, ir.Imm32(5.f), ir.Imm32(4.f))};
-    const IR::F32 y_face{ir.Select(y_neg_cond, ir.Imm32(3.f), ir.Imm32(2.f))};
-    const IR::F32 z_face{ir.Select(z_neg_cond, ir.Imm32(1.f), ir.Imm32(0.f))};
+    IR::F32 result;
+    if (profile.supports_native_cube_calc) {
+        result = ir.CubeFaceIndex(ir.CompositeConstruct(x, y, z));
+    } else {
+        const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
+        const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
+        const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
+        const IR::F32 x_face{ir.Select(x_neg_cond, ir.Imm32(5.f), ir.Imm32(4.f))};
+        const IR::F32 y_face{ir.Select(y_neg_cond, ir.Imm32(3.f), ir.Imm32(2.f))};
+        const IR::F32 z_face{ir.Select(z_neg_cond, ir.Imm32(1.f), ir.Imm32(0.f))};
 
-    const auto result{SelectCubeResult(x, y, z, x_face, y_face, z_face)};
+        result = SelectCubeResult(x, y, z, x_face, y_face, z_face);
+    }
     SetDst(inst.dst[0], result);
 }
 
@@ -1077,12 +1083,18 @@ void Translator::V_CUBESC_F32(const GcnInst& inst) {
     const auto y = GetSrc<IR::F32>(inst.src[1]);
     const auto z = GetSrc<IR::F32>(inst.src[2]);
 
-    const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
-    const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
-    const IR::F32 x_sc{ir.Select(x_neg_cond, ir.FPNeg(x), x)};
-    const IR::F32 z_sc{ir.Select(z_neg_cond, z, ir.FPNeg(z))};
+    IR::F32 result;
+    if (profile.supports_native_cube_calc) {
+        const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))};
+        result = IR::F32{ir.CompositeExtract(coords, 0)};
+    } else {
+        const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
+        const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
+        const IR::F32 x_sc{ir.Select(x_neg_cond, ir.FPNeg(x), x)};
+        const IR::F32 z_sc{ir.Select(z_neg_cond, z, ir.FPNeg(z))};
 
-    const auto result{SelectCubeResult(x, y, z, x_sc, x, z_sc)};
+        result = SelectCubeResult(x, y, z, x_sc, x, z_sc);
+    }
     SetDst(inst.dst[0], result);
 }
 
@@ -1091,11 +1103,17 @@ void Translator::V_CUBETC_F32(const GcnInst& inst) {
     const auto y = GetSrc<IR::F32>(inst.src[1]);
     const auto z = GetSrc<IR::F32>(inst.src[2]);
 
-    const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
-    const IR::F32 x_z_sc{ir.FPNeg(y)};
-    const IR::F32 y_sc{ir.Select(y_neg_cond, ir.FPNeg(z), z)};
+    IR::F32 result;
+    if (profile.supports_native_cube_calc) {
+        const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))};
+        result = IR::F32{ir.CompositeExtract(coords, 1)};
+    } else {
+        const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
+        const IR::F32 x_z_sc{ir.FPNeg(y)};
+        const IR::F32 y_sc{ir.Select(y_neg_cond, ir.FPNeg(z), z)};
 
-    const auto result{SelectCubeResult(x, y, z, x_z_sc, y_sc, x_z_sc)};
+        result = SelectCubeResult(x, y, z, x_z_sc, y_sc, x_z_sc);
+    }
     SetDst(inst.dst[0], result);
 }
 
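On the native path, V_CUBESC_F32 and V_CUBETC_F32 now read components 0 and 1 of a single CubeFaceCoord result. Below is a C++ sketch of the (s, t) math GL_AMD_gcn_shader documents for cubeFaceCoordAMD, under the same assumed tie-break as the sketch above; note that the fallback branches still produce the raw sc/tc terms, while the AMD instruction is specified to return coordinates already remapped to [0, 1]:

    #include <array>
    #include <cmath>

    // Project onto the selected face, then remap [-1, 1] to [0, 1].
    std::array<float, 2> CubeFaceCoordRef(float x, float y, float z) {
        const float ax = std::fabs(x);
        const float ay = std::fabs(y);
        const float az = std::fabs(z);
        float sc, tc, ma;
        if (az >= ax && az >= ay) { // +Z / -Z face
            sc = z < 0.f ? -x : x;
            tc = -y;
            ma = az;
        } else if (ay >= ax) { // +Y / -Y face
            sc = x;
            tc = y < 0.f ? -z : z;
            ma = ay;
        } else { // +X / -X face
            sc = x < 0.f ? z : -z;
            tc = -y;
            ma = ax;
        }
        return {0.5f * sc / ma + 0.5f, 0.5f * tc / ma + 0.5f};
    }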
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index 9b1c5f060..8626bdfd1 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -1758,6 +1758,14 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32&
     Inst(Opcode::ImageWrite, Flags{info}, handle, coords, lod, multisampling, color);
 }
 
+[[nodiscard]] Value IREmitter::CubeFaceCoord(const Value& cube_coords) {
+    return Inst(Opcode::CubeFaceCoord, cube_coords);
+}
+
+[[nodiscard]] F32 IREmitter::CubeFaceIndex(const Value& cube_coords) {
+    return Inst<F32>(Opcode::CubeFaceIndex, cube_coords);
+}
+
 // Debug print maps to SPIRV's NonSemantic DebugPrintf instruction
 // Renderdoc will hook in its own implementation of the SPIRV instruction
 // Renderdoc accepts format specifiers, e.g. %u, listed here:
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index 29708baab..783709775 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -342,6 +342,9 @@ public:
     void ImageWrite(const Value& handle, const Value& coords, const U32& lod,
                     const U32& multisampling, const Value& color, TextureInstInfo info);
 
+    [[nodiscard]] Value CubeFaceCoord(const Value& cube_coords);
+    [[nodiscard]] F32 CubeFaceIndex(const Value& cube_coords);
+
     void EmitVertex();
     void EmitPrimitive();
 
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 6242a230e..19f45418f 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -374,6 +374,10 @@ OPCODE(ImageAtomicOr32,                     U32,            Opaq
 OPCODE(ImageAtomicXor32,                    U32,            Opaque,         Opaque,         U32,                                        )
 OPCODE(ImageAtomicExchange32,               U32,            Opaque,         Opaque,         U32,                                        )
 
+// Cube operations - optional, usable if profile.supports_native_cube_calc
+OPCODE(CubeFaceCoord,                       F32x2,          F32x3,                                                                      )
+OPCODE(CubeFaceIndex,                       F32,            F32x3,                                                                      )
+
 // Warp operations
 OPCODE(LaneId,                              U32,                                                                                        )
 OPCODE(WarpId,                              U32,                                                                                        )
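With the opcodes and emitter entry points in place, a translator pass can drive the pair directly. A hypothetical caller, mirroring the vector_alu.cpp hunks above (ir, x, y and z as in that file):

    // Build the direction once, then derive face coordinates and index.
    const IR::Value dir{ir.CompositeConstruct(x, y, z)}; // F32x3
    const IR::Value st{ir.CubeFaceCoord(dir)};           // F32x2
    const IR::F32 s{ir.CompositeExtract(st, 0)};
    const IR::F32 t{ir.CompositeExtract(st, 1)};
    const IR::F32 face{ir.CubeFaceIndex(dir)};           // 0.0 through 5.0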
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index fc8c5956e..f8878d442 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -24,6 +24,7 @@ struct Profile {
     bool support_explicit_workgroup_layout{};
     bool support_legacy_vertex_attributes{};
     bool supports_image_load_store_lod{};
+    bool supports_native_cube_calc{};
     bool has_broken_spirv_clamp{};
     bool lower_left_origin_mode{};
     bool needs_manual_interpolation{};
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index 9bc627830..6c3e066c6 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -271,6 +271,7 @@ bool Instance::CreateDevice() {
     maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
     legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
     image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
+    amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
 
     // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
     // with extensions.
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h
index 4e091824d..8928b4267 100644
--- a/src/video_core/renderer_vulkan/vk_instance.h
+++ b/src/video_core/renderer_vulkan/vk_instance.h
@@ -159,6 +159,11 @@ public:
         return image_load_store_lod;
     }
 
+    /// Returns true when VK_AMD_gcn_shader is supported.
+    bool IsAmdGcnShaderSupported() const {
+        return amd_gcn_shader;
+    }
+
     /// Returns true when geometry shaders are supported by the device
     bool IsGeometryStageSupported() const {
         return features.geometryShader;
@@ -334,6 +339,7 @@ private:
     bool list_restart{};
     bool legacy_vertex_attributes{};
     bool image_load_store_lod{};
+    bool amd_gcn_shader{};
     u64 min_imported_host_pointer_alignment{};
     u32 subgroup_size{};
     bool tooling_info{};
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 9cfc7c277..37137bd07 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -204,6 +204,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
         .support_explicit_workgroup_layout = true,
         .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
         .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
+        .supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
         .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
                                       instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
         .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
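Finally, a minimal harness for the two reference sketches above (it assumes CubeFaceIndexRef and CubeFaceCoordRef are visible in the same translation unit):

    #include <cstdio>

    int main() {
        std::printf("%g\n", CubeFaceIndexRef(1.f, 0.f, 0.f));  // 0 (+X)
        std::printf("%g\n", CubeFaceIndexRef(0.f, 0.f, -1.f)); // 5 (-Z)
        const auto st = CubeFaceCoordRef(1.f, 0.f, 0.f);       // center of +X
        std::printf("%g %g\n", st[0], st[1]);                  // 0.5 0.5
        return 0;
    }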