shader_recompiler: Use native AMD cube instructions when possible.

This commit is contained in:
squidbus 2025-01-07 14:36:16 -08:00
parent 40d35211a3
commit 3a7b2bf948
11 changed files with 77 additions and 17 deletions

2
externals/sirit vendored

@ -1 +1 @@
Subproject commit 1e74f4ef8d2a0e3221a4de51977663f342b53c35
Subproject commit 26ad5a9d0fe13260b0d7d6c64419d01a196b2e32

View File

@ -256,4 +256,20 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands);
}
// Emits the native AMD cube face coordinate operation for `cube_coords`, using
// `ctx.F32[2]` as the result type, and returns the resulting SPIR-V id.
// This emitter is only valid when the profile reports native cube calculation
// support; reaching it without that support is treated as unreachable.
Id EmitCubeFaceCoord(EmitContext& ctx, IR::Inst* inst, Id cube_coords) {
    // Guard first: without native support there is no lowering available here.
    if (!ctx.profile.supports_native_cube_calc) {
        UNREACHABLE_MSG("SPIR-V Instruction");
    }
    return ctx.OpCubeFaceCoordAMD(ctx.F32[2], cube_coords);
}
// Emits the native AMD cube face index operation for `cube_coords`, using
// `ctx.F32[1]` as the result type, and returns the resulting SPIR-V id.
// This emitter is only valid when the profile reports native cube calculation
// support; reaching it without that support is treated as unreachable.
Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords) {
    // Guard first: without native support there is no lowering available here.
    if (!ctx.profile.supports_native_cube_calc) {
        UNREACHABLE_MSG("SPIR-V Instruction");
    }
    return ctx.OpCubeFaceIndexAMD(ctx.F32[1], cube_coords);
}
} // namespace Shader::Backend::SPIRV

View File

@ -439,6 +439,8 @@ Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitCubeFaceCoord(EmitContext& ctx, IR::Inst* inst, Id cube_coords);
Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords);
Id EmitLaneId(EmitContext& ctx);
Id EmitWarpId(EmitContext& ctx);
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);

View File

@ -3,6 +3,7 @@
#include "shader_recompiler/frontend/opcodes.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/profile.h"
namespace Shader::Gcn {
@ -1061,14 +1062,19 @@ void Translator::V_CUBEID_F32(const GcnInst& inst) {
const auto y = GetSrc<IR::F32>(inst.src[1]);
const auto z = GetSrc<IR::F32>(inst.src[2]);
const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
const IR::F32 x_face{ir.Select(x_neg_cond, ir.Imm32(5.f), ir.Imm32(4.f))};
const IR::F32 y_face{ir.Select(y_neg_cond, ir.Imm32(3.f), ir.Imm32(2.f))};
const IR::F32 z_face{ir.Select(z_neg_cond, ir.Imm32(1.f), ir.Imm32(0.f))};
IR::F32 result;
if (profile.supports_native_cube_calc) {
result = ir.CubeFaceIndex(ir.CompositeConstruct(x, y, z));
} else {
const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
const IR::F32 x_face{ir.Select(x_neg_cond, ir.Imm32(5.f), ir.Imm32(4.f))};
const IR::F32 y_face{ir.Select(y_neg_cond, ir.Imm32(3.f), ir.Imm32(2.f))};
const IR::F32 z_face{ir.Select(z_neg_cond, ir.Imm32(1.f), ir.Imm32(0.f))};
const auto result{SelectCubeResult(x, y, z, x_face, y_face, z_face)};
result = SelectCubeResult(x, y, z, x_face, y_face, z_face);
}
SetDst(inst.dst[0], result);
}
@ -1077,12 +1083,18 @@ void Translator::V_CUBESC_F32(const GcnInst& inst) {
const auto y = GetSrc<IR::F32>(inst.src[1]);
const auto z = GetSrc<IR::F32>(inst.src[2]);
const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
const IR::F32 x_sc{ir.Select(x_neg_cond, ir.FPNeg(x), x)};
const IR::F32 z_sc{ir.Select(z_neg_cond, z, ir.FPNeg(z))};
IR::F32 result;
if (profile.supports_native_cube_calc) {
const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))};
result = IR::F32{ir.CompositeExtract(coords, 0)};
} else {
const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
const IR::F32 x_sc{ir.Select(x_neg_cond, ir.FPNeg(x), x)};
const IR::F32 z_sc{ir.Select(z_neg_cond, z, ir.FPNeg(z))};
const auto result{SelectCubeResult(x, y, z, x_sc, x, z_sc)};
result = SelectCubeResult(x, y, z, x_sc, x, z_sc);
}
SetDst(inst.dst[0], result);
}
@ -1091,11 +1103,17 @@ void Translator::V_CUBETC_F32(const GcnInst& inst) {
const auto y = GetSrc<IR::F32>(inst.src[1]);
const auto z = GetSrc<IR::F32>(inst.src[2]);
const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
const IR::F32 x_z_sc{ir.FPNeg(y)};
const IR::F32 y_sc{ir.Select(y_neg_cond, ir.FPNeg(z), z)};
IR::F32 result;
if (profile.supports_native_cube_calc) {
const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))};
result = IR::F32{ir.CompositeExtract(coords, 1)};
} else {
const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
const IR::F32 x_z_sc{ir.FPNeg(y)};
const IR::F32 y_sc{ir.Select(y_neg_cond, ir.FPNeg(z), z)};
const auto result{SelectCubeResult(x, y, z, x_z_sc, y_sc, x_z_sc)};
result = SelectCubeResult(x, y, z, x_z_sc, y_sc, x_z_sc);
}
SetDst(inst.dst[0], result);
}

View File

@ -1758,6 +1758,14 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32&
Inst(Opcode::ImageWrite, Flags{info}, handle, coords, lod, multisampling, color);
}
// Emits a CubeFaceCoord IR instruction taking `cube_coords` as its only operand
// and returns the value produced by that instruction.
[[nodiscard]] Value IREmitter::CubeFaceCoord(const Value& cube_coords) {
    Value face_coord = Inst(Opcode::CubeFaceCoord, cube_coords);
    return face_coord;
}
// Emits a CubeFaceIndex IR instruction taking `cube_coords` as its only operand
// and returns the instruction result typed as F32.
[[nodiscard]] F32 IREmitter::CubeFaceIndex(const Value& cube_coords) {
    F32 face_index = Inst<F32>(Opcode::CubeFaceIndex, cube_coords);
    return face_index;
}
// Debug print maps to SPIRV's NonSemantic DebugPrintf instruction
// Renderdoc will hook in its own implementation of the SPIRV instruction
// Renderdoc accepts format specifiers, e.g. %u, listed here:

View File

@ -342,6 +342,9 @@ public:
void ImageWrite(const Value& handle, const Value& coords, const U32& lod,
const U32& multisampling, const Value& color, TextureInstInfo info);
[[nodiscard]] Value CubeFaceCoord(const Value& cube_coords);
[[nodiscard]] F32 CubeFaceIndex(const Value& cube_coords);
void EmitVertex();
void EmitPrimitive();

View File

@ -374,6 +374,10 @@ OPCODE(ImageAtomicOr32, U32, Opaq
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
// Cube operations - optional, usable if profile.supports_native_cube_calc
OPCODE(CubeFaceCoord, F32x2, F32x3, )
OPCODE(CubeFaceIndex, F32, F32x3, )
// Warp operations
OPCODE(LaneId, U32, )
OPCODE(WarpId, U32, )

View File

@ -24,6 +24,7 @@ struct Profile {
bool support_explicit_workgroup_layout{};
bool support_legacy_vertex_attributes{};
bool supports_image_load_store_lod{};
bool supports_native_cube_calc{};
bool has_broken_spirv_clamp{};
bool lower_left_origin_mode{};
bool needs_manual_interpolation{};

View File

@ -271,6 +271,7 @@ bool Instance::CreateDevice() {
maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
// with extensions.

View File

@ -159,6 +159,11 @@ public:
return image_load_store_lod;
}
/// Returns true when VK_AMD_gcn_shader is supported.
/// Simply reports the stored `amd_gcn_shader` flag; it performs no query of its own.
/// NOTE(review): the flag appears to be assigned in CreateDevice() from the result of
/// requesting VK_AMD_GCN_SHADER_EXTENSION_NAME - confirm it is not written elsewhere.
bool IsAmdGcnShaderSupported() const {
return amd_gcn_shader;
}
/// Returns true when geometry shaders are supported by the device
bool IsGeometryStageSupported() const {
return features.geometryShader;
@ -334,6 +339,7 @@ private:
bool list_restart{};
bool legacy_vertex_attributes{};
bool image_load_store_lod{};
bool amd_gcn_shader{};
u64 min_imported_host_pointer_alignment{};
u32 subgroup_size{};
bool tooling_info{};

View File

@ -204,6 +204,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
.support_explicit_workgroup_layout = true,
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
.supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
.supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||