shader_recompiler: Use VK_AMD_shader_trinary_minmax when available. (#2739)
Some checks are pending
Build and Release / reuse (push) Waiting to run
Build and Release / clang-format (push) Waiting to run
Build and Release / get-info (push) Waiting to run
Build and Release / windows-sdl (push) Blocked by required conditions
Build and Release / windows-qt (push) Blocked by required conditions
Build and Release / macos-sdl (push) Blocked by required conditions
Build and Release / macos-qt (push) Blocked by required conditions
Build and Release / linux-sdl (push) Blocked by required conditions
Build and Release / linux-qt (push) Blocked by required conditions
Build and Release / linux-sdl-gcc (push) Blocked by required conditions
Build and Release / linux-qt-gcc (push) Blocked by required conditions
Build and Release / pre-release (push) Blocked by required conditions

* shader_recompiler: Use VK_AMD_shader_trinary_minmax when available.

* shader_recompiler: Simplify signed/unsigned trinary instruction variants.
This commit is contained in:
squidbus
2025-04-02 13:36:54 -07:00
committed by GitHub
parent eb300d0aa7
commit afd0251dd2
13 changed files with 168 additions and 34 deletions

View File

@@ -75,6 +75,28 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b) {
return ctx.OpFMin(ctx.F64[1], a, b);
}
/// Emits the minimum of three 32-bit float operands.
/// Uses the single OpFMin3AMD instruction when the profile reports trinary
/// min/max support; otherwise chains two binary OpFMin instructions.
Id EmitFPMinTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id f32{ctx.F32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        // Fallback: min(a, min(b, c)); the inner min is emitted first.
        const Id min_bc{ctx.OpFMin(f32, b, c)};
        return ctx.OpFMin(f32, a, min_bc);
    }
    return ctx.OpFMin3AMD(f32, a, b, c);
}
/// Emits the maximum of three 32-bit float operands.
/// Uses the single OpFMax3AMD instruction when the profile reports trinary
/// min/max support; otherwise chains two binary OpFMax instructions.
Id EmitFPMaxTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id f32{ctx.F32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        // Fallback: max(a, max(b, c)); the inner max is emitted first.
        const Id max_bc{ctx.OpFMax(f32, b, c)};
        return ctx.OpFMax(f32, a, max_bc);
    }
    return ctx.OpFMax3AMD(f32, a, b, c);
}
/// Emits the median (middle value) of three 32-bit float operands.
/// Uses the single OpFMid3AMD instruction when the profile reports trinary
/// min/max support; otherwise builds it from binary min/max as
/// max(min(a, b), min(max(a, b), c)).
Id EmitFPMedTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id f32{ctx.F32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        // Instructions are emitted in the same order as the nested form.
        const Id max_ab{ctx.OpFMax(f32, a, b)};
        const Id upper{ctx.OpFMin(f32, max_ab, c)};
        const Id min_ab{ctx.OpFMin(f32, a, b)};
        return ctx.OpFMax(f32, min_ab, upper);
    }
    return ctx.OpFMid3AMD(f32, a, b, c);
}
/// Emits a 16-bit float multiply of `a` and `b` and passes the result through
/// Decorate together with the originating IR instruction.
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
    const Id product{ctx.OpFMul(ctx.F16[1], a, b)};
    return Decorate(ctx, inst, product);
}

View File

@@ -247,6 +247,9 @@ Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
Id EmitFPMax64(EmitContext& ctx, Id a, Id b);
Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
// Three-operand 32-bit float emitters: minimum, maximum and median of (a, b, c).
Id EmitFPMinTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitFPMaxTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitFPMedTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
@@ -372,6 +375,12 @@ Id EmitSMin32(EmitContext& ctx, Id a, Id b);
Id EmitUMin32(EmitContext& ctx, Id a, Id b);
Id EmitSMax32(EmitContext& ctx, Id a, Id b);
Id EmitUMax32(EmitContext& ctx, Id a, Id b);
// Three-operand 32-bit integer emitters: minimum, maximum and median of
// (a, b, c). The S* variants compare as signed, the U* variants as unsigned.
Id EmitSMinTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitUMinTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitSMaxTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitUMaxTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitSMedTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitUMedTri32(EmitContext& ctx, Id a, Id b, Id c);
Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max);
Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs);

View File

@@ -256,6 +256,50 @@ Id EmitUMax32(EmitContext& ctx, Id a, Id b) {
return ctx.OpUMax(ctx.U32[1], a, b);
}
/// Emits the signed minimum of three 32-bit integer operands.
/// Uses OpSMin3AMD when the profile reports trinary min/max support;
/// otherwise chains two binary OpSMin instructions.
Id EmitSMinTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id u32{ctx.U32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        // Fallback: min(a, min(b, c)); the inner min is emitted first.
        const Id min_bc{ctx.OpSMin(u32, b, c)};
        return ctx.OpSMin(u32, a, min_bc);
    }
    return ctx.OpSMin3AMD(u32, a, b, c);
}
/// Emits the unsigned minimum of three 32-bit integer operands.
/// Uses OpUMin3AMD when the profile reports trinary min/max support;
/// otherwise chains two binary OpUMin instructions.
Id EmitUMinTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id u32{ctx.U32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        // Fallback: min(a, min(b, c)); the inner min is emitted first.
        const Id min_bc{ctx.OpUMin(u32, b, c)};
        return ctx.OpUMin(u32, a, min_bc);
    }
    return ctx.OpUMin3AMD(u32, a, b, c);
}
/// Emits the signed maximum of three 32-bit integer operands.
/// Uses OpSMax3AMD when the profile reports trinary min/max support;
/// otherwise chains two binary OpSMax instructions.
Id EmitSMaxTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id u32{ctx.U32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        // Fallback: max(a, max(b, c)); the inner max is emitted first.
        const Id max_bc{ctx.OpSMax(u32, b, c)};
        return ctx.OpSMax(u32, a, max_bc);
    }
    return ctx.OpSMax3AMD(u32, a, b, c);
}
/// Emits the unsigned maximum of three 32-bit integer operands.
/// Uses OpUMax3AMD when the profile reports trinary min/max support;
/// otherwise chains two binary OpUMax instructions.
Id EmitUMaxTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id u32{ctx.U32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        // Fallback: max(a, max(b, c)); the inner max is emitted first.
        const Id max_bc{ctx.OpUMax(u32, b, c)};
        return ctx.OpUMax(u32, a, max_bc);
    }
    return ctx.OpUMax3AMD(u32, a, b, c);
}
/// Emits the signed median (middle value) of three 32-bit integer operands.
/// Uses OpSMid3AMD when the profile reports trinary min/max support;
/// otherwise builds it from binary min/max as
/// max(min(a, b), min(max(a, b), c)).
Id EmitSMedTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id u32{ctx.U32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        // Instructions are emitted in the same order as the nested form.
        const Id max_ab{ctx.OpSMax(u32, a, b)};
        const Id upper{ctx.OpSMin(u32, max_ab, c)};
        const Id min_ab{ctx.OpSMin(u32, a, b)};
        return ctx.OpSMax(u32, min_ab, upper);
    }
    return ctx.OpSMid3AMD(u32, a, b, c);
}
/// Emits the unsigned median (middle value) of three 32-bit integer operands.
/// Uses OpUMid3AMD when the profile reports trinary min/max support;
/// otherwise builds it from binary min/max as
/// max(min(a, b), min(max(a, b), c)).
Id EmitUMedTri32(EmitContext& ctx, Id a, Id b, Id c) {
    const Id u32{ctx.U32[1]};
    if (!ctx.profile.supports_trinary_minmax) {
        // Instructions are emitted in the same order as the nested form.
        const Id max_ab{ctx.OpUMax(u32, a, b)};
        const Id upper{ctx.OpUMin(u32, max_ab, c)};
        const Id min_ab{ctx.OpUMin(u32, a, b)};
        return ctx.OpUMax(u32, min_ab, upper);
    }
    return ctx.OpUMid3AMD(u32, a, b, c);
}
Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) {
Id result{};
if (ctx.profile.has_broken_spirv_clamp) {