mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-05 17:02:40 +00:00
shader_recompiler: Use native AMD cube instructions when possible.
This commit is contained in:
parent
40d35211a3
commit
3a7b2bf948
2
externals/sirit
vendored
2
externals/sirit
vendored
@ -1 +1 @@
|
||||
Subproject commit 1e74f4ef8d2a0e3221a4de51977663f342b53c35
|
||||
Subproject commit 26ad5a9d0fe13260b0d7d6c64419d01a196b2e32
|
@ -256,4 +256,20 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
|
||||
ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands);
|
||||
}
|
||||
|
||||
/// Emits the native AMD cube face-coordinate instruction for `cube_coords`.
///
/// The result is typed as ctx.F32[2] (the IR opcode is declared as F32x3 -> F32x2).
/// This path is only valid when the profile reports supports_native_cube_calc;
/// reaching it without that support is treated as an unreachable state.
/// `inst` is not used by this emitter.
Id EmitCubeFaceCoord(EmitContext& ctx, IR::Inst* inst, Id cube_coords) {
    // Guard: the opcode should never be generated without native cube support.
    if (!ctx.profile.supports_native_cube_calc) {
        UNREACHABLE_MSG("SPIR-V Instruction");
    }
    return ctx.OpCubeFaceCoordAMD(ctx.F32[2], cube_coords);
}
|
||||
|
||||
/// Emits the native AMD cube face-index instruction for `cube_coords`.
///
/// The result is typed as ctx.F32[1] (the IR opcode is declared as F32x3 -> F32).
/// This path is only valid when the profile reports supports_native_cube_calc;
/// reaching it without that support is treated as an unreachable state.
/// `inst` is not used by this emitter.
Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords) {
    // Guard: the opcode should never be generated without native cube support.
    if (!ctx.profile.supports_native_cube_calc) {
        UNREACHABLE_MSG("SPIR-V Instruction");
    }
    return ctx.OpCubeFaceIndexAMD(ctx.F32[1], cube_coords);
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
@ -439,6 +439,8 @@ Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||
Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
Id EmitCubeFaceCoord(EmitContext& ctx, IR::Inst* inst, Id cube_coords);
|
||||
Id EmitCubeFaceIndex(EmitContext& ctx, IR::Inst* inst, Id cube_coords);
|
||||
Id EmitLaneId(EmitContext& ctx);
|
||||
Id EmitWarpId(EmitContext& ctx);
|
||||
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#include "shader_recompiler/frontend/opcodes.h"
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
@ -1061,14 +1062,19 @@ void Translator::V_CUBEID_F32(const GcnInst& inst) {
|
||||
const auto y = GetSrc<IR::F32>(inst.src[1]);
|
||||
const auto z = GetSrc<IR::F32>(inst.src[2]);
|
||||
|
||||
const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
|
||||
const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
|
||||
const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
|
||||
const IR::F32 x_face{ir.Select(x_neg_cond, ir.Imm32(5.f), ir.Imm32(4.f))};
|
||||
const IR::F32 y_face{ir.Select(y_neg_cond, ir.Imm32(3.f), ir.Imm32(2.f))};
|
||||
const IR::F32 z_face{ir.Select(z_neg_cond, ir.Imm32(1.f), ir.Imm32(0.f))};
|
||||
IR::F32 result;
|
||||
if (profile.supports_native_cube_calc) {
|
||||
result = ir.CubeFaceIndex(ir.CompositeConstruct(x, y, z));
|
||||
} else {
|
||||
const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
|
||||
const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
|
||||
const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
|
||||
const IR::F32 x_face{ir.Select(x_neg_cond, ir.Imm32(5.f), ir.Imm32(4.f))};
|
||||
const IR::F32 y_face{ir.Select(y_neg_cond, ir.Imm32(3.f), ir.Imm32(2.f))};
|
||||
const IR::F32 z_face{ir.Select(z_neg_cond, ir.Imm32(1.f), ir.Imm32(0.f))};
|
||||
|
||||
const auto result{SelectCubeResult(x, y, z, x_face, y_face, z_face)};
|
||||
result = SelectCubeResult(x, y, z, x_face, y_face, z_face);
|
||||
}
|
||||
SetDst(inst.dst[0], result);
|
||||
}
|
||||
|
||||
@ -1077,12 +1083,18 @@ void Translator::V_CUBESC_F32(const GcnInst& inst) {
|
||||
const auto y = GetSrc<IR::F32>(inst.src[1]);
|
||||
const auto z = GetSrc<IR::F32>(inst.src[2]);
|
||||
|
||||
const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
|
||||
const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
|
||||
const IR::F32 x_sc{ir.Select(x_neg_cond, ir.FPNeg(x), x)};
|
||||
const IR::F32 z_sc{ir.Select(z_neg_cond, z, ir.FPNeg(z))};
|
||||
IR::F32 result;
|
||||
if (profile.supports_native_cube_calc) {
|
||||
const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))};
|
||||
result = IR::F32{ir.CompositeExtract(coords, 0)};
|
||||
} else {
|
||||
const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
|
||||
const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
|
||||
const IR::F32 x_sc{ir.Select(x_neg_cond, ir.FPNeg(x), x)};
|
||||
const IR::F32 z_sc{ir.Select(z_neg_cond, z, ir.FPNeg(z))};
|
||||
|
||||
const auto result{SelectCubeResult(x, y, z, x_sc, x, z_sc)};
|
||||
result = SelectCubeResult(x, y, z, x_sc, x, z_sc);
|
||||
}
|
||||
SetDst(inst.dst[0], result);
|
||||
}
|
||||
|
||||
@ -1091,11 +1103,17 @@ void Translator::V_CUBETC_F32(const GcnInst& inst) {
|
||||
const auto y = GetSrc<IR::F32>(inst.src[1]);
|
||||
const auto z = GetSrc<IR::F32>(inst.src[2]);
|
||||
|
||||
const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
|
||||
const IR::F32 x_z_sc{ir.FPNeg(y)};
|
||||
const IR::F32 y_sc{ir.Select(y_neg_cond, ir.FPNeg(z), z)};
|
||||
IR::F32 result;
|
||||
if (profile.supports_native_cube_calc) {
|
||||
const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))};
|
||||
result = IR::F32{ir.CompositeExtract(coords, 1)};
|
||||
} else {
|
||||
const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
|
||||
const IR::F32 x_z_sc{ir.FPNeg(y)};
|
||||
const IR::F32 y_sc{ir.Select(y_neg_cond, ir.FPNeg(z), z)};
|
||||
|
||||
const auto result{SelectCubeResult(x, y, z, x_z_sc, y_sc, x_z_sc)};
|
||||
result = SelectCubeResult(x, y, z, x_z_sc, y_sc, x_z_sc);
|
||||
}
|
||||
SetDst(inst.dst[0], result);
|
||||
}
|
||||
|
||||
|
@ -1758,6 +1758,14 @@ void IREmitter::ImageWrite(const Value& handle, const Value& coords, const U32&
|
||||
Inst(Opcode::ImageWrite, Flags{info}, handle, coords, lod, multisampling, color);
|
||||
}
|
||||
|
||||
// Appends a CubeFaceCoord instruction taking `cube_coords` as its single operand
// and returns the resulting value (declared as F32x2 in the opcode table).
// Optional opcode: intended for use only when profile.supports_native_cube_calc is set.
[[nodiscard]] Value IREmitter::CubeFaceCoord(const Value& cube_coords) {
    Value face_coords = Inst(Opcode::CubeFaceCoord, cube_coords);
    return face_coords;
}
|
||||
|
||||
// Appends a CubeFaceIndex instruction taking `cube_coords` as its single operand
// and returns the resulting F32 value.
// Optional opcode: intended for use only when profile.supports_native_cube_calc is set.
[[nodiscard]] F32 IREmitter::CubeFaceIndex(const Value& cube_coords) {
    F32 face_index = Inst<F32>(Opcode::CubeFaceIndex, cube_coords);
    return face_index;
}
|
||||
|
||||
// Debug print maps to SPIRV's NonSemantic DebugPrintf instruction
|
||||
// Renderdoc will hook in its own implementation of the SPIRV instruction
|
||||
// Renderdoc accepts format specifiers, e.g. %u, listed here:
|
||||
|
@ -342,6 +342,9 @@ public:
|
||||
void ImageWrite(const Value& handle, const Value& coords, const U32& lod,
|
||||
const U32& multisampling, const Value& color, TextureInstInfo info);
|
||||
|
||||
[[nodiscard]] Value CubeFaceCoord(const Value& cube_coords);
|
||||
[[nodiscard]] F32 CubeFaceIndex(const Value& cube_coords);
|
||||
|
||||
void EmitVertex();
|
||||
void EmitPrimitive();
|
||||
|
||||
|
@ -374,6 +374,10 @@ OPCODE(ImageAtomicOr32, U32, Opaq
|
||||
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
|
||||
|
||||
// Cube operations - optional, usable if profile.supports_native_cube_calc
|
||||
OPCODE(CubeFaceCoord, F32x2, F32x3, )
|
||||
OPCODE(CubeFaceIndex, F32, F32x3, )
|
||||
|
||||
// Warp operations
|
||||
OPCODE(LaneId, U32, )
|
||||
OPCODE(WarpId, U32, )
|
||||
|
@ -24,6 +24,7 @@ struct Profile {
|
||||
bool support_explicit_workgroup_layout{};
|
||||
bool support_legacy_vertex_attributes{};
|
||||
bool supports_image_load_store_lod{};
|
||||
bool supports_native_cube_calc{};
|
||||
bool has_broken_spirv_clamp{};
|
||||
bool lower_left_origin_mode{};
|
||||
bool needs_manual_interpolation{};
|
||||
|
@ -271,6 +271,7 @@ bool Instance::CreateDevice() {
|
||||
maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
|
||||
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
|
||||
image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
|
||||
amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
|
||||
|
||||
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
|
||||
// with extensions.
|
||||
|
@ -159,6 +159,11 @@ public:
|
||||
return image_load_store_lod;
|
||||
}
|
||||
|
||||
/// Returns true when VK_AMD_gcn_shader is supported.
/// Reports the `amd_gcn_shader` flag, which CreateDevice() assigns from
/// add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME). The pipeline cache feeds this
/// into Profile::supports_native_cube_calc.
|
||||
bool IsAmdGcnShaderSupported() const {
|
||||
return amd_gcn_shader;
|
||||
}
|
||||
|
||||
/// Returns true when geometry shaders are supported by the device
|
||||
bool IsGeometryStageSupported() const {
|
||||
return features.geometryShader;
|
||||
@ -334,6 +339,7 @@ private:
|
||||
bool list_restart{};
|
||||
bool legacy_vertex_attributes{};
|
||||
bool image_load_store_lod{};
|
||||
bool amd_gcn_shader{};
|
||||
u64 min_imported_host_pointer_alignment{};
|
||||
u32 subgroup_size{};
|
||||
bool tooling_info{};
|
||||
|
@ -204,6 +204,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
||||
.support_explicit_workgroup_layout = true,
|
||||
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
|
||||
.supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
|
||||
.supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
|
||||
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
|
||||
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
|
||||
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
|
||||
|
Loading…
Reference in New Issue
Block a user