mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-12-10 21:58:45 +00:00
spirv: Simplify shared memory handling (#427)
* spirv: Simplify shared memory handling * spirv: Ignore clip plane * spirv: Fix image offsets * ir_pass: Implement shared memory lowering pass * NVIDIA doesn't like using shared mem in fragment shader and softlocks driver * spirv: Add log for ignoring pos1
This commit is contained in:
@@ -259,10 +259,6 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
|
||||
|
||||
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
return Inst<U32>(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
|
||||
case 16:
|
||||
return Inst<U32>(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
|
||||
case 32:
|
||||
return Inst<U32>(Opcode::LoadSharedU32, offset);
|
||||
case 64:
|
||||
@@ -276,12 +272,6 @@ Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
||||
|
||||
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
|
||||
switch (bit_size) {
|
||||
case 8:
|
||||
Inst(Opcode::WriteSharedU8, offset, value);
|
||||
break;
|
||||
case 16:
|
||||
Inst(Opcode::WriteSharedU16, offset, value);
|
||||
break;
|
||||
case 32:
|
||||
Inst(Opcode::WriteSharedU32, offset, value);
|
||||
break;
|
||||
@@ -1398,13 +1388,13 @@ F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& body
|
||||
}
|
||||
|
||||
Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset,
|
||||
const Value& offset2, TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageGather, Flags{info}, handle, coords, offset, offset2);
|
||||
TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageGather, Flags{info}, handle, coords, offset);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset,
|
||||
const Value& offset2, const F32& dref, TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, offset2, dref);
|
||||
const F32& dref, TextureInstInfo info) {
|
||||
return Inst(Opcode::ImageGatherDref, Flags{info}, handle, coords, offset, dref);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset,
|
||||
|
||||
@@ -256,18 +256,17 @@ public:
|
||||
const F32& dref, const U32& offset,
|
||||
TextureInstInfo info);
|
||||
|
||||
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod,
|
||||
const IR::U1& skip_mips);
|
||||
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod,
|
||||
const IR::U1& skip_mips, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod,
|
||||
const U1& skip_mips);
|
||||
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod,
|
||||
const U1& skip_mips, TextureInstInfo info);
|
||||
|
||||
[[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords,
|
||||
TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset,
|
||||
const Value& offset2, TextureInstInfo info);
|
||||
TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
|
||||
const Value& offset, const Value& offset2, const F32& dref,
|
||||
TextureInstInfo info);
|
||||
const Value& offset, const F32& dref, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
|
||||
const U32& lod, const U32& multisampling, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
|
||||
|
||||
@@ -59,8 +59,6 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
||||
case Opcode::WriteSharedU128:
|
||||
case Opcode::WriteSharedU64:
|
||||
case Opcode::WriteSharedU32:
|
||||
case Opcode::WriteSharedU16:
|
||||
case Opcode::WriteSharedU8:
|
||||
case Opcode::ImageWrite:
|
||||
case Opcode::ImageAtomicIAdd32:
|
||||
case Opcode::ImageAtomicSMin32:
|
||||
|
||||
@@ -26,15 +26,9 @@ OPCODE(WorkgroupMemoryBarrier, Void,
|
||||
OPCODE(DeviceMemoryBarrier, Void, )
|
||||
|
||||
// Shared memory operations
|
||||
OPCODE(LoadSharedU8, U32, U32, )
|
||||
OPCODE(LoadSharedS8, U32, U32, )
|
||||
OPCODE(LoadSharedU16, U32, U32, )
|
||||
OPCODE(LoadSharedS16, U32, U32, )
|
||||
OPCODE(LoadSharedU32, U32, U32, )
|
||||
OPCODE(LoadSharedU64, U32x2, U32, )
|
||||
OPCODE(LoadSharedU128, U32x4, U32, )
|
||||
OPCODE(WriteSharedU8, Void, U32, U32, )
|
||||
OPCODE(WriteSharedU16, Void, U32, U32, )
|
||||
OPCODE(WriteSharedU32, Void, U32, U32, )
|
||||
OPCODE(WriteSharedU64, Void, U32, U32x2, )
|
||||
OPCODE(WriteSharedU128, Void, U32, U32x4, )
|
||||
@@ -298,12 +292,12 @@ OPCODE(ConvertU16U32, U16, U32,
|
||||
OPCODE(ConvertU32U16, U32, U16, )
|
||||
|
||||
// Image operations
|
||||
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, F32, U32, )
|
||||
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, U32, U32, )
|
||||
OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, Opaque, F32, U32, )
|
||||
OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, Opaque, U32, U32, )
|
||||
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, )
|
||||
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, )
|
||||
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, F32, Opaque, )
|
||||
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, U32, Opaque, )
|
||||
OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, Opaque, F32, Opaque, )
|
||||
OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, Opaque, U32, Opaque, )
|
||||
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, )
|
||||
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, )
|
||||
OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
|
||||
OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, )
|
||||
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
|
||||
|
||||
@@ -14,5 +14,6 @@ void DeadCodeEliminationPass(IR::Program& program);
|
||||
void ConstantPropagationPass(IR::BlockList& program);
|
||||
void ResourceTrackingPass(IR::Program& program);
|
||||
void CollectShaderInfoPass(IR::Program& program);
|
||||
void LowerSharedMemToRegisters(IR::Program& program);
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
void LowerSharedMemToRegisters(IR::Program& program) {
|
||||
boost::container::small_vector<IR::Inst*, 8> ds_writes;
|
||||
Info& info{program.info};
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
const auto opcode = inst.GetOpcode();
|
||||
if (opcode == IR::Opcode::WriteSharedU32 || opcode == IR::Opcode::WriteSharedU64) {
|
||||
ds_writes.emplace_back(&inst);
|
||||
continue;
|
||||
}
|
||||
if (opcode == IR::Opcode::LoadSharedU32 || opcode == IR::Opcode::LoadSharedU64) {
|
||||
// Search for write instruction with same offset
|
||||
const IR::Inst* prod = inst.Arg(0).InstRecursive();
|
||||
const auto it = std::ranges::find_if(ds_writes, [&](const IR::Inst* write) {
|
||||
const IR::Inst* write_prod = write->Arg(0).InstRecursive();
|
||||
return write_prod->Arg(1).U32() == prod->Arg(1).U32() &&
|
||||
write_prod->Arg(0) == prod->Arg(0);
|
||||
});
|
||||
ASSERT(it != ds_writes.end());
|
||||
// Replace data read with value written.
|
||||
inst.ReplaceUsesWith((*it)->Arg(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
// We should have eliminated everything. Invalidate data write instructions.
|
||||
for (const auto inst : ds_writes) {
|
||||
inst->Invalidate();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
@@ -171,6 +171,22 @@ bool IsImageStorageInstruction(const IR::Inst& inst) {
|
||||
}
|
||||
}
|
||||
|
||||
u32 ImageOffsetArgumentPosition(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ImageGather:
|
||||
case IR::Opcode::ImageGatherDref:
|
||||
return 2;
|
||||
case IR::Opcode::ImageSampleExplicitLod:
|
||||
case IR::Opcode::ImageSampleImplicitLod:
|
||||
return 3;
|
||||
case IR::Opcode::ImageSampleDrefExplicitLod:
|
||||
case IR::Opcode::ImageSampleDrefImplicitLod:
|
||||
return 4;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
class Descriptors {
|
||||
public:
|
||||
explicit Descriptors(Info& info_)
|
||||
@@ -574,33 +590,29 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
||||
|
||||
if (inst_info.has_offset) {
|
||||
// The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16].
|
||||
const u32 arg_pos = [&]() -> u32 {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ImageGather:
|
||||
case IR::Opcode::ImageGatherDref:
|
||||
return 2;
|
||||
case IR::Opcode::ImageSampleExplicitLod:
|
||||
case IR::Opcode::ImageSampleImplicitLod:
|
||||
return 3;
|
||||
case IR::Opcode::ImageSampleDrefExplicitLod:
|
||||
case IR::Opcode::ImageSampleDrefImplicitLod:
|
||||
return 4;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return inst_info.is_depth ? 4 : 3;
|
||||
}();
|
||||
const u32 arg_pos = ImageOffsetArgumentPosition(inst);
|
||||
const IR::Value arg = inst.Arg(arg_pos);
|
||||
ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type");
|
||||
const auto f = [&](IR::Value value, u32 offset) -> auto {
|
||||
|
||||
const auto read = [&](u32 offset) -> auto {
|
||||
return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true);
|
||||
};
|
||||
|
||||
const auto x = f(arg, 0);
|
||||
const auto y = f(arg, 8);
|
||||
const auto z = f(arg, 16);
|
||||
const IR::Value value = ir.CompositeConstruct(x, y, z);
|
||||
inst.SetArg(arg_pos, value);
|
||||
switch (image.GetType()) {
|
||||
case AmdGpu::ImageType::Color1D:
|
||||
case AmdGpu::ImageType::Color1DArray:
|
||||
inst.SetArg(arg_pos, read(0));
|
||||
break;
|
||||
case AmdGpu::ImageType::Color2D:
|
||||
case AmdGpu::ImageType::Color2DArray:
|
||||
inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8)));
|
||||
break;
|
||||
case AmdGpu::ImageType::Color3D:
|
||||
inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8), read(16)));
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
if (inst_info.has_lod_clamp) {
|
||||
|
||||
@@ -16,18 +16,6 @@ void Visit(Info& info, IR::Inst& inst) {
|
||||
info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::LoadSharedS8:
|
||||
case IR::Opcode::LoadSharedU8:
|
||||
case IR::Opcode::WriteSharedU8:
|
||||
info.uses_shared_u8 = true;
|
||||
info.uses_shared = true;
|
||||
break;
|
||||
case IR::Opcode::LoadSharedS16:
|
||||
case IR::Opcode::LoadSharedU16:
|
||||
case IR::Opcode::WriteSharedU16:
|
||||
info.uses_shared_u16 = true;
|
||||
info.uses_shared = true;
|
||||
break;
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
|
||||
Reference in New Issue
Block a user