shader_recompiler: Use shared memory buffer for non-compute shared memory.

This commit is contained in:
squidbus 2025-02-11 19:03:55 -08:00
parent 2188895b40
commit cf47738da8
8 changed files with 16 additions and 51 deletions

View File

@ -754,7 +754,6 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/passes/identity_removal_pass.cpp src/shader_recompiler/ir/passes/identity_removal_pass.cpp
src/shader_recompiler/ir/passes/ir_passes.h src/shader_recompiler/ir/passes/ir_passes.h
src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp
src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
src/shader_recompiler/ir/passes/ring_access_elimination.cpp src/shader_recompiler/ir/passes/ring_access_elimination.cpp
src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp

View File

@ -813,22 +813,22 @@ void EmitContext::DefineSharedMemory() {
if (!info.uses_shared) { if (!info.uses_shared) {
return; return;
} }
const u32 max_shared_memory_size = profile.max_shared_memory_size; const u32 max_shared_memory_size = profile.max_shared_memory_size;
u32 shared_memory_size = runtime_info.cs_info.shared_memory_size; u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
if (shared_memory_size == 0) { if (shared_memory_size == 0) {
shared_memory_size = DefaultSharedMemSize; shared_memory_size = DefaultSharedMemSize;
} }
if (info.stage == Stage::Compute && shared_memory_size <= max_shared_memory_size) {
const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)}; const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
const Id type{TypeArray(U32[1], ConstU32(num_elements))}; const Id type{TypeArray(U32[1], ConstU32(num_elements))};
if (shared_memory_size <= max_shared_memory_size) {
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
Name(shared_memory_u32, "shared_mem"); Name(shared_memory_u32, "shared_mem");
interfaces.push_back(shared_memory_u32); interfaces.push_back(shared_memory_u32);
} else { } else {
const Id type{TypeRuntimeArray(U32[1])};
shared_memory_u32_type = TypePointer(spv::StorageClass::StorageBuffer, type); shared_memory_u32_type = TypePointer(spv::StorageClass::StorageBuffer, type);
shared_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); shared_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
@ -849,7 +849,6 @@ void EmitContext::DefineSharedMemory() {
shared_memory_u32 = ssbo_id; shared_memory_u32 = ssbo_id;
info.has_emulated_shared_memory = true; info.has_emulated_shared_memory = true;
info.shared_memory_size = shared_memory_size;
interfaces.push_back(ssbo_id); interfaces.push_back(ssbo_id);
} }
} }

View File

@ -196,7 +196,6 @@ struct Info {
bool translation_failed{}; // indicates that shader has unsupported instructions bool translation_failed{}; // indicates that shader has unsupported instructions
bool has_emulated_shared_memory{}; bool has_emulated_shared_memory{};
bool has_readconst{}; bool has_readconst{};
u32 shared_memory_size{};
u8 mrt_mask{0u}; u8 mrt_mask{0u};
bool has_fetch_shader{false}; bool has_fetch_shader{false};
u32 fetch_shader_sgpr_base{0u}; u32 fetch_shader_sgpr_base{0u};
@ -233,7 +232,8 @@ struct Info {
} }
void AddBindings(Backend::Bindings& bnd) const { void AddBindings(Backend::Bindings& bnd) const {
const auto total_buffers = buffers.size() + (has_readconst ? 1 : 0); const auto total_buffers =
buffers.size() + (has_readconst ? 1 : 0) + (has_emulated_shared_memory ? 1 : 0);
bnd.buffer += total_buffers; bnd.buffer += total_buffers;
bnd.unified += total_buffers + images.size() + samplers.size(); bnd.unified += total_buffers + images.size() + samplers.size();
bnd.user_data += ud_mask.NumRegs(); bnd.user_data += ud_mask.NumRegs();

View File

@ -20,7 +20,6 @@ void FlattenExtendedUserdataPass(IR::Program& program);
void ResourceTrackingPass(IR::Program& program); void ResourceTrackingPass(IR::Program& program);
void CollectShaderInfoPass(IR::Program& program); void CollectShaderInfoPass(IR::Program& program);
void LowerBufferFormatToRaw(IR::Program& program); void LowerBufferFormatToRaw(IR::Program& program);
void LowerSharedMemToRegisters(IR::Program& program);
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info, void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
Stage stage); Stage stage);
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info); void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);

View File

@ -1,38 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <boost/container/small_vector.hpp>
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization {
/// Forwards shared-memory stores straight to the loads that read them back and then invalidates
/// the stores.
///
/// Blocks are visited in the order given by `program.blocks`, instructions in the order given by
/// `block->Instructions()`. Every WriteSharedU32/WriteSharedU64 is recorded as it is encountered.
/// Every LoadSharedU32/LoadSharedU64 is replaced by the data operand (argument 1) of the most
/// recently recorded write whose address producer has the same value in its second argument.
///
/// A load that has no matching recorded write trips the ASSERT.
void LowerSharedMemToRegisters(IR::Program& program) {
    boost::container::small_vector<IR::Inst*, 8> ds_writes;
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            const auto opcode = inst.GetOpcode();
            if (opcode == IR::Opcode::WriteSharedU32 || opcode == IR::Opcode::WriteSharedU64) {
                ds_writes.emplace_back(&inst);
                continue;
            }
            if (opcode == IR::Opcode::LoadSharedU32 || opcode == IR::Opcode::LoadSharedU64) {
                // Search for write instruction with same offset.
                // NOTE(review): Arg(1) of the address producer is read with U32(), so it is
                // presumably an immediate offset - confirm against the translator.
                const IR::Inst* prod = inst.Arg(0).InstRecursive();
                // Walk the recorded writes newest-to-oldest: when the same offset has been
                // written more than once, the load must observe the latest value, not the first.
                const auto it = std::ranges::find_if(
                    ds_writes.rbegin(), ds_writes.rend(), [&](const IR::Inst* write) {
                        const IR::Inst* write_prod = write->Arg(0).InstRecursive();
                        return write_prod->Arg(1).U32() == prod->Arg(1).U32();
                    });
                ASSERT(it != ds_writes.rend());
                // Replace data read with value written.
                inst.ReplaceUsesWithAndRemove((*it)->Arg(1));
            }
        }
    }
    // We should have eliminated everything. Invalidate data write instructions.
    for (IR::Inst* const write : ds_writes) {
        write->Invalidate();
    }
}
} // namespace Shader::Optimization

View File

@ -82,9 +82,6 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
} }
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::RingAccessElimination(program, runtime_info, stage); Shader::Optimization::RingAccessElimination(program, runtime_info, stage);
if (stage != Stage::Compute) {
Shader::Optimization::LowerSharedMemToRegisters(program);
}
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::FlattenExtendedUserdataPass(program); Shader::Optimization::FlattenExtendedUserdataPass(program);
Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::ResourceTrackingPass(program);

View File

@ -357,6 +357,14 @@ void GraphicsPipeline::BuildDescSetLayout() {
if (!stage) { if (!stage) {
continue; continue;
} }
if (stage->has_emulated_shared_memory) {
bindings.push_back({
.binding = binding++,
.descriptorType = vk::DescriptorType::eStorageBuffer,
.descriptorCount = 1,
.stageFlags = gp_stage_flags,
});
}
if (stage->has_readconst) { if (stage->has_readconst) {
bindings.push_back({ bindings.push_back({
.binding = binding++, .binding = binding++,

View File

@ -535,6 +535,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
.descriptorType = vk::DescriptorType::eStorageBuffer, .descriptorType = vk::DescriptorType::eStorageBuffer,
.pBufferInfo = &buffer_infos.back(), .pBufferInfo = &buffer_infos.back(),
}); });
++binding.buffer;
} }
// Bind the flattened user data buffer as a UBO so it's accessible to the shader // Bind the flattened user data buffer as a UBO so it's accessible to the shader