shader_recompiler: Use shared memory buffer for non-compute shared memory.

This commit is contained in:
squidbus 2025-02-11 19:03:55 -08:00
parent 2188895b40
commit cf47738da8
8 changed files with 16 additions and 51 deletions

View File

@ -754,7 +754,6 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
src/shader_recompiler/ir/passes/ir_passes.h
src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp
src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
src/shader_recompiler/ir/passes/ring_access_elimination.cpp
src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp

View File

@ -813,22 +813,22 @@ void EmitContext::DefineSharedMemory() {
if (!info.uses_shared) {
return;
}
const u32 max_shared_memory_size = profile.max_shared_memory_size;
u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
if (shared_memory_size == 0) {
shared_memory_size = DefaultSharedMemSize;
}
const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
if (shared_memory_size <= max_shared_memory_size) {
if (info.stage == Stage::Compute && shared_memory_size <= max_shared_memory_size) {
const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
Name(shared_memory_u32, "shared_mem");
interfaces.push_back(shared_memory_u32);
} else {
const Id type{TypeRuntimeArray(U32[1])};
shared_memory_u32_type = TypePointer(spv::StorageClass::StorageBuffer, type);
shared_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
@ -849,7 +849,6 @@ void EmitContext::DefineSharedMemory() {
shared_memory_u32 = ssbo_id;
info.has_emulated_shared_memory = true;
info.shared_memory_size = shared_memory_size;
interfaces.push_back(ssbo_id);
}
}

View File

@ -196,7 +196,6 @@ struct Info {
bool translation_failed{}; // indicates that shader has unsupported instructions
bool has_emulated_shared_memory{};
bool has_readconst{};
u32 shared_memory_size{};
u8 mrt_mask{0u};
bool has_fetch_shader{false};
u32 fetch_shader_sgpr_base{0u};
@ -233,7 +232,8 @@ struct Info {
}
void AddBindings(Backend::Bindings& bnd) const {
const auto total_buffers = buffers.size() + (has_readconst ? 1 : 0);
const auto total_buffers =
buffers.size() + (has_readconst ? 1 : 0) + (has_emulated_shared_memory ? 1 : 0);
bnd.buffer += total_buffers;
bnd.unified += total_buffers + images.size() + samplers.size();
bnd.user_data += ud_mask.NumRegs();

View File

@ -20,7 +20,6 @@ void FlattenExtendedUserdataPass(IR::Program& program);
void ResourceTrackingPass(IR::Program& program);
void CollectShaderInfoPass(IR::Program& program);
void LowerBufferFormatToRaw(IR::Program& program);
void LowerSharedMemToRegisters(IR::Program& program);
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
Stage stage);
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);

View File

@ -1,38 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <boost/container/small_vector.hpp>
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization {
/// Forwards values written to shared memory directly to the loads that read them back,
/// then discards the write instructions.
///
/// Every block of @p program is walked in order. Each WriteSharedU32/WriteSharedU64 is
/// recorded. Each LoadSharedU32/LoadSharedU64 is paired with a recorded write whose offset
/// matches and is replaced by the value that write stored. Once all blocks have been
/// processed, every recorded write is invalidated.
///
/// The offset of an access is taken from operand 1 of the instruction that produces its
/// address operand (operand 0), read as a U32.
/// NOTE(review): presumably the address is always "base + immediate offset" - confirm
/// against the translator that emits these instructions.
///
/// Asserts when a load has no recorded write with a matching offset.
///
/// @param program Program whose shared-memory loads and writes are rewritten in place.
void LowerSharedMemToRegisters(IR::Program& program) {
    boost::container::small_vector<IR::Inst*, 8> ds_writes;
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            const auto opcode = inst.GetOpcode();
            if (opcode == IR::Opcode::WriteSharedU32 || opcode == IR::Opcode::WriteSharedU64) {
                // Remember the write so that later loads can pick up its value.
                ds_writes.emplace_back(&inst);
                continue;
            }
            if (opcode == IR::Opcode::LoadSharedU32 || opcode == IR::Opcode::LoadSharedU64) {
                // Search for write instruction with same offset. The load's offset does not
                // depend on the candidate write, so it is computed once up front.
                const IR::Inst* prod = inst.Arg(0).InstRecursive();
                const auto load_offset = prod->Arg(1).U32();
                // NOTE(review): this selects the first recorded write with the offset, not
                // the most recent one - confirm offsets are never written more than once.
                const auto it = std::ranges::find_if(ds_writes, [&](const IR::Inst* write) {
                    const IR::Inst* write_prod = write->Arg(0).InstRecursive();
                    return write_prod->Arg(1).U32() == load_offset;
                });
                ASSERT(it != ds_writes.end());
                // Replace data read with value written.
                inst.ReplaceUsesWithAndRemove((*it)->Arg(1));
            }
        }
    }
    // We should have eliminated everything. Invalidate data write instructions.
    for (const auto inst : ds_writes) {
        inst->Invalidate();
    }
}
} // namespace Shader::Optimization

View File

@ -82,9 +82,6 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
}
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::RingAccessElimination(program, runtime_info, stage);
if (stage != Stage::Compute) {
Shader::Optimization::LowerSharedMemToRegisters(program);
}
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::FlattenExtendedUserdataPass(program);
Shader::Optimization::ResourceTrackingPass(program);

View File

@ -357,6 +357,14 @@ void GraphicsPipeline::BuildDescSetLayout() {
if (!stage) {
continue;
}
if (stage->has_emulated_shared_memory) {
bindings.push_back({
.binding = binding++,
.descriptorType = vk::DescriptorType::eStorageBuffer,
.descriptorCount = 1,
.stageFlags = gp_stage_flags,
});
}
if (stage->has_readconst) {
bindings.push_back({
.binding = binding++,

View File

@ -535,6 +535,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
.descriptorType = vk::DescriptorType::eStorageBuffer,
.pBufferInfo = &buffer_infos.back(),
});
++binding.buffer;
}
// Bind the flattened user data buffer as a UBO so it's accessible to the shader