mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-05 00:42:48 +00:00
shader_recompiler: Use shared memory buffer for non-compute shared memory.
This commit is contained in:
parent
2188895b40
commit
cf47738da8
@ -754,7 +754,6 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
|||||||
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
|
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
|
||||||
src/shader_recompiler/ir/passes/ir_passes.h
|
src/shader_recompiler/ir/passes/ir_passes.h
|
||||||
src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp
|
src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp
|
||||||
src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
|
|
||||||
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
|
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
|
||||||
src/shader_recompiler/ir/passes/ring_access_elimination.cpp
|
src/shader_recompiler/ir/passes/ring_access_elimination.cpp
|
||||||
src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
|
src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
|
||||||
|
@ -813,22 +813,22 @@ void EmitContext::DefineSharedMemory() {
|
|||||||
if (!info.uses_shared) {
|
if (!info.uses_shared) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 max_shared_memory_size = profile.max_shared_memory_size;
|
const u32 max_shared_memory_size = profile.max_shared_memory_size;
|
||||||
u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
|
u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
|
||||||
if (shared_memory_size == 0) {
|
if (shared_memory_size == 0) {
|
||||||
shared_memory_size = DefaultSharedMemSize;
|
shared_memory_size = DefaultSharedMemSize;
|
||||||
}
|
}
|
||||||
|
if (info.stage == Stage::Compute && shared_memory_size <= max_shared_memory_size) {
|
||||||
const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
|
const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
|
||||||
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
|
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
|
||||||
|
|
||||||
if (shared_memory_size <= max_shared_memory_size) {
|
|
||||||
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
|
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
|
||||||
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
|
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
|
||||||
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
|
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
|
||||||
Name(shared_memory_u32, "shared_mem");
|
Name(shared_memory_u32, "shared_mem");
|
||||||
interfaces.push_back(shared_memory_u32);
|
interfaces.push_back(shared_memory_u32);
|
||||||
} else {
|
} else {
|
||||||
|
const Id type{TypeRuntimeArray(U32[1])};
|
||||||
shared_memory_u32_type = TypePointer(spv::StorageClass::StorageBuffer, type);
|
shared_memory_u32_type = TypePointer(spv::StorageClass::StorageBuffer, type);
|
||||||
shared_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
|
shared_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
|
||||||
|
|
||||||
@ -849,7 +849,6 @@ void EmitContext::DefineSharedMemory() {
|
|||||||
shared_memory_u32 = ssbo_id;
|
shared_memory_u32 = ssbo_id;
|
||||||
|
|
||||||
info.has_emulated_shared_memory = true;
|
info.has_emulated_shared_memory = true;
|
||||||
info.shared_memory_size = shared_memory_size;
|
|
||||||
interfaces.push_back(ssbo_id);
|
interfaces.push_back(ssbo_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -196,7 +196,6 @@ struct Info {
|
|||||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||||
bool has_emulated_shared_memory{};
|
bool has_emulated_shared_memory{};
|
||||||
bool has_readconst{};
|
bool has_readconst{};
|
||||||
u32 shared_memory_size{};
|
|
||||||
u8 mrt_mask{0u};
|
u8 mrt_mask{0u};
|
||||||
bool has_fetch_shader{false};
|
bool has_fetch_shader{false};
|
||||||
u32 fetch_shader_sgpr_base{0u};
|
u32 fetch_shader_sgpr_base{0u};
|
||||||
@ -233,7 +232,8 @@ struct Info {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AddBindings(Backend::Bindings& bnd) const {
|
void AddBindings(Backend::Bindings& bnd) const {
|
||||||
const auto total_buffers = buffers.size() + (has_readconst ? 1 : 0);
|
const auto total_buffers =
|
||||||
|
buffers.size() + (has_readconst ? 1 : 0) + (has_emulated_shared_memory ? 1 : 0);
|
||||||
bnd.buffer += total_buffers;
|
bnd.buffer += total_buffers;
|
||||||
bnd.unified += total_buffers + images.size() + samplers.size();
|
bnd.unified += total_buffers + images.size() + samplers.size();
|
||||||
bnd.user_data += ud_mask.NumRegs();
|
bnd.user_data += ud_mask.NumRegs();
|
||||||
|
@ -20,7 +20,6 @@ void FlattenExtendedUserdataPass(IR::Program& program);
|
|||||||
void ResourceTrackingPass(IR::Program& program);
|
void ResourceTrackingPass(IR::Program& program);
|
||||||
void CollectShaderInfoPass(IR::Program& program);
|
void CollectShaderInfoPass(IR::Program& program);
|
||||||
void LowerBufferFormatToRaw(IR::Program& program);
|
void LowerBufferFormatToRaw(IR::Program& program);
|
||||||
void LowerSharedMemToRegisters(IR::Program& program);
|
|
||||||
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
|
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
|
||||||
Stage stage);
|
Stage stage);
|
||||||
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);
|
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);
|
||||||
|
@ -1,38 +0,0 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
||||||
|
|
||||||
#include <boost/container/small_vector.hpp>
|
|
||||||
#include "shader_recompiler/ir/program.h"
|
|
||||||
|
|
||||||
namespace Shader::Optimization {
|
|
||||||
|
|
||||||
// Forwards shared-memory stores directly to the loads that read them back.
//
// Visits every instruction of every block in `program.blocks`. Each
// WriteSharedU32/WriteSharedU64 is recorded in `ds_writes`. Each
// LoadSharedU32/LoadSharedU64 is matched against the recorded writes by
// comparing the U32 held in Arg(1) of the instruction that produces the
// address operand; the load's uses are then replaced with the matched write's
// data operand (its Arg(1)) and the load is removed. After the walk, every
// recorded write is invalidated.
//
// NOTE(review): the only call site visible in this diff invoked the pass when
// `stage != Stage::Compute`.
void LowerSharedMemToRegisters(IR::Program& program) {
|
|
||||||
// Shared-memory writes seen so far, in visitation order.
boost::container::small_vector<IR::Inst*, 8> ds_writes;
|
|
||||||
// NOTE(review): `info` is bound here but not referenced again in this function.
Info& info{program.info};
|
|
||||||
for (IR::Block* const block : program.blocks) {
|
|
||||||
for (IR::Inst& inst : block->Instructions()) {
|
|
||||||
const auto opcode = inst.GetOpcode();
|
|
||||||
if (opcode == IR::Opcode::WriteSharedU32 || opcode == IR::Opcode::WriteSharedU64) {
|
|
||||||
// Remember the store so later loads can be matched against it.
ds_writes.emplace_back(&inst);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (opcode == IR::Opcode::LoadSharedU32 || opcode == IR::Opcode::LoadSharedU64) {
|
|
||||||
// Search for write instruction with same offset
// `prod` is the instruction producing the load's address operand.
// NOTE(review): presumably Arg(1) of that producer is an immediate
// offset, since it is read with U32() - confirm.
const IR::Inst* prod = inst.Arg(0).InstRecursive();
|
|
||||||
// find_if yields the first recorded write whose offset matches, i.e. the
// earliest one in visitation order.
const auto it = std::ranges::find_if(ds_writes, [&](const IR::Inst* write) {
|
|
||||||
const IR::Inst* write_prod = write->Arg(0).InstRecursive();
|
|
||||||
return write_prod->Arg(1).U32() == prod->Arg(1).U32();
|
|
||||||
});
|
|
||||||
// A load with no previously recorded write at the same offset trips this assert.
ASSERT(it != ds_writes.end());
|
|
||||||
// Replace data read with value written.
inst.ReplaceUsesWithAndRemove((*it)->Arg(1));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// We should have eliminated everything. Invalidate data write instructions.
for (const auto inst : ds_writes) {
|
|
||||||
inst->Invalidate();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Shader::Optimization
|
|
@ -82,9 +82,6 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
|
|||||||
}
|
}
|
||||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||||
Shader::Optimization::RingAccessElimination(program, runtime_info, stage);
|
Shader::Optimization::RingAccessElimination(program, runtime_info, stage);
|
||||||
if (stage != Stage::Compute) {
|
|
||||||
Shader::Optimization::LowerSharedMemToRegisters(program);
|
|
||||||
}
|
|
||||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||||
Shader::Optimization::FlattenExtendedUserdataPass(program);
|
Shader::Optimization::FlattenExtendedUserdataPass(program);
|
||||||
Shader::Optimization::ResourceTrackingPass(program);
|
Shader::Optimization::ResourceTrackingPass(program);
|
||||||
|
@ -357,6 +357,14 @@ void GraphicsPipeline::BuildDescSetLayout() {
|
|||||||
if (!stage) {
|
if (!stage) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (stage->has_emulated_shared_memory) {
|
||||||
|
bindings.push_back({
|
||||||
|
.binding = binding++,
|
||||||
|
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
||||||
|
.descriptorCount = 1,
|
||||||
|
.stageFlags = gp_stage_flags,
|
||||||
|
});
|
||||||
|
}
|
||||||
if (stage->has_readconst) {
|
if (stage->has_readconst) {
|
||||||
bindings.push_back({
|
bindings.push_back({
|
||||||
.binding = binding++,
|
.binding = binding++,
|
||||||
|
@ -535,6 +535,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
|||||||
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
||||||
.pBufferInfo = &buffer_infos.back(),
|
.pBufferInfo = &buffer_infos.back(),
|
||||||
});
|
});
|
||||||
|
++binding.buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bind the flattened user data buffer as a UBO so it's accessible to the shader
|
// Bind the flattened user data buffer as a UBO so it's accessible to the shader
|
||||||
|
Loading…
Reference in New Issue
Block a user