shader_recompiler: Remove existing shared memory emulation

* The current impl relies on a backend-side implementation that hooks into every shared memory access. It also doesn't handle atomics. It will be replaced by an IR pass that solves these issues.
IndecisiveTurtle 2025-02-14 11:52:28 +02:00
parent b0dd81a2b9
commit 174107b410
7 changed files with 22 additions and 127 deletions
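Note: the replacement IR pass mentioned in the commit message is not part of this commit. As a rough, hypothetical sketch of the idea (the Inst, Opcode, and LowerSharedMemoryPass names below are illustrative, not shadPS4 API), such a pass could rewrite every shared-memory instruction, atomics included, in one place instead of branching inside each backend emitter:

// Hypothetical sketch of the kind of IR pass the commit message describes.
// None of these types exist in the shadPS4 codebase; they only model the idea.
#include <cstdint>
#include <vector>

enum class Opcode {
    LoadSharedU32,
    WriteSharedU32,
    SharedAtomicIAdd32,
    LoadBufferU32,
    WriteBufferU32,
    BufferAtomicIAdd32,
    Other,
};

struct Inst {
    Opcode op{Opcode::Other};
};

// If the requested LDS size fits the device limit, keep real workgroup memory;
// otherwise rewrite shared ops into storage-buffer ops, so the backend needs no
// per-access hooks and atomics are covered by the same mapping.
void LowerSharedMemoryPass(std::vector<Inst>& program, uint32_t shared_mem_size,
                           uint32_t device_shared_mem_limit) {
    if (shared_mem_size <= device_shared_mem_limit) {
        return;
    }
    for (Inst& inst : program) {
        switch (inst.op) {
        case Opcode::LoadSharedU32:
            inst.op = Opcode::LoadBufferU32;
            break;
        case Opcode::WriteSharedU32:
            inst.op = Opcode::WriteBufferU32;
            break;
        case Opcode::SharedAtomicIAdd32:
            inst.op = Opcode::BufferAtomicIAdd32;
            break;
        default:
            break;
        }
    }
}

Doing the rewrite once at the IR level is what lets the diff below delete the duplicated has_emulated_shared_memory branches from the SPIR-V backend, the pipeline setup, and the rasterizer.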


@@ -9,65 +9,35 @@ namespace Shader::Backend::SPIRV {
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
const Id shift_id{ctx.ConstU32(2U)};
const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
if (ctx.info.has_emulated_shared_memory) {
const Id pointer =
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
return ctx.OpLoad(ctx.U32[1], pointer);
} else {
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
return ctx.OpLoad(ctx.U32[1], pointer);
}
}
Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
const Id shift_id{ctx.ConstU32(2U)};
const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
if (ctx.info.has_emulated_shared_memory) {
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
ctx.u32_zero_value, base_index)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
ctx.u32_zero_value, next_index)};
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
ctx.OpLoad(ctx.U32[1], rhs_pointer));
} else {
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
ctx.OpLoad(ctx.U32[1], rhs_pointer));
}
}
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
const Id shift{ctx.ConstU32(2U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
if (ctx.info.has_emulated_shared_memory) {
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
ctx.u32_zero_value, word_offset);
ctx.OpStore(pointer, value);
} else {
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
ctx.OpStore(pointer, value);
}
}
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
const Id shift{ctx.ConstU32(2U)};
const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))};
if (ctx.info.has_emulated_shared_memory) {
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
ctx.u32_zero_value, word_offset)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32,
ctx.u32_zero_value, next_offset)};
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
} else {
const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
}
}
} // namespace Shader::Backend::SPIRV
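Both the removed emulated path and the surviving path above encode the same word-indexed addressing; as a plain C++ restatement for readers unfamiliar with the SPIR-V emitter (illustrative only, not code from the repository, and the array length of 512 is an arbitrary example):

#include <array>
#include <cstdint>
#include <utility>

// Byte offsets are shifted right by 2 to index a u32-typed shared array, and a
// 64-bit access reads two consecutive words.
std::pair<uint32_t, uint32_t> LoadShared64(const std::array<uint32_t, 512>& shared_mem,
                                           uint32_t byte_offset) {
    const uint32_t base_index = byte_offset >> 2; // OpShiftRightArithmetic by ConstU32(2U)
    const uint32_t next_index = base_index + 1;   // OpIAdd for the upper word
    return {shared_mem[base_index], shared_mem[next_index]}; // OpCompositeConstruct of two OpLoads
}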


@@ -5,7 +5,6 @@
#include "common/div_ceil.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/ir/passes/srt.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/types.h"
@@ -805,51 +804,18 @@ void EmitContext::DefineImagesAndSamplers() {
}
void EmitContext::DefineSharedMemory() {
static constexpr size_t DefaultSharedMemSize = 2_KB;
if (!info.uses_shared) {
return;
}
ASSERT(info.stage == Stage::Compute);
const u32 max_shared_memory_size = profile.max_shared_memory_size;
u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
if (shared_memory_size == 0) {
shared_memory_size = DefaultSharedMemSize;
}
const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
const Id type{TypeArray(U32[1], ConstU32(num_elements))};
if (shared_memory_size <= max_shared_memory_size) {
shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
Name(shared_memory_u32, "shared_mem");
interfaces.push_back(shared_memory_u32);
} else {
shared_memory_u32_type = TypePointer(spv::StorageClass::StorageBuffer, type);
shared_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
Decorate(type, spv::Decoration::ArrayStride, 4);
const Id struct_type{TypeStruct(type)};
Name(struct_type, "shared_memory_buf");
Decorate(struct_type, spv::Decoration::Block);
MemberName(struct_type, 0, "data");
MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
const Id struct_pointer_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)};
const Id ssbo_id{AddGlobalVariable(struct_pointer_type, spv::StorageClass::StorageBuffer)};
Decorate(ssbo_id, spv::Decoration::Binding, binding.unified++);
Decorate(ssbo_id, spv::Decoration::DescriptorSet, 0U);
Name(ssbo_id, "shared_mem_ssbo");
shared_memory_u32 = ssbo_id;
info.has_emulated_shared_memory = true;
info.shared_memory_size = shared_memory_size;
interfaces.push_back(ssbo_id);
}
}
Id EmitContext::DefineFloat32ToUfloatM5(u32 mantissa_bits, const std::string_view name) {
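As background for the removed size check (not part of this diff): the profile.max_shared_memory_size value the deleted branch compared against typically corresponds to the Vulkan device limit below. A minimal sketch, assuming physical_device is a valid VkPhysicalDevice obtained during instance setup:

#include <vulkan/vulkan.h>
#include <cstdint>

uint32_t QueryMaxComputeSharedMemory(VkPhysicalDevice physical_device) {
    VkPhysicalDeviceProperties props{};
    vkGetPhysicalDeviceProperties(physical_device, &props);
    // Workgroup (shared) memory limit in bytes; commonly 32768 or 65536 on desktop GPUs.
    return props.limits.maxComputeSharedMemorySize;
}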


@@ -507,7 +507,6 @@ void Translator::EmitFetch(const GcnInst& inst) {
info.buffers.push_back({
.sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
.used_types = IR::Type::F32,
.is_instance_data = true,
.instance_attrib = attrib.semantic,
});
}


@@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
#include <span>
#include <vector>
#include <boost/container/small_vector.hpp>
@@ -19,7 +18,6 @@
#include "shader_recompiler/params.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/resource.h"
namespace Shader {
@@ -51,7 +49,6 @@ struct BufferResource {
IR::Type used_types;
AmdGpu::Buffer inline_cbuf;
BufferType buffer_type;
bool is_instance_data{};
u8 instance_attrib{};
bool is_written{};
bool is_formatted{};
@@ -203,10 +200,8 @@ struct Info {
bool uses_unpack_10_11_11{};
bool stores_tess_level_outer{};
bool stores_tess_level_inner{};
bool translation_failed{}; // indicates that shader has unsupported instructions
bool has_emulated_shared_memory{};
bool translation_failed{};
bool has_readconst{};
u32 shared_memory_size{};
u8 mrt_mask{0u};
bool has_fetch_shader{false};
u32 fetch_shader_sgpr_base{0u};
@@ -243,10 +238,8 @@ struct Info {
}
void AddBindings(Backend::Bindings& bnd) const {
const auto total_buffers =
buffers.size() + (has_emulated_shared_memory ? 1 : 0);
bnd.buffer += total_buffers;
bnd.unified += total_buffers + images.size() + samplers.size();
bnd.buffer += buffers.size();
bnd.unified += buffers.size() + images.size() + samplers.size();
bnd.user_data += ud_mask.NumRegs();
}


@@ -98,9 +98,6 @@ struct StageSpecialization {
});
}
u32 binding{};
if (info->has_emulated_shared_memory) {
binding++;
}
ForEachSharp(binding, buffers, info->buffers,
[profile_](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.stride = sharp.GetStride();
@@ -192,12 +189,6 @@
}
}
u32 binding{};
if (info->has_emulated_shared_memory != other.info->has_emulated_shared_memory) {
return false;
}
if (info->has_emulated_shared_memory) {
binding++;
}
for (u32 i = 0; i < buffers.size(); i++) {
if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
return false;


@@ -27,15 +27,6 @@ ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler,
u32 binding{};
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
if (info->has_emulated_shared_memory) {
bindings.push_back({
.binding = binding++,
.descriptorType = vk::DescriptorType::eStorageBuffer,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
});
}
for (const auto& buffer : info->buffers) {
const auto sharp = buffer.GetSharp(*info);
bindings.push_back({


@@ -519,27 +519,12 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
}
}
// Bind a SSBO to act as shared memory in case of not being able to use a workgroup buffer
// (e.g. when the compute shared memory is bigger than the GPU's shared memory)
if (stage.has_emulated_shared_memory) {
const auto* lds_buf = buffer_cache.GetLdsBuffer();
buffer_infos.emplace_back(lds_buf->Handle(), 0, lds_buf->SizeBytes());
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding.unified++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eStorageBuffer,
.pBufferInfo = &buffer_infos.back(),
});
++binding.buffer;
}
// Second pass to re-bind buffers that were updated after binding
for (u32 i = 0; i < buffer_bindings.size(); i++) {
const auto& [buffer_id, vsharp] = buffer_bindings[i];
const auto& desc = stage.buffers[i];
const bool is_storage = desc.IsStorage(vsharp, pipeline_cache.GetProfile());
// Buffer is not from the cache, either a special buffer or unbound.
if (!buffer_id) {
if (desc.buffer_type == Shader::BufferType::GdsBuffer) {
const auto* gds_buf = buffer_cache.GetGdsBuffer();