From 57df6f189f1e9efa0418f7e6ff9f9b39ebb94aa7 Mon Sep 17 00:00:00 2001 From: Frodo Baggins Date: Sat, 12 Oct 2024 22:56:41 -0700 Subject: [PATCH] address some review comments --- .../backend/spirv/spirv_emit_context.cpp | 2 +- .../frontend/translate/translate.cpp | 2 +- src/shader_recompiler/info.h | 4 +- .../ir/breadth_first_search.h | 8 ++-- .../passes/flatten_extended_userdata_pass.cpp | 28 +++++-------- src/shader_recompiler/ir/passes/srt.h | 40 ++----------------- src/video_core/buffer_cache/buffer_cache.cpp | 8 ++-- src/video_core/buffer_cache/buffer_cache.h | 2 +- 8 files changed, 26 insertions(+), 68 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index f357a0746..dc404b121 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -460,7 +460,7 @@ void EmitContext::DefineBuffers() { const auto storage_class = spv::StorageClass::Uniform; const Id pointer_type = TypePointer(storage_class, data_type); const Id record_array_type{ - TypeArray(U32[1], ConstU32(static_cast(info.flattened_ud_buf.num_dwords())))}; + TypeArray(U32[1], ConstU32(static_cast(info.flattened_ud_buf.size())))}; const Id struct_type{define_struct(record_array_type, false, "srt_flatbuf_ty")}; diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 7b163b0a8..ccce31a24 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -418,7 +418,7 @@ void Translator::EmitFetch(const GcnInst& inst) { if (step_rate == Info::VsInput::OverStepRate0 || step_rate == Info::VsInput::OverStepRate1) { info.buffers.push_back({ - .sharp_idx = info.srt_info.reserve_sharp(attrib.sgpr_base, attrib.dword_offset, 4), + .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4), .used_types = IR::Type::F32, .is_instance_data = true, }); diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 4a560c747..610090da0 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -180,7 +180,7 @@ struct Info { SamplerResourceList samplers; PersistentSrtInfo srt_info; - FlattenedUserDataBuffer flattened_ud_buf; + std::vector flattened_ud_buf; std::span user_data; Stage stage; @@ -213,7 +213,7 @@ struct Info { template inline T ReadUdSharp(u32 sharp_idx) const noexcept { - return flattened_ud_buf.ReadUdSharp(sharp_idx); + return *reinterpret_cast(&flattened_ud_buf[sharp_idx]); } template diff --git a/src/shader_recompiler/ir/breadth_first_search.h b/src/shader_recompiler/ir/breadth_first_search.h index b3ed87204..b042ae3d6 100644 --- a/src/shader_recompiler/ir/breadth_first_search.h +++ b/src/shader_recompiler/ir/breadth_first_search.h @@ -14,8 +14,8 @@ namespace Shader::IR { // Use typename Instruction so the function can be used to return either const or mutable // Insts depending on the context. template -auto BreadthFirstSearch(Instruction* inst, - Pred&& pred) -> std::invoke_result_t { +auto BreadthFirstSearch(Instruction* inst, Pred&& pred) + -> std::invoke_result_t { // Most often case the instruction is the desired already. if (std::optional result = pred(inst)) { return result; @@ -53,8 +53,8 @@ auto BreadthFirstSearch(Instruction* inst, } template -auto BreadthFirstSearch(const Value& value, - Pred&& pred) -> std::invoke_result_t { +auto BreadthFirstSearch(const Value& value, Pred&& pred) + -> std::invoke_result_t { if (value.IsImmediate()) { // Nothing to do with immediates return std::nullopt; diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index 354182064..edea69565 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -8,6 +8,7 @@ #include #include "common/config.h" #include "common/io_file.h" +#include "common/logging/log.h" #include "common/path_util.h" #include "common/singleton.h" #include "shader_recompiler/info.h" @@ -98,15 +99,6 @@ struct PassInfo { namespace Shader::Optimization { namespace { -static IR::Value GetReadConstOff(const IR::Inst* inst) { - ASSERT(inst->GetOpcode() == IR::Opcode::ReadConst); - return inst->Arg(1); -} - -static IR::ScalarReg GetUserDataSgprBase(const IR::Inst* inst) { - ASSERT(inst->GetOpcode() == IR::Opcode::GetUserData); - return inst->Arg(0).ScalarReg(); -} static inline void PushPtr(Xbyak::CodeGenerator& c, u32 off_dw) { c.push(rdi); @@ -158,14 +150,13 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { pass_info.dst_off_dw = NumUserDataRegs; // Special case for V# step rate buffers in fetch shader - for (auto i = 0; i < info.srt_info.srt_reservations.size(); i++) { - PersistentSrtInfo::SrtSharpReservation res = info.srt_info.srt_reservations[i]; + for (const auto [sgpr_base, dword_offset, num_dwords] : info.srt_info.srt_reservations) { // get pointer to V# - c.mov(r10d, ptr[rdi + (res.sgpr_base << 2)]); + c.mov(r10d, ptr[rdi + (sgpr_base << 2)]); - u32 src_off = res.dword_offset << 2; + u32 src_off = dword_offset << 2; - for (auto j = 0; j < res.num_dwords; j++) { + for (auto j = 0; j < num_dwords; j++) { c.mov(r11d, ptr[r10d + src_off]); c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r11d); @@ -210,7 +201,8 @@ void FlattenExtendedUserdataPass(IR::Program& program) { IR::Block* block = *r_it; for (IR::Inst& inst : *block) { if (inst.GetOpcode() == IR::Opcode::ReadConst) { - if (!GetReadConstOff(&inst).IsImmediate()) { + if (!inst.Arg(1).IsImmediate()) { + LOG_WARNING(Render_Recompiler, "ReadConst has non-immediate offset"); continue; } @@ -233,8 +225,6 @@ void FlattenExtendedUserdataPass(IR::Program& program) { auto base1 = IR::BreadthFirstSearch(ptr_composite->Arg(1), pred); ASSERT_MSG(base0 && base1 && "ReadConst not from constant memory"); - // TODO this probably requires some template magic to fix. BFS needs non-const - // variant. Needs to be non-const to change flags IR::Inst* ptr_lo = base0.value(); ptr_lo = pass_info.DeduplicateInstruction(ptr_lo); @@ -242,10 +232,10 @@ void FlattenExtendedUserdataPass(IR::Program& program) { pass_info.pointer_uses.try_emplace(ptr_lo, PassInfo::PtrUserList{}); PassInfo::PtrUserList& user_list = ptr_uses_kv.first->second; - user_list[GetReadConstOff(&inst).U32()] = &inst; + user_list[inst.Arg(1).U32()] = &inst; if (ptr_lo->GetOpcode() == IR::Opcode::GetUserData) { - IR::ScalarReg ud_reg = GetUserDataSgprBase(ptr_lo); + IR::ScalarReg ud_reg = ptr_lo->Arg(0).ScalarReg(); pass_info.srt_roots[ud_reg] = ptr_lo; } } diff --git a/src/shader_recompiler/ir/passes/srt.h b/src/shader_recompiler/ir/passes/srt.h index 0e2d5a2d7..95e8902a5 100644 --- a/src/shader_recompiler/ir/passes/srt.h +++ b/src/shader_recompiler/ir/passes/srt.h @@ -18,39 +18,7 @@ namespace Shader { -class FlattenedUserDataBuffer { -public: - template - T ReadUdSharp(u32 sharp_idx) const noexcept { - return *reinterpret_cast(&buf[sharp_idx]); - } - - size_t num_dwords() const { - return buf.size(); - } - - size_t size_bytes() const { - return buf.size() * sizeof(u32); - } - - u32* data() { - return buf.data(); - } - - const u32* data() const { - return buf.data(); - } - - void resize(size_t new_size_dw) { - buf.resize(new_size_dw); - } - -private: - std::vector buf; -}; - -typedef void(__attribute__((sysv_abi)) * PFN_SrtWalker)(const u32* /*user_data*/, - u32* /*flat_dst*/); +using PFN_SrtWalker = void PS4_SYSV_ABI (*)(const u32* /*user_data*/, u32* /*flat_dst*/); // Utility for copying a simple relocatable function from a Xbyak code generator to manage memory // separately @@ -99,8 +67,6 @@ private: }; struct PersistentSrtInfo { - PersistentSrtInfo() : flattened_bufsize_dw(/*NumUserDataRegs*/ 16) {} - // Special case when fetch shader uses step rates. struct SrtSharpReservation { u32 sgpr_base; @@ -110,12 +76,12 @@ struct PersistentSrtInfo { SmallCodeArray walker; boost::container::small_vector srt_reservations; - u32 flattened_bufsize_dw; + u32 flattened_bufsize_dw = 16; // NumUserDataRegs // Special case for fetch shaders because we don't generate IR to read from step rate buffers, // so we won't see usage with GetUserData/ReadConst. // Reserve space in the flattened buffer for a sharp ahead of time - u32 reserve_sharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) { + u32 ReserveSharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) { u32 rv = flattened_bufsize_dw; srt_reservations.emplace_back(sgpr_base, dword_offset, num_dwords); flattened_bufsize_dw += num_dwords; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 5b7f76f00..b15eace12 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -4,6 +4,7 @@ #include #include "common/alignment.h" #include "common/scope_exit.h" +#include "common/types.h" #include "shader_recompiler/info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/buffer_cache/buffer_cache.h" @@ -301,10 +302,11 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value); } -std::pair BufferCache::ObtainHostUBO(VAddr host_addr, u32 size) { +std::pair BufferCache::ObtainHostUBO(std::span data) { static constexpr u64 StreamThreshold = CACHING_PAGESIZE; - ASSERT(size <= StreamThreshold); - const u64 offset = stream_buffer.Copy(host_addr, size, instance.UniformMinAlignment()); + ASSERT(data.size_bytes() <= StreamThreshold); + const u64 offset = stream_buffer.Copy(reinterpret_cast(data.data()), data.size_bytes(), + instance.UniformMinAlignment()); return {&stream_buffer, offset}; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6f9e637da..e2519e942 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -84,7 +84,7 @@ public: /// Writes a value to GPU buffer. void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); - [[nodiscard]] std::pair ObtainHostUBO(VAddr host_addr, u32 size); + [[nodiscard]] std::pair ObtainHostUBO(std::span data); /// Obtains a buffer for the specified region. [[nodiscard]] std::pair ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,