address some review comments

2025-08-04 08:22:32 +00:00 · 2024-10-12 22:56:41 -07:00 · 2024-10-12 22:56:41 -07:00 · 57df6f189f
commit 57df6f189f
parent e69881033e
8 changed files with 26 additions and 68 deletions
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@ -460,7 +460,7 @@ void EmitContext::DefineBuffers() {
        const auto storage_class = spv::StorageClass::Uniform;
        const Id pointer_type = TypePointer(storage_class, data_type);
        const Id record_array_type{
-            TypeArray(U32[1], ConstU32(static_cast<u32>(info.flattened_ud_buf.num_dwords())))};
+            TypeArray(U32[1], ConstU32(static_cast<u32>(info.flattened_ud_buf.size())))};
        const Id struct_type{define_struct(record_array_type, false, "srt_flatbuf_ty")};
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@ -418,7 +418,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
        if (step_rate == Info::VsInput::OverStepRate0 ||
            step_rate == Info::VsInput::OverStepRate1) {
            info.buffers.push_back({
-                .sharp_idx = info.srt_info.reserve_sharp(attrib.sgpr_base, attrib.dword_offset, 4),
+                .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
                .used_types = IR::Type::F32,
                .is_instance_data = true,
            });
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@ -180,7 +180,7 @@ struct Info {
    SamplerResourceList samplers;
    PersistentSrtInfo srt_info;
-    FlattenedUserDataBuffer flattened_ud_buf;
+    std::vector<u32> flattened_ud_buf;
    std::span<const u32> user_data;
    Stage stage;
@ -213,7 +213,7 @@ struct Info {
    template <typename T>
    inline T ReadUdSharp(u32 sharp_idx) const noexcept {
-        return flattened_ud_buf.ReadUdSharp<T>(sharp_idx);
+        return *reinterpret_cast<const T*>(&flattened_ud_buf[sharp_idx]);
    }
    template <typename T>
--- a/src/shader_recompiler/ir/breadth_first_search.h
+++ b/src/shader_recompiler/ir/breadth_first_search.h
@ -14,8 +14,8 @@ namespace Shader::IR {
 // Use typename Instruction so the function can be used to return either const or mutable
 // Insts depending on the context.
 template <typename Instruction, typename Pred>
-auto BreadthFirstSearch(Instruction* inst,
+auto BreadthFirstSearch(Instruction* inst, Pred&& pred)
-                        Pred&& pred) -> std::invoke_result_t<Pred, Instruction*> {
+    -> std::invoke_result_t<Pred, Instruction*> {
    // Most often case the instruction is the desired already.
    if (std::optional result = pred(inst)) {
        return result;
@ -53,8 +53,8 @@ auto BreadthFirstSearch(Instruction* inst,
 }
 template <typename Pred>
-auto BreadthFirstSearch(const Value& value,
+auto BreadthFirstSearch(const Value& value, Pred&& pred)
-                        Pred&& pred) -> std::invoke_result_t<Pred, const Inst*> {
+    -> std::invoke_result_t<Pred, const Inst*> {
    if (value.IsImmediate()) {
        // Nothing to do with immediates
        return std::nullopt;
--- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
+++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
@ -8,6 +8,7 @@
 #include <xbyak/xbyak_util.h>
 #include "common/config.h"
 #include "common/io_file.h"
 #include "common/logging/log.h"
 #include "common/path_util.h"
 #include "common/singleton.h"
 #include "shader_recompiler/info.h"
@ -98,15 +99,6 @@ struct PassInfo {
 namespace Shader::Optimization {
 namespace {
-static IR::Value GetReadConstOff(const IR::Inst* inst) {
-    ASSERT(inst->GetOpcode() == IR::Opcode::ReadConst);
-    return inst->Arg(1);
-}
-static IR::ScalarReg GetUserDataSgprBase(const IR::Inst* inst) {
-    ASSERT(inst->GetOpcode() == IR::Opcode::GetUserData);
-    return inst->Arg(0).ScalarReg();
-}
 static inline void PushPtr(Xbyak::CodeGenerator& c, u32 off_dw) {
    c.push(rdi);
@ -158,14 +150,13 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
    pass_info.dst_off_dw = NumUserDataRegs;
    // Special case for V# step rate buffers in fetch shader
-    for (auto i = 0; i < info.srt_info.srt_reservations.size(); i++) {
+    for (const auto [sgpr_base, dword_offset, num_dwords] : info.srt_info.srt_reservations) {
-        PersistentSrtInfo::SrtSharpReservation res = info.srt_info.srt_reservations[i];
        // get pointer to V#
-        c.mov(r10d, ptr[rdi + (res.sgpr_base << 2)]);
+        c.mov(r10d, ptr[rdi + (sgpr_base << 2)]);
-        u32 src_off = res.dword_offset << 2;
+        u32 src_off = dword_offset << 2;
-        for (auto j = 0; j < res.num_dwords; j++) {
+        for (auto j = 0; j < num_dwords; j++) {
            c.mov(r11d, ptr[r10d + src_off]);
            c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r11d);
@ -210,7 +201,8 @@ void FlattenExtendedUserdataPass(IR::Program& program) {
        IR::Block* block = *r_it;
        for (IR::Inst& inst : *block) {
            if (inst.GetOpcode() == IR::Opcode::ReadConst) {
-                if (!GetReadConstOff(&inst).IsImmediate()) {
+                if (!inst.Arg(1).IsImmediate()) {
                    LOG_WARNING(Render_Recompiler, "ReadConst has non-immediate offset");
                    continue;
                }
@ -233,8 +225,6 @@ void FlattenExtendedUserdataPass(IR::Program& program) {
                auto base1 = IR::BreadthFirstSearch(ptr_composite->Arg(1), pred);
                ASSERT_MSG(base0 && base1 && "ReadConst not from constant memory");
-                // TODO this probably requires some template magic to fix. BFS needs non-const
-                // variant. Needs to be non-const to change flags
                IR::Inst* ptr_lo = base0.value();
                ptr_lo = pass_info.DeduplicateInstruction(ptr_lo);
@ -242,10 +232,10 @@ void FlattenExtendedUserdataPass(IR::Program& program) {
                    pass_info.pointer_uses.try_emplace(ptr_lo, PassInfo::PtrUserList{});
                PassInfo::PtrUserList& user_list = ptr_uses_kv.first->second;
-                user_list[GetReadConstOff(&inst).U32()] = &inst;
+                user_list[inst.Arg(1).U32()] = &inst;
                if (ptr_lo->GetOpcode() == IR::Opcode::GetUserData) {
-                    IR::ScalarReg ud_reg = GetUserDataSgprBase(ptr_lo);
+                    IR::ScalarReg ud_reg = ptr_lo->Arg(0).ScalarReg();
                    pass_info.srt_roots[ud_reg] = ptr_lo;
                }
            }
--- a/src/shader_recompiler/ir/passes/srt.h
+++ b/src/shader_recompiler/ir/passes/srt.h
@ -18,39 +18,7 @@
 namespace Shader {
-class FlattenedUserDataBuffer {
+using PFN_SrtWalker = void PS4_SYSV_ABI (*)(const u32* /*user_data*/, u32* /*flat_dst*/);
-public:
-    template <typename T>
-    T ReadUdSharp(u32 sharp_idx) const noexcept {
-        return *reinterpret_cast<const T*>(&buf[sharp_idx]);
-    }
-    size_t num_dwords() const {
-        return buf.size();
-    }
-    size_t size_bytes() const {
-        return buf.size() * sizeof(u32);
-    }
-    u32* data() {
-        return buf.data();
-    }
-    const u32* data() const {
-        return buf.data();
-    }
-    void resize(size_t new_size_dw) {
-        buf.resize(new_size_dw);
-    }
-private:
-    std::vector<u32> buf;
-};
-typedef void(__attribute__((sysv_abi)) * PFN_SrtWalker)(const u32* /*user_data*/,
-                                                        u32* /*flat_dst*/);
 // Utility for copying a simple relocatable function from a Xbyak code generator to manage memory
 // separately
@ -99,8 +67,6 @@ private:
 };
 struct PersistentSrtInfo {
-    PersistentSrtInfo() : flattened_bufsize_dw(/*NumUserDataRegs*/ 16) {}
    // Special case when fetch shader uses step rates.
    struct SrtSharpReservation {
        u32 sgpr_base;
@ -110,12 +76,12 @@ struct PersistentSrtInfo {
    SmallCodeArray walker;
    boost::container::small_vector<SrtSharpReservation, 2> srt_reservations;
-    u32 flattened_bufsize_dw;
+    u32 flattened_bufsize_dw = 16; // NumUserDataRegs
    // Special case for fetch shaders because we don't generate IR to read from step rate buffers,
    // so we won't see usage with GetUserData/ReadConst.
    // Reserve space in the flattened buffer for a sharp ahead of time
-    u32 reserve_sharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) {
+    u32 ReserveSharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) {
        u32 rv = flattened_bufsize_dw;
        srt_reservations.emplace_back(sgpr_base, dword_offset, num_dwords);
        flattened_bufsize_dw += num_dwords;
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@ -4,6 +4,7 @@
 #include <algorithm>
 #include "common/alignment.h"
 #include "common/scope_exit.h"
 #include "common/types.h"
 #include "shader_recompiler/info.h"
 #include "video_core/amdgpu/liverpool.h"
 #include "video_core/buffer_cache/buffer_cache.h"
@ -301,10 +302,11 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
    cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value);
 }
-std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(VAddr host_addr, u32 size) {
+std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
    static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
-    ASSERT(size <= StreamThreshold);
+    ASSERT(data.size_bytes() <= StreamThreshold);
-    const u64 offset = stream_buffer.Copy(host_addr, size, instance.UniformMinAlignment());
+    const u64 offset = stream_buffer.Copy(reinterpret_cast<VAddr>(data.data()), data.size_bytes(),
+                                          instance.UniformMinAlignment());
    return {&stream_buffer, offset};
 }
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@ -84,7 +84,7 @@ public:
    /// Writes a value to GPU buffer.
    void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
-    [[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(VAddr host_addr, u32 size);
+    [[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(std::span<const u32> data);
    /// Obtains a buffer for the specified region.
    [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,