address some review comments

This commit is contained in:
Frodo Baggins 2024-10-12 22:56:41 -07:00
parent e69881033e
commit 57df6f189f
8 changed files with 26 additions and 68 deletions

View File

@ -460,7 +460,7 @@ void EmitContext::DefineBuffers() {
const auto storage_class = spv::StorageClass::Uniform; const auto storage_class = spv::StorageClass::Uniform;
const Id pointer_type = TypePointer(storage_class, data_type); const Id pointer_type = TypePointer(storage_class, data_type);
const Id record_array_type{ const Id record_array_type{
TypeArray(U32[1], ConstU32(static_cast<u32>(info.flattened_ud_buf.num_dwords())))}; TypeArray(U32[1], ConstU32(static_cast<u32>(info.flattened_ud_buf.size())))};
const Id struct_type{define_struct(record_array_type, false, "srt_flatbuf_ty")}; const Id struct_type{define_struct(record_array_type, false, "srt_flatbuf_ty")};

View File

@ -418,7 +418,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
if (step_rate == Info::VsInput::OverStepRate0 || if (step_rate == Info::VsInput::OverStepRate0 ||
step_rate == Info::VsInput::OverStepRate1) { step_rate == Info::VsInput::OverStepRate1) {
info.buffers.push_back({ info.buffers.push_back({
.sharp_idx = info.srt_info.reserve_sharp(attrib.sgpr_base, attrib.dword_offset, 4), .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
.used_types = IR::Type::F32, .used_types = IR::Type::F32,
.is_instance_data = true, .is_instance_data = true,
}); });

View File

@ -180,7 +180,7 @@ struct Info {
SamplerResourceList samplers; SamplerResourceList samplers;
PersistentSrtInfo srt_info; PersistentSrtInfo srt_info;
FlattenedUserDataBuffer flattened_ud_buf; std::vector<u32> flattened_ud_buf;
std::span<const u32> user_data; std::span<const u32> user_data;
Stage stage; Stage stage;
@ -213,7 +213,7 @@ struct Info {
template <typename T> template <typename T>
inline T ReadUdSharp(u32 sharp_idx) const noexcept { inline T ReadUdSharp(u32 sharp_idx) const noexcept {
return flattened_ud_buf.ReadUdSharp<T>(sharp_idx); return *reinterpret_cast<const T*>(&flattened_ud_buf[sharp_idx]);
} }
template <typename T> template <typename T>

View File

@ -14,8 +14,8 @@ namespace Shader::IR {
// Use typename Instruction so the function can be used to return either const or mutable // Use typename Instruction so the function can be used to return either const or mutable
// Insts depending on the context. // Insts depending on the context.
template <typename Instruction, typename Pred> template <typename Instruction, typename Pred>
auto BreadthFirstSearch(Instruction* inst, auto BreadthFirstSearch(Instruction* inst, Pred&& pred)
Pred&& pred) -> std::invoke_result_t<Pred, Instruction*> { -> std::invoke_result_t<Pred, Instruction*> {
// In the common case the instruction itself is already the desired one. // In the common case the instruction itself is already the desired one.
if (std::optional result = pred(inst)) { if (std::optional result = pred(inst)) {
return result; return result;
@ -53,8 +53,8 @@ auto BreadthFirstSearch(Instruction* inst,
} }
template <typename Pred> template <typename Pred>
auto BreadthFirstSearch(const Value& value, auto BreadthFirstSearch(const Value& value, Pred&& pred)
Pred&& pred) -> std::invoke_result_t<Pred, const Inst*> { -> std::invoke_result_t<Pred, const Inst*> {
if (value.IsImmediate()) { if (value.IsImmediate()) {
// Nothing to do with immediates // Nothing to do with immediates
return std::nullopt; return std::nullopt;

View File

@ -8,6 +8,7 @@
#include <xbyak/xbyak_util.h> #include <xbyak/xbyak_util.h>
#include "common/config.h" #include "common/config.h"
#include "common/io_file.h" #include "common/io_file.h"
#include "common/logging/log.h"
#include "common/path_util.h" #include "common/path_util.h"
#include "common/singleton.h" #include "common/singleton.h"
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
@ -98,15 +99,6 @@ struct PassInfo {
namespace Shader::Optimization { namespace Shader::Optimization {
namespace { namespace {
// Returns argument 1 of `inst`, after asserting that `inst` is a ReadConst instruction.
// NOTE(review): the helper's name implies argument 1 is the read offset — confirm against the
// ReadConst opcode definition.
static IR::Value GetReadConstOff(const IR::Inst* inst) {
// Guard: this accessor is only meaningful for ReadConst.
ASSERT(inst->GetOpcode() == IR::Opcode::ReadConst);
return inst->Arg(1);
}
// Returns argument 0 of `inst` as a scalar register, after asserting that `inst` is a
// GetUserData instruction.
static IR::ScalarReg GetUserDataSgprBase(const IR::Inst* inst) {
// Guard: this accessor is only meaningful for GetUserData.
ASSERT(inst->GetOpcode() == IR::Opcode::GetUserData);
return inst->Arg(0).ScalarReg();
}
static inline void PushPtr(Xbyak::CodeGenerator& c, u32 off_dw) { static inline void PushPtr(Xbyak::CodeGenerator& c, u32 off_dw) {
c.push(rdi); c.push(rdi);
@ -158,14 +150,13 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
pass_info.dst_off_dw = NumUserDataRegs; pass_info.dst_off_dw = NumUserDataRegs;
// Special case for V# step rate buffers in fetch shader // Special case for V# step rate buffers in fetch shader
for (auto i = 0; i < info.srt_info.srt_reservations.size(); i++) { for (const auto [sgpr_base, dword_offset, num_dwords] : info.srt_info.srt_reservations) {
PersistentSrtInfo::SrtSharpReservation res = info.srt_info.srt_reservations[i];
// get pointer to V# // get pointer to V#
c.mov(r10d, ptr[rdi + (res.sgpr_base << 2)]); c.mov(r10d, ptr[rdi + (sgpr_base << 2)]);
u32 src_off = res.dword_offset << 2; u32 src_off = dword_offset << 2;
for (auto j = 0; j < res.num_dwords; j++) { for (auto j = 0; j < num_dwords; j++) {
c.mov(r11d, ptr[r10d + src_off]); c.mov(r11d, ptr[r10d + src_off]);
c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r11d); c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r11d);
@ -210,7 +201,8 @@ void FlattenExtendedUserdataPass(IR::Program& program) {
IR::Block* block = *r_it; IR::Block* block = *r_it;
for (IR::Inst& inst : *block) { for (IR::Inst& inst : *block) {
if (inst.GetOpcode() == IR::Opcode::ReadConst) { if (inst.GetOpcode() == IR::Opcode::ReadConst) {
if (!GetReadConstOff(&inst).IsImmediate()) { if (!inst.Arg(1).IsImmediate()) {
LOG_WARNING(Render_Recompiler, "ReadConst has non-immediate offset");
continue; continue;
} }
@ -233,8 +225,6 @@ void FlattenExtendedUserdataPass(IR::Program& program) {
auto base1 = IR::BreadthFirstSearch(ptr_composite->Arg(1), pred); auto base1 = IR::BreadthFirstSearch(ptr_composite->Arg(1), pred);
ASSERT_MSG(base0 && base1 && "ReadConst not from constant memory"); ASSERT_MSG(base0 && base1 && "ReadConst not from constant memory");
// TODO this probably requires some template magic to fix. BFS needs non-const
// variant. Needs to be non-const to change flags
IR::Inst* ptr_lo = base0.value(); IR::Inst* ptr_lo = base0.value();
ptr_lo = pass_info.DeduplicateInstruction(ptr_lo); ptr_lo = pass_info.DeduplicateInstruction(ptr_lo);
@ -242,10 +232,10 @@ void FlattenExtendedUserdataPass(IR::Program& program) {
pass_info.pointer_uses.try_emplace(ptr_lo, PassInfo::PtrUserList{}); pass_info.pointer_uses.try_emplace(ptr_lo, PassInfo::PtrUserList{});
PassInfo::PtrUserList& user_list = ptr_uses_kv.first->second; PassInfo::PtrUserList& user_list = ptr_uses_kv.first->second;
user_list[GetReadConstOff(&inst).U32()] = &inst; user_list[inst.Arg(1).U32()] = &inst;
if (ptr_lo->GetOpcode() == IR::Opcode::GetUserData) { if (ptr_lo->GetOpcode() == IR::Opcode::GetUserData) {
IR::ScalarReg ud_reg = GetUserDataSgprBase(ptr_lo); IR::ScalarReg ud_reg = ptr_lo->Arg(0).ScalarReg();
pass_info.srt_roots[ud_reg] = ptr_lo; pass_info.srt_roots[ud_reg] = ptr_lo;
} }
} }

View File

@ -18,39 +18,7 @@
namespace Shader { namespace Shader {
// Thin wrapper around a std::vector<u32> that exposes dword-oriented accessors.
// NOTE(review): the alias following the opening brace on the next line appears to originate from
// the other column of the diff rendering rather than from this class — confirm before relying on it.
class FlattenedUserDataBuffer { using PFN_SrtWalker = void PS4_SYSV_ABI (*)(const u32* /*user_data*/, u32* /*flat_dst*/);
public:
// Reinterprets the storage beginning at dword index `sharp_idx` as a `T` and returns it by value.
// No bounds check is performed here.
template <typename T>
T ReadUdSharp(u32 sharp_idx) const noexcept {
return *reinterpret_cast<const T*>(&buf[sharp_idx]);
}
// Number of u32 elements currently stored.
size_t num_dwords() const {
return buf.size();
}
// Size of the stored data in bytes (element count times sizeof(u32)).
size_t size_bytes() const {
return buf.size() * sizeof(u32);
}
// Mutable pointer to the first stored dword.
u32* data() {
return buf.data();
}
// Read-only pointer to the first stored dword.
const u32* data() const {
return buf.data();
}
// Resizes the underlying storage to `new_size_dw` u32 elements.
void resize(size_t new_size_dw) {
buf.resize(new_size_dw);
}
private:
// Backing storage, one element per dword.
std::vector<u32> buf;
};
// Pointer to a void function declared with the sysv_abi attribute that takes
// (const u32* user_data, u32* flat_dst).
typedef void(__attribute__((sysv_abi)) * PFN_SrtWalker)(const u32* /*user_data*/,
u32* /*flat_dst*/);
// Utility for copying a simple relocatable function from a Xbyak code generator to manage memory // Utility for copying a simple relocatable function from a Xbyak code generator to manage memory
// separately // separately
@ -99,8 +67,6 @@ private:
}; };
struct PersistentSrtInfo { struct PersistentSrtInfo {
PersistentSrtInfo() : flattened_bufsize_dw(/*NumUserDataRegs*/ 16) {}
// Special case when fetch shader uses step rates. // Special case when fetch shader uses step rates.
struct SrtSharpReservation { struct SrtSharpReservation {
u32 sgpr_base; u32 sgpr_base;
@ -110,12 +76,12 @@ struct PersistentSrtInfo {
SmallCodeArray walker; SmallCodeArray walker;
boost::container::small_vector<SrtSharpReservation, 2> srt_reservations; boost::container::small_vector<SrtSharpReservation, 2> srt_reservations;
u32 flattened_bufsize_dw; u32 flattened_bufsize_dw = 16; // NumUserDataRegs
// Special case for fetch shaders because we don't generate IR to read from step rate buffers, // Special case for fetch shaders because we don't generate IR to read from step rate buffers,
// so we won't see usage with GetUserData/ReadConst. // so we won't see usage with GetUserData/ReadConst.
// Reserve space in the flattened buffer for a sharp ahead of time // Reserve space in the flattened buffer for a sharp ahead of time
u32 reserve_sharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) { u32 ReserveSharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) {
u32 rv = flattened_bufsize_dw; u32 rv = flattened_bufsize_dw;
srt_reservations.emplace_back(sgpr_base, dword_offset, num_dwords); srt_reservations.emplace_back(sgpr_base, dword_offset, num_dwords);
flattened_bufsize_dw += num_dwords; flattened_bufsize_dw += num_dwords;

View File

@ -4,6 +4,7 @@
#include <algorithm> #include <algorithm>
#include "common/alignment.h" #include "common/alignment.h"
#include "common/scope_exit.h" #include "common/scope_exit.h"
#include "common/types.h"
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/liverpool.h"
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
@ -301,10 +302,11 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value); cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value);
} }
std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(VAddr host_addr, u32 size) { std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
static constexpr u64 StreamThreshold = CACHING_PAGESIZE; static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
ASSERT(size <= StreamThreshold); ASSERT(data.size_bytes() <= StreamThreshold);
const u64 offset = stream_buffer.Copy(host_addr, size, instance.UniformMinAlignment()); const u64 offset = stream_buffer.Copy(reinterpret_cast<VAddr>(data.data()), data.size_bytes(),
instance.UniformMinAlignment());
return {&stream_buffer, offset}; return {&stream_buffer, offset};
} }

View File

@ -84,7 +84,7 @@ public:
/// Writes a value to GPU buffer. /// Writes a value to GPU buffer.
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
[[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(VAddr host_addr, u32 size); [[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(std::span<const u32> data);
/// Obtains a buffer for the specified region. /// Obtains a buffer for the specified region.
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written, [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,