address some review comments

This commit is contained in:
Frodo Baggins 2024-10-12 22:56:41 -07:00
parent e69881033e
commit 57df6f189f
8 changed files with 26 additions and 68 deletions

View File

@ -460,7 +460,7 @@ void EmitContext::DefineBuffers() {
const auto storage_class = spv::StorageClass::Uniform;
const Id pointer_type = TypePointer(storage_class, data_type);
const Id record_array_type{
TypeArray(U32[1], ConstU32(static_cast<u32>(info.flattened_ud_buf.num_dwords())))};
TypeArray(U32[1], ConstU32(static_cast<u32>(info.flattened_ud_buf.size())))};
const Id struct_type{define_struct(record_array_type, false, "srt_flatbuf_ty")};

View File

@ -418,7 +418,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
if (step_rate == Info::VsInput::OverStepRate0 ||
step_rate == Info::VsInput::OverStepRate1) {
info.buffers.push_back({
.sharp_idx = info.srt_info.reserve_sharp(attrib.sgpr_base, attrib.dword_offset, 4),
.sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
.used_types = IR::Type::F32,
.is_instance_data = true,
});

View File

@ -180,7 +180,7 @@ struct Info {
SamplerResourceList samplers;
PersistentSrtInfo srt_info;
FlattenedUserDataBuffer flattened_ud_buf;
std::vector<u32> flattened_ud_buf;
std::span<const u32> user_data;
Stage stage;
@ -213,7 +213,7 @@ struct Info {
template <typename T>
inline T ReadUdSharp(u32 sharp_idx) const noexcept {
return flattened_ud_buf.ReadUdSharp<T>(sharp_idx);
return *reinterpret_cast<const T*>(&flattened_ud_buf[sharp_idx]);
}
template <typename T>

View File

@ -14,8 +14,8 @@ namespace Shader::IR {
// Use typename Instruction so the function can be used to return either const or mutable
// Insts depending on the context.
template <typename Instruction, typename Pred>
auto BreadthFirstSearch(Instruction* inst,
Pred&& pred) -> std::invoke_result_t<Pred, Instruction*> {
auto BreadthFirstSearch(Instruction* inst, Pred&& pred)
-> std::invoke_result_t<Pred, Instruction*> {
// Most often case the instruction is the desired already.
if (std::optional result = pred(inst)) {
return result;
@ -53,8 +53,8 @@ auto BreadthFirstSearch(Instruction* inst,
}
template <typename Pred>
auto BreadthFirstSearch(const Value& value,
Pred&& pred) -> std::invoke_result_t<Pred, const Inst*> {
auto BreadthFirstSearch(const Value& value, Pred&& pred)
-> std::invoke_result_t<Pred, const Inst*> {
if (value.IsImmediate()) {
// Nothing to do with immediates
return std::nullopt;

View File

@ -8,6 +8,7 @@
#include <xbyak/xbyak_util.h>
#include "common/config.h"
#include "common/io_file.h"
#include "common/logging/log.h"
#include "common/path_util.h"
#include "common/singleton.h"
#include "shader_recompiler/info.h"
@ -98,15 +99,6 @@ struct PassInfo {
namespace Shader::Optimization {
namespace {
static IR::Value GetReadConstOff(const IR::Inst* inst) {
ASSERT(inst->GetOpcode() == IR::Opcode::ReadConst);
return inst->Arg(1);
}
static IR::ScalarReg GetUserDataSgprBase(const IR::Inst* inst) {
ASSERT(inst->GetOpcode() == IR::Opcode::GetUserData);
return inst->Arg(0).ScalarReg();
}
static inline void PushPtr(Xbyak::CodeGenerator& c, u32 off_dw) {
c.push(rdi);
@ -158,14 +150,13 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
pass_info.dst_off_dw = NumUserDataRegs;
// Special case for V# step rate buffers in fetch shader
for (auto i = 0; i < info.srt_info.srt_reservations.size(); i++) {
PersistentSrtInfo::SrtSharpReservation res = info.srt_info.srt_reservations[i];
for (const auto [sgpr_base, dword_offset, num_dwords] : info.srt_info.srt_reservations) {
// get pointer to V#
c.mov(r10d, ptr[rdi + (res.sgpr_base << 2)]);
c.mov(r10d, ptr[rdi + (sgpr_base << 2)]);
u32 src_off = res.dword_offset << 2;
u32 src_off = dword_offset << 2;
for (auto j = 0; j < res.num_dwords; j++) {
for (auto j = 0; j < num_dwords; j++) {
c.mov(r11d, ptr[r10d + src_off]);
c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r11d);
@ -210,7 +201,8 @@ void FlattenExtendedUserdataPass(IR::Program& program) {
IR::Block* block = *r_it;
for (IR::Inst& inst : *block) {
if (inst.GetOpcode() == IR::Opcode::ReadConst) {
if (!GetReadConstOff(&inst).IsImmediate()) {
if (!inst.Arg(1).IsImmediate()) {
LOG_WARNING(Render_Recompiler, "ReadConst has non-immediate offset");
continue;
}
@ -233,8 +225,6 @@ void FlattenExtendedUserdataPass(IR::Program& program) {
auto base1 = IR::BreadthFirstSearch(ptr_composite->Arg(1), pred);
ASSERT_MSG(base0 && base1 && "ReadConst not from constant memory");
// TODO this probably requires some template magic to fix. BFS needs non-const
// variant. Needs to be non-const to change flags
IR::Inst* ptr_lo = base0.value();
ptr_lo = pass_info.DeduplicateInstruction(ptr_lo);
@ -242,10 +232,10 @@ void FlattenExtendedUserdataPass(IR::Program& program) {
pass_info.pointer_uses.try_emplace(ptr_lo, PassInfo::PtrUserList{});
PassInfo::PtrUserList& user_list = ptr_uses_kv.first->second;
user_list[GetReadConstOff(&inst).U32()] = &inst;
user_list[inst.Arg(1).U32()] = &inst;
if (ptr_lo->GetOpcode() == IR::Opcode::GetUserData) {
IR::ScalarReg ud_reg = GetUserDataSgprBase(ptr_lo);
IR::ScalarReg ud_reg = ptr_lo->Arg(0).ScalarReg();
pass_info.srt_roots[ud_reg] = ptr_lo;
}
}

View File

@ -18,39 +18,7 @@
namespace Shader {
class FlattenedUserDataBuffer {
public:
template <typename T>
T ReadUdSharp(u32 sharp_idx) const noexcept {
return *reinterpret_cast<const T*>(&buf[sharp_idx]);
}
size_t num_dwords() const {
return buf.size();
}
size_t size_bytes() const {
return buf.size() * sizeof(u32);
}
u32* data() {
return buf.data();
}
const u32* data() const {
return buf.data();
}
void resize(size_t new_size_dw) {
buf.resize(new_size_dw);
}
private:
std::vector<u32> buf;
};
typedef void(__attribute__((sysv_abi)) * PFN_SrtWalker)(const u32* /*user_data*/,
u32* /*flat_dst*/);
using PFN_SrtWalker = void PS4_SYSV_ABI (*)(const u32* /*user_data*/, u32* /*flat_dst*/);
// Utility for copying a simple relocatable function from a Xbyak code generator to manage memory
// separately
@ -99,8 +67,6 @@ private:
};
struct PersistentSrtInfo {
PersistentSrtInfo() : flattened_bufsize_dw(/*NumUserDataRegs*/ 16) {}
// Special case when fetch shader uses step rates.
struct SrtSharpReservation {
u32 sgpr_base;
@ -110,12 +76,12 @@ struct PersistentSrtInfo {
SmallCodeArray walker;
boost::container::small_vector<SrtSharpReservation, 2> srt_reservations;
u32 flattened_bufsize_dw;
u32 flattened_bufsize_dw = 16; // NumUserDataRegs
// Special case for fetch shaders because we don't generate IR to read from step rate buffers,
// so we won't see usage with GetUserData/ReadConst.
// Reserve space in the flattened buffer for a sharp ahead of time
u32 reserve_sharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) {
u32 ReserveSharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) {
u32 rv = flattened_bufsize_dw;
srt_reservations.emplace_back(sgpr_base, dword_offset, num_dwords);
flattened_bufsize_dw += num_dwords;

View File

@ -4,6 +4,7 @@
#include <algorithm>
#include "common/alignment.h"
#include "common/scope_exit.h"
#include "common/types.h"
#include "shader_recompiler/info.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/buffer_cache/buffer_cache.h"
@ -301,10 +302,11 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value);
}
std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(VAddr host_addr, u32 size) {
std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
ASSERT(size <= StreamThreshold);
const u64 offset = stream_buffer.Copy(host_addr, size, instance.UniformMinAlignment());
ASSERT(data.size_bytes() <= StreamThreshold);
const u64 offset = stream_buffer.Copy(reinterpret_cast<VAddr>(data.data()), data.size_bytes(),
instance.UniformMinAlignment());
return {&stream_buffer, offset};
}

View File

@ -84,7 +84,7 @@ public:
/// Writes a value to GPU buffer.
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
[[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(VAddr host_addr, u32 size);
[[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(std::span<const u32> data);
/// Obtains a buffer for the specified region.
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,