mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-04 08:22:32 +00:00
address some review comments
This commit is contained in:
parent
e69881033e
commit
57df6f189f
@ -460,7 +460,7 @@ void EmitContext::DefineBuffers() {
|
|||||||
const auto storage_class = spv::StorageClass::Uniform;
|
const auto storage_class = spv::StorageClass::Uniform;
|
||||||
const Id pointer_type = TypePointer(storage_class, data_type);
|
const Id pointer_type = TypePointer(storage_class, data_type);
|
||||||
const Id record_array_type{
|
const Id record_array_type{
|
||||||
TypeArray(U32[1], ConstU32(static_cast<u32>(info.flattened_ud_buf.num_dwords())))};
|
TypeArray(U32[1], ConstU32(static_cast<u32>(info.flattened_ud_buf.size())))};
|
||||||
|
|
||||||
const Id struct_type{define_struct(record_array_type, false, "srt_flatbuf_ty")};
|
const Id struct_type{define_struct(record_array_type, false, "srt_flatbuf_ty")};
|
||||||
|
|
||||||
|
@ -418,7 +418,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
|
|||||||
if (step_rate == Info::VsInput::OverStepRate0 ||
|
if (step_rate == Info::VsInput::OverStepRate0 ||
|
||||||
step_rate == Info::VsInput::OverStepRate1) {
|
step_rate == Info::VsInput::OverStepRate1) {
|
||||||
info.buffers.push_back({
|
info.buffers.push_back({
|
||||||
.sharp_idx = info.srt_info.reserve_sharp(attrib.sgpr_base, attrib.dword_offset, 4),
|
.sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
|
||||||
.used_types = IR::Type::F32,
|
.used_types = IR::Type::F32,
|
||||||
.is_instance_data = true,
|
.is_instance_data = true,
|
||||||
});
|
});
|
||||||
|
@ -180,7 +180,7 @@ struct Info {
|
|||||||
SamplerResourceList samplers;
|
SamplerResourceList samplers;
|
||||||
|
|
||||||
PersistentSrtInfo srt_info;
|
PersistentSrtInfo srt_info;
|
||||||
FlattenedUserDataBuffer flattened_ud_buf;
|
std::vector<u32> flattened_ud_buf;
|
||||||
|
|
||||||
std::span<const u32> user_data;
|
std::span<const u32> user_data;
|
||||||
Stage stage;
|
Stage stage;
|
||||||
@ -213,7 +213,7 @@ struct Info {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline T ReadUdSharp(u32 sharp_idx) const noexcept {
|
inline T ReadUdSharp(u32 sharp_idx) const noexcept {
|
||||||
return flattened_ud_buf.ReadUdSharp<T>(sharp_idx);
|
return *reinterpret_cast<const T*>(&flattened_ud_buf[sharp_idx]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
@ -14,8 +14,8 @@ namespace Shader::IR {
|
|||||||
// Use typename Instruction so the function can be used to return either const or mutable
|
// Use typename Instruction so the function can be used to return either const or mutable
|
||||||
// Insts depending on the context.
|
// Insts depending on the context.
|
||||||
template <typename Instruction, typename Pred>
|
template <typename Instruction, typename Pred>
|
||||||
auto BreadthFirstSearch(Instruction* inst,
|
auto BreadthFirstSearch(Instruction* inst, Pred&& pred)
|
||||||
Pred&& pred) -> std::invoke_result_t<Pred, Instruction*> {
|
-> std::invoke_result_t<Pred, Instruction*> {
|
||||||
// Most often case the instruction is the desired already.
|
// Most often case the instruction is the desired already.
|
||||||
if (std::optional result = pred(inst)) {
|
if (std::optional result = pred(inst)) {
|
||||||
return result;
|
return result;
|
||||||
@ -53,8 +53,8 @@ auto BreadthFirstSearch(Instruction* inst,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename Pred>
|
template <typename Pred>
|
||||||
auto BreadthFirstSearch(const Value& value,
|
auto BreadthFirstSearch(const Value& value, Pred&& pred)
|
||||||
Pred&& pred) -> std::invoke_result_t<Pred, const Inst*> {
|
-> std::invoke_result_t<Pred, const Inst*> {
|
||||||
if (value.IsImmediate()) {
|
if (value.IsImmediate()) {
|
||||||
// Nothing to do with immediates
|
// Nothing to do with immediates
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include <xbyak/xbyak_util.h>
|
#include <xbyak/xbyak_util.h>
|
||||||
#include "common/config.h"
|
#include "common/config.h"
|
||||||
#include "common/io_file.h"
|
#include "common/io_file.h"
|
||||||
|
#include "common/logging/log.h"
|
||||||
#include "common/path_util.h"
|
#include "common/path_util.h"
|
||||||
#include "common/singleton.h"
|
#include "common/singleton.h"
|
||||||
#include "shader_recompiler/info.h"
|
#include "shader_recompiler/info.h"
|
||||||
@ -98,15 +99,6 @@ struct PassInfo {
|
|||||||
namespace Shader::Optimization {
|
namespace Shader::Optimization {
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
static IR::Value GetReadConstOff(const IR::Inst* inst) {
|
|
||||||
ASSERT(inst->GetOpcode() == IR::Opcode::ReadConst);
|
|
||||||
return inst->Arg(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static IR::ScalarReg GetUserDataSgprBase(const IR::Inst* inst) {
|
|
||||||
ASSERT(inst->GetOpcode() == IR::Opcode::GetUserData);
|
|
||||||
return inst->Arg(0).ScalarReg();
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void PushPtr(Xbyak::CodeGenerator& c, u32 off_dw) {
|
static inline void PushPtr(Xbyak::CodeGenerator& c, u32 off_dw) {
|
||||||
c.push(rdi);
|
c.push(rdi);
|
||||||
@ -158,14 +150,13 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) {
|
|||||||
pass_info.dst_off_dw = NumUserDataRegs;
|
pass_info.dst_off_dw = NumUserDataRegs;
|
||||||
|
|
||||||
// Special case for V# step rate buffers in fetch shader
|
// Special case for V# step rate buffers in fetch shader
|
||||||
for (auto i = 0; i < info.srt_info.srt_reservations.size(); i++) {
|
for (const auto [sgpr_base, dword_offset, num_dwords] : info.srt_info.srt_reservations) {
|
||||||
PersistentSrtInfo::SrtSharpReservation res = info.srt_info.srt_reservations[i];
|
|
||||||
// get pointer to V#
|
// get pointer to V#
|
||||||
c.mov(r10d, ptr[rdi + (res.sgpr_base << 2)]);
|
c.mov(r10d, ptr[rdi + (sgpr_base << 2)]);
|
||||||
|
|
||||||
u32 src_off = res.dword_offset << 2;
|
u32 src_off = dword_offset << 2;
|
||||||
|
|
||||||
for (auto j = 0; j < res.num_dwords; j++) {
|
for (auto j = 0; j < num_dwords; j++) {
|
||||||
c.mov(r11d, ptr[r10d + src_off]);
|
c.mov(r11d, ptr[r10d + src_off]);
|
||||||
c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r11d);
|
c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r11d);
|
||||||
|
|
||||||
@ -210,7 +201,8 @@ void FlattenExtendedUserdataPass(IR::Program& program) {
|
|||||||
IR::Block* block = *r_it;
|
IR::Block* block = *r_it;
|
||||||
for (IR::Inst& inst : *block) {
|
for (IR::Inst& inst : *block) {
|
||||||
if (inst.GetOpcode() == IR::Opcode::ReadConst) {
|
if (inst.GetOpcode() == IR::Opcode::ReadConst) {
|
||||||
if (!GetReadConstOff(&inst).IsImmediate()) {
|
if (!inst.Arg(1).IsImmediate()) {
|
||||||
|
LOG_WARNING(Render_Recompiler, "ReadConst has non-immediate offset");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -233,8 +225,6 @@ void FlattenExtendedUserdataPass(IR::Program& program) {
|
|||||||
auto base1 = IR::BreadthFirstSearch(ptr_composite->Arg(1), pred);
|
auto base1 = IR::BreadthFirstSearch(ptr_composite->Arg(1), pred);
|
||||||
ASSERT_MSG(base0 && base1 && "ReadConst not from constant memory");
|
ASSERT_MSG(base0 && base1 && "ReadConst not from constant memory");
|
||||||
|
|
||||||
// TODO this probably requires some template magic to fix. BFS needs non-const
|
|
||||||
// variant. Needs to be non-const to change flags
|
|
||||||
IR::Inst* ptr_lo = base0.value();
|
IR::Inst* ptr_lo = base0.value();
|
||||||
ptr_lo = pass_info.DeduplicateInstruction(ptr_lo);
|
ptr_lo = pass_info.DeduplicateInstruction(ptr_lo);
|
||||||
|
|
||||||
@ -242,10 +232,10 @@ void FlattenExtendedUserdataPass(IR::Program& program) {
|
|||||||
pass_info.pointer_uses.try_emplace(ptr_lo, PassInfo::PtrUserList{});
|
pass_info.pointer_uses.try_emplace(ptr_lo, PassInfo::PtrUserList{});
|
||||||
PassInfo::PtrUserList& user_list = ptr_uses_kv.first->second;
|
PassInfo::PtrUserList& user_list = ptr_uses_kv.first->second;
|
||||||
|
|
||||||
user_list[GetReadConstOff(&inst).U32()] = &inst;
|
user_list[inst.Arg(1).U32()] = &inst;
|
||||||
|
|
||||||
if (ptr_lo->GetOpcode() == IR::Opcode::GetUserData) {
|
if (ptr_lo->GetOpcode() == IR::Opcode::GetUserData) {
|
||||||
IR::ScalarReg ud_reg = GetUserDataSgprBase(ptr_lo);
|
IR::ScalarReg ud_reg = ptr_lo->Arg(0).ScalarReg();
|
||||||
pass_info.srt_roots[ud_reg] = ptr_lo;
|
pass_info.srt_roots[ud_reg] = ptr_lo;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -18,39 +18,7 @@
|
|||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
class FlattenedUserDataBuffer {
|
using PFN_SrtWalker = void PS4_SYSV_ABI (*)(const u32* /*user_data*/, u32* /*flat_dst*/);
|
||||||
public:
|
|
||||||
template <typename T>
|
|
||||||
T ReadUdSharp(u32 sharp_idx) const noexcept {
|
|
||||||
return *reinterpret_cast<const T*>(&buf[sharp_idx]);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t num_dwords() const {
|
|
||||||
return buf.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t size_bytes() const {
|
|
||||||
return buf.size() * sizeof(u32);
|
|
||||||
}
|
|
||||||
|
|
||||||
u32* data() {
|
|
||||||
return buf.data();
|
|
||||||
}
|
|
||||||
|
|
||||||
const u32* data() const {
|
|
||||||
return buf.data();
|
|
||||||
}
|
|
||||||
|
|
||||||
void resize(size_t new_size_dw) {
|
|
||||||
buf.resize(new_size_dw);
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::vector<u32> buf;
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef void(__attribute__((sysv_abi)) * PFN_SrtWalker)(const u32* /*user_data*/,
|
|
||||||
u32* /*flat_dst*/);
|
|
||||||
|
|
||||||
// Utility for copying a simple relocatable function from a Xbyak code generator to manage memory
|
// Utility for copying a simple relocatable function from a Xbyak code generator to manage memory
|
||||||
// separately
|
// separately
|
||||||
@ -99,8 +67,6 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct PersistentSrtInfo {
|
struct PersistentSrtInfo {
|
||||||
PersistentSrtInfo() : flattened_bufsize_dw(/*NumUserDataRegs*/ 16) {}
|
|
||||||
|
|
||||||
// Special case when fetch shader uses step rates.
|
// Special case when fetch shader uses step rates.
|
||||||
struct SrtSharpReservation {
|
struct SrtSharpReservation {
|
||||||
u32 sgpr_base;
|
u32 sgpr_base;
|
||||||
@ -110,12 +76,12 @@ struct PersistentSrtInfo {
|
|||||||
|
|
||||||
SmallCodeArray walker;
|
SmallCodeArray walker;
|
||||||
boost::container::small_vector<SrtSharpReservation, 2> srt_reservations;
|
boost::container::small_vector<SrtSharpReservation, 2> srt_reservations;
|
||||||
u32 flattened_bufsize_dw;
|
u32 flattened_bufsize_dw = 16; // NumUserDataRegs
|
||||||
|
|
||||||
// Special case for fetch shaders because we don't generate IR to read from step rate buffers,
|
// Special case for fetch shaders because we don't generate IR to read from step rate buffers,
|
||||||
// so we won't see usage with GetUserData/ReadConst.
|
// so we won't see usage with GetUserData/ReadConst.
|
||||||
// Reserve space in the flattened buffer for a sharp ahead of time
|
// Reserve space in the flattened buffer for a sharp ahead of time
|
||||||
u32 reserve_sharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) {
|
u32 ReserveSharp(u32 sgpr_base, u32 dword_offset, u32 num_dwords) {
|
||||||
u32 rv = flattened_bufsize_dw;
|
u32 rv = flattened_bufsize_dw;
|
||||||
srt_reservations.emplace_back(sgpr_base, dword_offset, num_dwords);
|
srt_reservations.emplace_back(sgpr_base, dword_offset, num_dwords);
|
||||||
flattened_bufsize_dw += num_dwords;
|
flattened_bufsize_dw += num_dwords;
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/scope_exit.h"
|
#include "common/scope_exit.h"
|
||||||
|
#include "common/types.h"
|
||||||
#include "shader_recompiler/info.h"
|
#include "shader_recompiler/info.h"
|
||||||
#include "video_core/amdgpu/liverpool.h"
|
#include "video_core/amdgpu/liverpool.h"
|
||||||
#include "video_core/buffer_cache/buffer_cache.h"
|
#include "video_core/buffer_cache/buffer_cache.h"
|
||||||
@ -301,10 +302,11 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
|
|||||||
cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value);
|
cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(VAddr host_addr, u32 size) {
|
std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
|
||||||
static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
|
static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
|
||||||
ASSERT(size <= StreamThreshold);
|
ASSERT(data.size_bytes() <= StreamThreshold);
|
||||||
const u64 offset = stream_buffer.Copy(host_addr, size, instance.UniformMinAlignment());
|
const u64 offset = stream_buffer.Copy(reinterpret_cast<VAddr>(data.data()), data.size_bytes(),
|
||||||
|
instance.UniformMinAlignment());
|
||||||
return {&stream_buffer, offset};
|
return {&stream_buffer, offset};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -84,7 +84,7 @@ public:
|
|||||||
/// Writes a value to GPU buffer.
|
/// Writes a value to GPU buffer.
|
||||||
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
|
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
|
||||||
|
|
||||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(VAddr host_addr, u32 size);
|
[[nodiscard]] std::pair<Buffer*, u32> ObtainHostUBO(std::span<const u32> data);
|
||||||
|
|
||||||
/// Obtains a buffer for the specified region.
|
/// Obtains a buffer for the specified region.
|
||||||
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
|
[[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
|
||||||
|
Loading…
Reference in New Issue
Block a user