mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-27 04:25:12 +00:00
Make ReadConst a SPIR-V function
This commit is contained in:
parent
1fcd800c46
commit
eeb5da33d0
@ -163,25 +163,14 @@ void EmitGetGotoVariable(EmitContext&) {
|
||||
using PointerType = EmitContext::PointerType;
|
||||
|
||||
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
|
||||
const Id base_lo = ctx.OpUConvert(ctx.U64, ctx.OpCompositeExtract(ctx.U32[1], addr, 0));
|
||||
const Id base_hi = ctx.OpUConvert(ctx.U64, ctx.OpCompositeExtract(ctx.U32[1], addr, 1));
|
||||
const Id base_sift = ctx.OpShiftLeftLogical(ctx.U64, base_hi, ctx.ConstU32(32u));
|
||||
const Id base = ctx.OpBitwiseOr(ctx.U64, base_lo, base_sift);
|
||||
const Id offset_bytes = ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.ConstU32(2u));
|
||||
const Id address = ctx.OpIAdd(ctx.U64, base, ctx.OpUConvert(ctx.U64, offset_bytes));
|
||||
return ctx.EmitMemoryAccess(ctx.U32[1], address, [&]() {
|
||||
const u32 flatbuf_off_dw = inst->Flags<u32>();
|
||||
// We can only provide a fallback for immediate offsets.
|
||||
if (flatbuf_off_dw == 0) {
|
||||
return ctx.u32_zero_value;
|
||||
return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const_dynamic, addr, offset);
|
||||
} else {
|
||||
const auto& srt_flatbuf = ctx.buffers[ctx.flatbuf_index];
|
||||
ASSERT(srt_flatbuf.binding >= 0 && srt_flatbuf.buffer_type == BufferType::Flatbuf);
|
||||
const auto [id, pointer_type] = srt_flatbuf[PointerType::U32];
|
||||
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value,
|
||||
ctx.ConstU32(flatbuf_off_dw))};
|
||||
return ctx.OpLoad(ctx.U32[1], ptr);
|
||||
return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const, addr, offset,
|
||||
ctx.ConstU32(flatbuf_off_dw));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
|
||||
|
@ -144,9 +144,12 @@ void EmitContext::DefineArithmeticTypes() {
|
||||
|
||||
true_value = ConstantTrue(U1[1]);
|
||||
false_value = ConstantFalse(U1[1]);
|
||||
u8_one_value = Constant(U8, 1U);
|
||||
u8_zero_value = Constant(U8, 0U);
|
||||
u32_one_value = ConstU32(1U);
|
||||
u32_zero_value = ConstU32(0U);
|
||||
f32_zero_value = ConstF32(0.0f);
|
||||
u64_one_value = Constant(U64, 1ULL);
|
||||
u64_zero_value = Constant(U64, 0ULL);
|
||||
|
||||
pi_x2 = ConstF32(2.0f * float{std::numbers::pi});
|
||||
@ -194,17 +197,6 @@ void EmitContext::DefineArithmeticTypes() {
|
||||
physical_pointer_types[PointerType::U8] =
|
||||
TypePointer(spv::StorageClass::PhysicalStorageBuffer, U8);
|
||||
}
|
||||
|
||||
if (info.dma_types != IR::Type::Void) {
|
||||
|
||||
caching_pagebits_value =
|
||||
Constant(U64, static_cast<u64>(VideoCore::BufferCache::CACHING_PAGEBITS));
|
||||
caching_pagemask_value = Constant(U64, VideoCore::BufferCache::CACHING_PAGESIZE - 1);
|
||||
|
||||
// Used to calculate fault buffer position and mask
|
||||
u32_three_value = ConstU32(3U);
|
||||
u32_seven_value = ConstU32(7U);
|
||||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineInterfaces() {
|
||||
@ -760,16 +752,19 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
|
||||
};
|
||||
|
||||
void EmitContext::DefineBuffers() {
|
||||
if (!profile.supports_robust_buffer_access && !info.has_readconst) {
|
||||
if (!profile.supports_robust_buffer_access &&
|
||||
info.readconst_types == Info::ReadConstType::None) {
|
||||
// In case Flatbuf has not already been bound by IR and is needed
|
||||
// to query buffer sizes, bind it now.
|
||||
info.buffers.push_back({
|
||||
.used_types = IR::Type::U32,
|
||||
// We can't guarantee that flatbuf will now grow bast UBO
|
||||
// We can't guarantee that flatbuf will not grow past UBO
|
||||
// limit if there are a lot of ReadConsts. (We could specialize)
|
||||
.inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
|
||||
.buffer_type = BufferType::Flatbuf,
|
||||
});
|
||||
// In the future we may want to read buffer sizes from GPU memory if available.
|
||||
// info.readconst_types |= Info::ReadConstType::Immediate;
|
||||
}
|
||||
for (const auto& desc : info.buffers) {
|
||||
const auto buf_sharp = desc.GetSharp(info);
|
||||
@ -1073,6 +1068,101 @@ Id EmitContext::DefineUfloatM5ToFloat32(u32 mantissa_bits, const std::string_vie
|
||||
return func;
|
||||
}
|
||||
|
||||
/// Emits the SPIR-V helper function "get_bda_pointer" and returns its id.
///
/// The emitted function takes a U64 address and returns a U64. It loads the entry for
/// the address' page from the BDA page table buffer. A zero entry is treated as a fault:
/// the page's bit is set in the fault buffer and zero is returned. Otherwise the result
/// is the entry plus the offset of the address inside its page.
Id EmitContext::DefineGetBdaPointer() {
    const Id page_bits_const =
        Constant(U64, static_cast<u64>(VideoCore::BufferCache::CACHING_PAGEBITS));
    const Id page_mask_const = Constant(U64, VideoCore::BufferCache::CACHING_PAGESIZE - 1);

    const Id signature = TypeFunction(U64, U64);
    const Id func_id = OpFunction(U64, spv::FunctionControlMask::MaskNone, signature);
    const Id address = OpFunctionParameter(U64);
    Name(func_id, "get_bda_pointer");
    AddLabel();

    // All three block labels are created before any of them is used.
    const Id fault_block = OpLabel();
    const Id available_block = OpLabel();
    const Id merge_block = OpLabel();

    // Load the page table entry of the page that contains the address.
    const Id page_index64 = OpShiftRightLogical(U64, address, page_bits_const);
    const Id page_index = OpUConvert(U32[1], page_index64);
    const auto& page_table = buffers[bda_pagetable_index];
    const auto [page_table_id, page_table_ptr_type] = page_table[PointerType::U64];
    const Id entry_ptr =
        OpAccessChain(page_table_ptr_type, page_table_id, u32_zero_value, page_index);
    const Id entry = OpLoad(U64, entry_ptr);

    // A zero entry means the page is not GPU cached; branch on that.
    const Id entry_is_null = OpIEqual(U1[1], entry, u64_zero_value);
    OpSelectionMerge(merge_block, spv::SelectionControlMask::MaskNone);
    OpBranchConditional(entry_is_null, fault_block, available_block);

    // Fault path (first time access): set bit (page % 8) of byte (page / 8) in the
    // fault buffer.
    AddLabel(fault_block);
    const auto& fault_bits = buffers[fault_buffer_index];
    const auto [fault_bits_id, fault_bits_ptr_type] = fault_bits[PointerType::U8];
    const Id byte_index = OpShiftRightLogical(U32[1], page_index, ConstU32(3U));
    const Id bit_index = OpBitwiseAnd(U32[1], page_index, ConstU32(7U));
    const Id bit_mask = OpShiftLeftLogical(U8, u8_one_value, bit_index);
    const Id byte_ptr =
        OpAccessChain(fault_bits_ptr_type, fault_bits_id, u32_zero_value, byte_index);
    const Id old_byte = OpLoad(U8, byte_ptr);
    const Id new_byte = OpBitwiseOr(U8, old_byte, bit_mask);
    OpStore(byte_ptr, new_byte);

    // This path yields a null pointer (u64 zero) through the phi below.
    OpBranch(merge_block);

    // Available path: entry + (address & page mask).
    AddLabel(available_block);
    const Id page_offset = OpBitwiseAnd(U64, address, page_mask_const);
    const Id resolved = OpIAdd(U64, entry, page_offset);
    OpBranch(merge_block);

    // Merge both paths and return the selected value.
    AddLabel(merge_block);
    const Id merged = OpPhi(U64, resolved, available_block, u64_zero_value, fault_block);
    OpReturnValue(merged);
    OpFunctionEnd();
    return func_id;
}
|
||||
|
||||
/// Emits one of the two SPIR-V ReadConst helper functions and returns its id.
///
/// @param dynamic  When true, emits "read_const_dynamic" taking (base: U32[2],
///                 offset: U32[1]); its fallback value is u32 zero. When false, emits
///                 "read_const" taking an additional U32[1] flatbuf offset; its
///                 fallback loads that dword from the flatbuf buffer.
/// @return Id of the emitted function, which returns a U32[1].
///
/// The emitted function combines the two base dwords into a 64-bit address, adds the
/// offset shifted left by two, and reads a U32 through EmitMemoryRead, which calls the
/// fallback supplied here when it cannot read the address directly.
Id EmitContext::DefineReadConst(bool dynamic) {
    const Id signature = dynamic ? TypeFunction(U32[1], U32[2], U32[1])
                                 : TypeFunction(U32[1], U32[2], U32[1], U32[1]);
    const Id func_id = OpFunction(U32[1], spv::FunctionControlMask::MaskNone, signature);
    const Id base = OpFunctionParameter(U32[2]);
    const Id offset = OpFunctionParameter(U32[1]);
    // Only the immediate variant receives the flatbuf offset parameter.
    const Id flatbuf_offset = dynamic ? Id{} : OpFunctionParameter(U32[1]);
    Name(func_id, dynamic ? "read_const_dynamic" : "read_const");
    AddLabel();

    // address = (u64(base.y) << 32 | u64(base.x)) + u64(offset << 2)
    const Id low_dword = OpCompositeExtract(U32[1], base, 0);
    const Id low64 = OpUConvert(U64, low_dword);
    const Id high_dword = OpCompositeExtract(U32[1], base, 1);
    const Id high64 = OpUConvert(U64, high_dword);
    const Id high_shifted = OpShiftLeftLogical(U64, high64, ConstU32(32U));
    const Id base_address = OpBitwiseOr(U64, low64, high_shifted);
    const Id byte_offset = OpShiftLeftLogical(U32[1], offset, ConstU32(2U));
    const Id byte_offset64 = OpUConvert(U64, byte_offset);
    const Id address = OpIAdd(U64, base_address, byte_offset64);

    const Id value = EmitMemoryRead(U32[1], address, [&]() {
        if (dynamic) {
            // No flatbuf slot is passed to the dynamic variant; fall back to zero.
            return u32_zero_value;
        }
        const auto& flatbuf = buffers[flatbuf_index];
        ASSERT(flatbuf.binding >= 0 &&
               flatbuf.buffer_type == BufferType::Flatbuf);
        const auto [flatbuf_id, flatbuf_ptr_type] = flatbuf[PointerType::U32];
        const Id dword_ptr =
            OpAccessChain(flatbuf_ptr_type, flatbuf_id, u32_zero_value, flatbuf_offset);
        return OpLoad(U32[1], dword_ptr);
    });

    OpReturnValue(value);
    OpFunctionEnd();
    return func_id;
}
|
||||
|
||||
void EmitContext::DefineFunctions() {
|
||||
if (info.uses_pack_10_11_11) {
|
||||
f32_to_uf11 = DefineFloat32ToUfloatM5(6, "f32_to_uf11");
|
||||
@ -1082,6 +1172,18 @@ void EmitContext::DefineFunctions() {
|
||||
uf11_to_f32 = DefineUfloatM5ToFloat32(6, "uf11_to_f32");
|
||||
uf10_to_f32 = DefineUfloatM5ToFloat32(5, "uf10_to_f32");
|
||||
}
|
||||
if (info.dma_types != IR::Type::Void) {
|
||||
get_bda_pointer = DefineGetBdaPointer();
|
||||
}
|
||||
|
||||
if (True(info.readconst_types & Info::ReadConstType::Immediate)) {
|
||||
LOG_DEBUG(Render_Recompiler, "Shader {:#x} uses immediate ReadConst", info.pgm_hash);
|
||||
read_const = DefineReadConst(false);
|
||||
}
|
||||
if (True(info.readconst_types & Info::ReadConstType::Dynamic)) {
|
||||
LOG_DEBUG(Render_Recompiler, "Shader {:#x} uses dynamic ReadConst", info.pgm_hash);
|
||||
read_const_dynamic = DefineReadConst(true);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
@ -173,55 +173,33 @@ public:
|
||||
UNREACHABLE_MSG("Unknown type for pointer");
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
Id EmitMemoryAccess(Id type, Id address, Func&& fallback) {
|
||||
const Id fault_label = OpLabel();
|
||||
Id EmitMemoryRead(Id type, Id address, auto&& fallback) {
|
||||
const Id available_label = OpLabel();
|
||||
const Id fallback_label = OpLabel();
|
||||
const Id merge_label = OpLabel();
|
||||
|
||||
// Get page BDA
|
||||
const Id page = OpShiftRightLogical(U64, address, caching_pagebits_value);
|
||||
const Id page32 = OpUConvert(U32[1], page);
|
||||
const auto& bda_buffer = buffers[bda_pagetable_index];
|
||||
const auto [bda_buffer_id, bda_pointer_type] = bda_buffer[PointerType::U64];
|
||||
const Id bda_ptr = OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32);
|
||||
const Id bda = OpLoad(U64, bda_ptr);
|
||||
|
||||
// Check if the page is GPU mapped
|
||||
const Id is_fault = OpIEqual(U1[1], bda, u64_zero_value);
|
||||
const Id addr = OpFunctionCall(U64, get_bda_pointer, address);
|
||||
const Id is_available = OpINotEqual(U1[1], addr, u64_zero_value);
|
||||
OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
|
||||
OpBranchConditional(is_fault, fault_label, available_label);
|
||||
OpBranchConditional(is_available, available_label, fallback_label);
|
||||
|
||||
// First time access
|
||||
AddLabel(fault_label);
|
||||
const auto& fault_buffer = buffers[fault_buffer_index];
|
||||
const auto [fault_buffer_id, fault_pointer_type] = fault_buffer[PointerType::U8];
|
||||
const Id page_div8 = OpShiftRightLogical(U32[1], page32, u32_three_value);
|
||||
const Id page_mod8 = OpBitwiseAnd(U32[1], page32, u32_seven_value);
|
||||
const Id page_mask = OpShiftLeftLogical(U32[1], u32_one_value, page_mod8);
|
||||
const Id fault_ptr =
|
||||
OpAccessChain(fault_pointer_type, fault_buffer_id, u32_zero_value, page_div8);
|
||||
const Id fault_value = OpLoad(U8, fault_ptr);
|
||||
const Id page_mask8 = OpUConvert(U8, page_mask);
|
||||
const Id fault_value_masked = OpBitwiseOr(U8, fault_value, page_mask8);
|
||||
OpStore(fault_ptr, fault_value_masked);
|
||||
// Fallback (we are not able to access the page)
|
||||
const Id fallback_result = fallback();
|
||||
OpBranch(merge_label);
|
||||
|
||||
// Value is available
|
||||
// Available
|
||||
AddLabel(available_label);
|
||||
const Id offset_in_bda = OpBitwiseAnd(U64, address, caching_pagemask_value);
|
||||
const Id addr = OpIAdd(U64, bda, offset_in_bda);
|
||||
const PointerType pointer_type = PointerTypeFromType(type);
|
||||
const auto pointer_type = PointerTypeFromType(type);
|
||||
const Id pointer_type_id = physical_pointer_types[pointer_type];
|
||||
const Id addr_ptr = OpConvertUToPtr(pointer_type_id, addr);
|
||||
const Id result = OpLoad(type, addr_ptr, spv::MemoryAccessMask::Aligned, 4u);
|
||||
OpBranch(merge_label);
|
||||
|
||||
// Fallback
|
||||
AddLabel(fallback_label);
|
||||
const Id fallback_result = fallback();
|
||||
OpBranch(merge_label);
|
||||
|
||||
// Merge
|
||||
AddLabel(merge_label);
|
||||
const Id final_result = OpPhi(type, fallback_result, fault_label, result, available_label);
|
||||
const Id final_result =
|
||||
OpPhi(type, fallback_result, fallback_label, result, available_label);
|
||||
return final_result;
|
||||
}
|
||||
|
||||
@ -255,16 +233,13 @@ public:
|
||||
|
||||
Id true_value{};
|
||||
Id false_value{};
|
||||
Id u32_seven_value{};
|
||||
Id u32_three_value{};
|
||||
Id u8_one_value{};
|
||||
Id u8_zero_value{};
|
||||
Id u32_one_value{};
|
||||
Id u32_zero_value{};
|
||||
Id f32_zero_value{};
|
||||
Id u64_zero_value{};
|
||||
Id u64_one_value{};
|
||||
|
||||
Id caching_pagebits_value{};
|
||||
Id caching_pagemask_value{};
|
||||
Id u64_zero_value{};
|
||||
|
||||
Id shared_u8{};
|
||||
Id shared_u16{};
|
||||
@ -403,6 +378,11 @@ public:
|
||||
Id uf10_to_f32{};
|
||||
Id f32_to_uf10{};
|
||||
|
||||
Id get_bda_pointer{};
|
||||
|
||||
Id read_const{};
|
||||
Id read_const_dynamic{};
|
||||
|
||||
private:
|
||||
void DefineArithmeticTypes();
|
||||
void DefineInterfaces();
|
||||
@ -423,6 +403,10 @@ private:
|
||||
Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name);
|
||||
Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name);
|
||||
|
||||
Id DefineGetBdaPointer();
|
||||
|
||||
Id DefineReadConst(bool dynamic);
|
||||
|
||||
Id GetBufferSize(u32 sharp_idx);
|
||||
};
|
||||
|
||||
|
@ -217,12 +217,18 @@ struct Info {
|
||||
bool stores_tess_level_outer{};
|
||||
bool stores_tess_level_inner{};
|
||||
bool translation_failed{};
|
||||
bool has_readconst{};
|
||||
IR::Type dma_types{IR::Type::Void};
|
||||
u8 mrt_mask{0u};
|
||||
bool has_fetch_shader{false};
|
||||
u32 fetch_shader_sgpr_base{0u};
|
||||
|
||||
/// Bit flags recording which kinds of ReadConst a shader contains.
enum class ReadConstType {
    /// No ReadConst has been recorded.
    None = 0x0,
    /// Set for a ReadConst whose instruction flags (flatbuf dword offset) are non-zero.
    Immediate = 0x1,
    /// Set for a ReadConst whose instruction flags are zero.
    Dynamic = 0x2,
};
|
||||
ReadConstType readconst_types{};
|
||||
IR::Type dma_types{IR::Type::Void};
|
||||
|
||||
explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
|
||||
: stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
|
||||
user_data{params.user_data} {}
|
||||
@ -280,6 +286,7 @@ struct Info {
|
||||
sizeof(tess_constants));
|
||||
}
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(Info::ReadConstType);
|
||||
|
||||
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
|
||||
return inline_cbuf ? inline_cbuf : info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
|
||||
|
@ -80,15 +80,19 @@ void Visit(Info& info, const IR::Inst& inst) {
|
||||
info.uses_lane_id = true;
|
||||
break;
|
||||
case IR::Opcode::ReadConst:
|
||||
if (!info.has_readconst) {
|
||||
if (info.readconst_types == Info::ReadConstType::None) {
|
||||
info.buffers.push_back({
|
||||
.used_types = IR::Type::U32,
|
||||
// We can't guarantee that flatbuf will now grow bast UBO
|
||||
// We can't guarantee that flatbuf will not grow past UBO
|
||||
// limit if there are a lot of ReadConsts. (We could specialize)
|
||||
.inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
|
||||
.buffer_type = BufferType::Flatbuf,
|
||||
});
|
||||
info.has_readconst = true;
|
||||
}
|
||||
if (inst.Flags<u32>() != 0) {
|
||||
info.readconst_types |= Info::ReadConstType::Immediate;
|
||||
} else {
|
||||
info.readconst_types |= Info::ReadConstType::Dynamic;
|
||||
}
|
||||
info.dma_types |= IR::Type::U32;
|
||||
break;
|
||||
|
Loading…
Reference in New Issue
Block a user