mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-30 22:14:56 +00:00
liverpool: cs state backup
This commit is contained in:
parent
622cdafd09
commit
c737ba7375
@ -218,9 +218,9 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||||||
if (info.has_image_query) {
|
if (info.has_image_query) {
|
||||||
ctx.AddCapability(spv::Capability::ImageQuery);
|
ctx.AddCapability(spv::Capability::ImageQuery);
|
||||||
}
|
}
|
||||||
// if (program.info.stores_frag_depth) {
|
if (info.stores.Get(IR::Attribute::Depth)) {
|
||||||
// ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
||||||
// }
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw NotImplementedException("Stage {}", u32(program.info.stage));
|
throw NotImplementedException("Stage {}", u32(program.info.stage));
|
||||||
|
@ -395,6 +395,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
|
|||||||
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
|
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
|
||||||
return spv::ImageFormat::Rgba8ui;
|
return spv::ImageFormat::Rgba8ui;
|
||||||
}
|
}
|
||||||
|
if (image.GetDataFmt() == AmdGpu::DataFormat::Format10_11_11 &&
|
||||||
|
image.GetNumberFmt() == AmdGpu::NumberFormat::Float) {
|
||||||
|
return spv::ImageFormat::R11fG11fB10f;
|
||||||
|
}
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -76,11 +76,11 @@ struct SMRD {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct InstControlSOPK {
|
struct InstControlSOPK {
|
||||||
BitField<0, 16, u32> simm;
|
s16 simm;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct InstControlSOPP {
|
struct InstControlSOPP {
|
||||||
BitField<0, 16, u32> simm;
|
s16 simm;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct InstControlVOP3 {
|
struct InstControlVOP3 {
|
||||||
|
@ -35,6 +35,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
|
|||||||
return S_CMP(ConditionOp::EQ, true, inst);
|
return S_CMP(ConditionOp::EQ, true, inst);
|
||||||
case Opcode::S_CMP_EQ_U32:
|
case Opcode::S_CMP_EQ_U32:
|
||||||
return S_CMP(ConditionOp::EQ, false, inst);
|
return S_CMP(ConditionOp::EQ, false, inst);
|
||||||
|
case Opcode::S_CMP_GE_U32:
|
||||||
|
return S_CMP(ConditionOp::GE, false, inst);
|
||||||
case Opcode::S_OR_B64:
|
case Opcode::S_OR_B64:
|
||||||
return S_OR_B64(NegateMode::None, false, inst);
|
return S_OR_B64(NegateMode::None, false, inst);
|
||||||
case Opcode::S_NOR_B64:
|
case Opcode::S_NOR_B64:
|
||||||
@ -77,6 +79,10 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
|
|||||||
return S_ADD_U32(inst);
|
return S_ADD_U32(inst);
|
||||||
case Opcode::S_ADDC_U32:
|
case Opcode::S_ADDC_U32:
|
||||||
return S_ADDC_U32(inst);
|
return S_ADDC_U32(inst);
|
||||||
|
case Opcode::S_ADDK_I32:
|
||||||
|
return S_ADDK_I32(inst);
|
||||||
|
case Opcode::S_MULK_I32:
|
||||||
|
return S_MULK_I32(inst);
|
||||||
case Opcode::S_SUB_U32:
|
case Opcode::S_SUB_U32:
|
||||||
case Opcode::S_SUB_I32:
|
case Opcode::S_SUB_I32:
|
||||||
return S_SUB_U32(inst);
|
return S_SUB_U32(inst);
|
||||||
@ -88,7 +94,7 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Translator::S_MOVK(const GcnInst& inst) {
|
void Translator::S_MOVK(const GcnInst& inst) {
|
||||||
const auto simm16 = inst.control.sopk.simm.Value();
|
const auto simm16 = inst.control.sopk.simm;
|
||||||
if (simm16 & (1 << 15)) {
|
if (simm16 & (1 << 15)) {
|
||||||
// TODO: need to verify the case of imm sign extension
|
// TODO: need to verify the case of imm sign extension
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
@ -96,6 +102,16 @@ void Translator::S_MOVK(const GcnInst& inst) {
|
|||||||
SetDst(inst.dst[0], ir.Imm32(simm16));
|
SetDst(inst.dst[0], ir.Imm32(simm16));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::S_ADDK_I32(const GcnInst& inst) {
|
||||||
|
const s32 simm16 = inst.control.sopk.simm;
|
||||||
|
SetDst(inst.dst[0], ir.IAdd(GetSrc(inst.dst[0]), ir.Imm32(simm16)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Translator::S_MULK_I32(const GcnInst& inst) {
|
||||||
|
const s32 simm16 = inst.control.sopk.simm;
|
||||||
|
SetDst(inst.dst[0], ir.IMul(GetSrc(inst.dst[0]), ir.Imm32(simm16)));
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::S_MOV(const GcnInst& inst) {
|
void Translator::S_MOV(const GcnInst& inst) {
|
||||||
SetDst(inst.dst[0], GetSrc(inst.src[0]));
|
SetDst(inst.dst[0], GetSrc(inst.src[0]));
|
||||||
}
|
}
|
||||||
|
@ -92,6 +92,8 @@ public:
|
|||||||
void S_SUB_U32(const GcnInst& inst);
|
void S_SUB_U32(const GcnInst& inst);
|
||||||
void S_GETPC_B64(u32 pc, const GcnInst& inst);
|
void S_GETPC_B64(u32 pc, const GcnInst& inst);
|
||||||
void S_ADDC_U32(const GcnInst& inst);
|
void S_ADDC_U32(const GcnInst& inst);
|
||||||
|
void S_MULK_I32(const GcnInst& inst);
|
||||||
|
void S_ADDK_I32(const GcnInst& inst);
|
||||||
|
|
||||||
// Scalar Memory
|
// Scalar Memory
|
||||||
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
|
||||||
@ -157,6 +159,7 @@ public:
|
|||||||
void V_BCNT_U32_B32(const GcnInst& inst);
|
void V_BCNT_U32_B32(const GcnInst& inst);
|
||||||
void V_COS_F32(const GcnInst& inst);
|
void V_COS_F32(const GcnInst& inst);
|
||||||
void V_MAX3_F32(const GcnInst& inst);
|
void V_MAX3_F32(const GcnInst& inst);
|
||||||
|
void V_MAX3_U32(const GcnInst& inst);
|
||||||
void V_CVT_I32_F32(const GcnInst& inst);
|
void V_CVT_I32_F32(const GcnInst& inst);
|
||||||
void V_MIN_I32(const GcnInst& inst);
|
void V_MIN_I32(const GcnInst& inst);
|
||||||
void V_MUL_LO_U32(const GcnInst& inst);
|
void V_MUL_LO_U32(const GcnInst& inst);
|
||||||
|
@ -215,6 +215,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
|
|||||||
return V_RNDNE_F32(inst);
|
return V_RNDNE_F32(inst);
|
||||||
case Opcode::V_MAX3_F32:
|
case Opcode::V_MAX3_F32:
|
||||||
return V_MAX3_F32(inst);
|
return V_MAX3_F32(inst);
|
||||||
|
case Opcode::V_MAX3_U32:
|
||||||
|
return V_MAX3_U32(inst);
|
||||||
case Opcode::V_TRUNC_F32:
|
case Opcode::V_TRUNC_F32:
|
||||||
return V_TRUNC_F32(inst);
|
return V_TRUNC_F32(inst);
|
||||||
case Opcode::V_CEIL_F32:
|
case Opcode::V_CEIL_F32:
|
||||||
@ -764,6 +766,13 @@ void Translator::V_MAX3_F32(const GcnInst& inst) {
|
|||||||
SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2)));
|
SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Translator::V_MAX3_U32(const GcnInst& inst) {
|
||||||
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
|
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||||
|
const IR::U32 src2{GetSrc(inst.src[2])};
|
||||||
|
SetDst(inst.dst[0], ir.UMax(src0, ir.UMax(src1, src2)));
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::V_CVT_I32_F32(const GcnInst& inst) {
|
void Translator::V_CVT_I32_F32(const GcnInst& inst) {
|
||||||
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
const IR::F32 src0{GetSrc(inst.src[0], true)};
|
||||||
SetDst(inst.dst[0], ir.ConvertFToS(32, src0));
|
SetDst(inst.dst[0], ir.ConvertFToS(32, src0));
|
||||||
|
@ -94,7 +94,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
|||||||
|
|
||||||
void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
|
void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
|
||||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||||
const IR::ScalarReg tsharp_reg{inst.src[2].code};
|
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
|
||||||
const auto flags = ImageResFlags(inst.control.mimg.dmask);
|
const auto flags = ImageResFlags(inst.control.mimg.dmask);
|
||||||
const bool has_mips = flags.test(ImageResComponent::MipCount);
|
const bool has_mips = flags.test(ImageResComponent::MipCount);
|
||||||
const IR::U32 lod = ir.GetVectorReg(IR::VectorReg(inst.src[0].code));
|
const IR::U32 lod = ir.GetVectorReg(IR::VectorReg(inst.src[0].code));
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
#pragma clang optimize off
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <deque>
|
#include <deque>
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
@ -435,8 +435,8 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const u32 stride = buffer.GetStride();
|
const u32 stride = buffer.GetStride();
|
||||||
ASSERT_MSG(stride >= 4, "non-formatting load_buffer_* is not implemented for stride {}",
|
//ASSERT_MSG(stride >= 4, "non-formatting load_buffer_* is not implemented for stride {}",
|
||||||
stride);
|
// stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::U32 address = ir.Imm32(inst_info.inst_offset.Value());
|
IR::U32 address = ir.Imm32(inst_info.inst_offset.Value());
|
||||||
@ -477,18 +477,14 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||||
ASSERT_MSG(result, "Unable to find image sharp source");
|
ASSERT_MSG(result, "Unable to find image sharp source");
|
||||||
const IR::Inst* producer = result.value();
|
const IR::Inst* producer = result.value();
|
||||||
auto [tsharp_handle, ssharp_handle] = [&] -> std::pair<const IR::Inst*, const IR::Inst*> {
|
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
|
||||||
if (producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2) {
|
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
|
||||||
return std::make_pair(producer->Arg(0).InstRecursive(),
|
|
||||||
producer->Arg(1).InstRecursive());
|
|
||||||
}
|
|
||||||
return std::make_pair(producer, nullptr);
|
|
||||||
}();
|
|
||||||
|
|
||||||
// Read image sharp.
|
// Read image sharp.
|
||||||
const auto tsharp = TrackSharp(tsharp_handle);
|
const auto tsharp = TrackSharp(tsharp_handle);
|
||||||
const auto image = info.ReadUd<AmdGpu::Image>(tsharp.sgpr_base, tsharp.dword_offset);
|
const auto image = info.ReadUd<AmdGpu::Image>(tsharp.sgpr_base, tsharp.dword_offset);
|
||||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||||
|
ASSERT(image.GetType() != AmdGpu::ImageType::Buffer);
|
||||||
u32 image_binding = descriptors.Add(ImageResource{
|
u32 image_binding = descriptors.Add(ImageResource{
|
||||||
.sgpr_base = tsharp.sgpr_base,
|
.sgpr_base = tsharp.sgpr_base,
|
||||||
.dword_offset = tsharp.dword_offset,
|
.dword_offset = tsharp.dword_offset,
|
||||||
@ -499,15 +495,28 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
|
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
|
||||||
if (ssharp_handle) {
|
if (has_sampler) {
|
||||||
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
u32 sampler_binding{};
|
||||||
const auto ssharp = TrackSharp(ssharp_ud);
|
const IR::Value& handle = producer->Arg(1);
|
||||||
const u32 sampler_binding = descriptors.Add(SamplerResource{
|
// Inline sampler resource.
|
||||||
.sgpr_base = ssharp.sgpr_base,
|
if (handle.IsImmediate()) {
|
||||||
.dword_offset = ssharp.dword_offset,
|
sampler_binding = descriptors.Add(SamplerResource{
|
||||||
.associated_image = image_binding,
|
.sgpr_base = std::numeric_limits<u32>::max(),
|
||||||
.disable_aniso = disable_aniso,
|
.dword_offset = 0,
|
||||||
});
|
.inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// Normal sampler resource.
|
||||||
|
const auto ssharp_handle = handle.InstRecursive();
|
||||||
|
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
||||||
|
const auto ssharp = TrackSharp(ssharp_ud);
|
||||||
|
sampler_binding = descriptors.Add(SamplerResource{
|
||||||
|
.sgpr_base = ssharp.sgpr_base,
|
||||||
|
.dword_offset = ssharp.dword_offset,
|
||||||
|
.associated_image = image_binding,
|
||||||
|
.disable_aniso = disable_aniso,
|
||||||
|
});
|
||||||
|
}
|
||||||
image_binding |= (sampler_binding << 16);
|
image_binding |= (sampler_binding << 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -610,7 +619,7 @@ void ResourceTrackingPass(IR::Program& program) {
|
|||||||
// Iterate resource instructions and patch them after finding the sharp.
|
// Iterate resource instructions and patch them after finding the sharp.
|
||||||
auto& info = program.info;
|
auto& info = program.info;
|
||||||
Descriptors descriptors{info.buffers, info.images, info.samplers};
|
Descriptors descriptors{info.buffers, info.images, info.samplers};
|
||||||
for (IR::Block* const block : program.post_order_blocks) {
|
for (IR::Block* const block : program.blocks) {
|
||||||
for (IR::Inst& inst : block->Instructions()) {
|
for (IR::Inst& inst : block->Instructions()) {
|
||||||
if (IsBufferInstruction(inst)) {
|
if (IsBufferInstruction(inst)) {
|
||||||
PatchBufferInstruction(*block, inst, info, descriptors);
|
PatchBufferInstruction(*block, inst, info, descriptors);
|
||||||
|
@ -97,8 +97,11 @@ using ImageResourceList = boost::container::static_vector<ImageResource, 16>;
|
|||||||
struct SamplerResource {
|
struct SamplerResource {
|
||||||
u32 sgpr_base;
|
u32 sgpr_base;
|
||||||
u32 dword_offset;
|
u32 dword_offset;
|
||||||
|
AmdGpu::Sampler inline_sampler{};
|
||||||
u32 associated_image : 4;
|
u32 associated_image : 4;
|
||||||
u32 disable_aniso : 1;
|
u32 disable_aniso : 1;
|
||||||
|
|
||||||
|
constexpr AmdGpu::Sampler GetSsharp(const Info& info) const noexcept;
|
||||||
};
|
};
|
||||||
using SamplerResourceList = boost::container::static_vector<SamplerResource, 16>;
|
using SamplerResourceList = boost::container::static_vector<SamplerResource, 16>;
|
||||||
|
|
||||||
@ -196,6 +199,10 @@ constexpr AmdGpu::Buffer BufferResource::GetVsharp(const Info& info) const noexc
|
|||||||
return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr AmdGpu::Sampler SamplerResource::GetSsharp(const Info& info) const noexcept {
|
||||||
|
return inline_sampler ? inline_sampler : info.ReadUd<AmdGpu::Sampler>(sgpr_base, dword_offset);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Shader
|
} // namespace Shader
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
|
@ -403,9 +403,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
|||||||
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
|
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
|
||||||
}
|
}
|
||||||
while (!wait_reg_mem->Test()) {
|
while (!wait_reg_mem->Test()) {
|
||||||
|
mapped_queues[GfxQueueId].cs_state = regs.cs_program;
|
||||||
TracyFiberLeave;
|
TracyFiberLeave;
|
||||||
co_yield {};
|
co_yield {};
|
||||||
TracyFiberEnter(dcb_task_name);
|
TracyFiberEnter(dcb_task_name);
|
||||||
|
regs.cs_program = mapped_queues[GfxQueueId].cs_state;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -506,9 +508,11 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
|||||||
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
|
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
|
||||||
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||||
while (!wait_reg_mem->Test()) {
|
while (!wait_reg_mem->Test()) {
|
||||||
|
mapped_queues[vqid].cs_state = regs.cs_program;
|
||||||
TracyFiberLeave;
|
TracyFiberLeave;
|
||||||
co_yield {};
|
co_yield {};
|
||||||
TracyFiberEnter(acb_task_name);
|
TracyFiberEnter(acb_task_name);
|
||||||
|
regs.cs_program = mapped_queues[vqid].cs_state;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -529,7 +533,6 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, int vqid) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
|
void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
|
||||||
static constexpr u32 GfxQueueId = 0u;
|
|
||||||
auto& queue = mapped_queues[GfxQueueId];
|
auto& queue = mapped_queues[GfxQueueId];
|
||||||
|
|
||||||
auto task = ProcessGraphics(dcb, ccb);
|
auto task = ProcessGraphics(dcb, ccb);
|
||||||
|
@ -36,6 +36,7 @@ namespace AmdGpu {
|
|||||||
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__)
|
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__)
|
||||||
|
|
||||||
struct Liverpool {
|
struct Liverpool {
|
||||||
|
static constexpr u32 GfxQueueId = 0u;
|
||||||
static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software
|
static constexpr u32 NumGfxRings = 1u; // actually 2, but HP is reserved by system software
|
||||||
static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software
|
static constexpr u32 NumComputePipes = 7u; // actually 8, but #7 is reserved by system software
|
||||||
static constexpr u32 NumQueuesPerPipe = 8u;
|
static constexpr u32 NumQueuesPerPipe = 8u;
|
||||||
@ -1061,6 +1062,7 @@ private:
|
|||||||
struct GpuQueue {
|
struct GpuQueue {
|
||||||
std::mutex m_access{};
|
std::mutex m_access{};
|
||||||
std::queue<Task::Handle> submits{};
|
std::queue<Task::Handle> submits{};
|
||||||
|
ComputeProgram cs_state{};
|
||||||
};
|
};
|
||||||
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
|
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
|
||||||
|
|
||||||
|
@ -324,6 +324,7 @@ enum class BorderColor : u64 {
|
|||||||
// Table 8.12 Sampler Resource Definition
|
// Table 8.12 Sampler Resource Definition
|
||||||
struct Sampler {
|
struct Sampler {
|
||||||
union {
|
union {
|
||||||
|
u64 raw0;
|
||||||
BitField<0, 3, ClampMode> clamp_x;
|
BitField<0, 3, ClampMode> clamp_x;
|
||||||
BitField<3, 3, ClampMode> clamp_y;
|
BitField<3, 3, ClampMode> clamp_y;
|
||||||
BitField<6, 3, ClampMode> clamp_z;
|
BitField<6, 3, ClampMode> clamp_z;
|
||||||
@ -343,6 +344,7 @@ struct Sampler {
|
|||||||
BitField<60, 4, u64> perf_z;
|
BitField<60, 4, u64> perf_z;
|
||||||
};
|
};
|
||||||
union {
|
union {
|
||||||
|
u64 raw1;
|
||||||
BitField<0, 14, u64> lod_bias;
|
BitField<0, 14, u64> lod_bias;
|
||||||
BitField<14, 6, u64> lod_bias_sec;
|
BitField<14, 6, u64> lod_bias_sec;
|
||||||
BitField<20, 2, Filter> xy_mag_filter;
|
BitField<20, 2, Filter> xy_mag_filter;
|
||||||
@ -357,6 +359,10 @@ struct Sampler {
|
|||||||
BitField<62, 2, BorderColor> border_color_type;
|
BitField<62, 2, BorderColor> border_color_type;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
operator bool() const noexcept {
|
||||||
|
return raw0 != 0 || raw1 != 0;
|
||||||
|
}
|
||||||
|
|
||||||
float LodBias() const noexcept {
|
float LodBias() const noexcept {
|
||||||
return static_cast<float>(static_cast<int16_t>((lod_bias.Value() ^ 0x2000u) - 0x2000u)) /
|
return static_cast<float>(static_cast<int16_t>((lod_bias.Value() ^ 0x2000u) - 0x2000u)) /
|
||||||
256.0f;
|
256.0f;
|
||||||
|
@ -297,6 +297,7 @@ std::span<const vk::Format> GetAllFormats() {
|
|||||||
vk::Format::eBc3UnormBlock,
|
vk::Format::eBc3UnormBlock,
|
||||||
vk::Format::eBc4UnormBlock,
|
vk::Format::eBc4UnormBlock,
|
||||||
vk::Format::eBc5UnormBlock,
|
vk::Format::eBc5UnormBlock,
|
||||||
|
vk::Format::eBc5SnormBlock,
|
||||||
vk::Format::eBc7SrgbBlock,
|
vk::Format::eBc7SrgbBlock,
|
||||||
vk::Format::eBc7UnormBlock,
|
vk::Format::eBc7UnormBlock,
|
||||||
vk::Format::eD16Unorm,
|
vk::Format::eD16Unorm,
|
||||||
@ -308,6 +309,7 @@ std::span<const vk::Format> GetAllFormats() {
|
|||||||
vk::Format::eR8G8B8A8Srgb,
|
vk::Format::eR8G8B8A8Srgb,
|
||||||
vk::Format::eR8G8B8A8Uint,
|
vk::Format::eR8G8B8A8Uint,
|
||||||
vk::Format::eR8G8B8A8Unorm,
|
vk::Format::eR8G8B8A8Unorm,
|
||||||
|
vk::Format::eR8G8B8A8Snorm,
|
||||||
vk::Format::eR8G8B8A8Uscaled,
|
vk::Format::eR8G8B8A8Uscaled,
|
||||||
vk::Format::eR8G8Snorm,
|
vk::Format::eR8G8Snorm,
|
||||||
vk::Format::eR8G8Uint,
|
vk::Format::eR8G8Uint,
|
||||||
@ -384,6 +386,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||||||
if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Unorm) {
|
if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Unorm) {
|
||||||
return vk::Format::eBc5UnormBlock;
|
return vk::Format::eBc5UnormBlock;
|
||||||
}
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Snorm) {
|
||||||
|
return vk::Format::eBc5SnormBlock;
|
||||||
|
}
|
||||||
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
if (data_format == AmdGpu::DataFormat::Format16_16_16_16 &&
|
||||||
num_format == AmdGpu::NumberFormat::Sint) {
|
num_format == AmdGpu::NumberFormat::Sint) {
|
||||||
return vk::Format::eR16G16B16A16Sint;
|
return vk::Format::eR16G16B16A16Sint;
|
||||||
@ -518,6 +523,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
|
|||||||
num_format == AmdGpu::NumberFormat::SnormNz) {
|
num_format == AmdGpu::NumberFormat::SnormNz) {
|
||||||
return vk::Format::eR16G16B16A16Snorm;
|
return vk::Format::eR16G16B16A16Snorm;
|
||||||
}
|
}
|
||||||
|
if (data_format == AmdGpu::DataFormat::Format8_8_8_8 &&
|
||||||
|
num_format == AmdGpu::NumberFormat::Snorm) {
|
||||||
|
return vk::Format::eR8G8B8A8Snorm;
|
||||||
|
}
|
||||||
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -148,7 +148,7 @@ bool ComputePipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& s
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (const auto& sampler : info.samplers) {
|
for (const auto& sampler : info.samplers) {
|
||||||
const auto ssharp = info.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);
|
const auto ssharp = sampler.GetSsharp(info);
|
||||||
const auto vk_sampler = texture_cache.GetSampler(ssharp);
|
const auto vk_sampler = texture_cache.GetSampler(ssharp);
|
||||||
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
|
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
|
||||||
set_writes.push_back({
|
set_writes.push_back({
|
||||||
|
@ -386,7 +386,7 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (const auto& sampler : stage.samplers) {
|
for (const auto& sampler : stage.samplers) {
|
||||||
auto ssharp = stage.ReadUd<AmdGpu::Sampler>(sampler.sgpr_base, sampler.dword_offset);
|
auto ssharp = sampler.GetSsharp(stage);
|
||||||
if (sampler.disable_aniso) {
|
if (sampler.disable_aniso) {
|
||||||
const auto& tsharp = tsharps[sampler.associated_image];
|
const auto& tsharp = tsharps[sampler.associated_image];
|
||||||
if (tsharp.base_level == 0 && tsharp.last_level == 0) {
|
if (tsharp.base_level == 0 && tsharp.last_level == 0) {
|
||||||
|
@ -261,6 +261,7 @@ bool Instance::CreateDevice() {
|
|||||||
.shaderStorageImageExtendedFormats = features.shaderStorageImageExtendedFormats,
|
.shaderStorageImageExtendedFormats = features.shaderStorageImageExtendedFormats,
|
||||||
.shaderStorageImageMultisample = features.shaderStorageImageMultisample,
|
.shaderStorageImageMultisample = features.shaderStorageImageMultisample,
|
||||||
.shaderClipDistance = features.shaderClipDistance,
|
.shaderClipDistance = features.shaderClipDistance,
|
||||||
|
.shaderInt64 = features.shaderInt64,
|
||||||
.shaderInt16 = features.shaderInt16,
|
.shaderInt16 = features.shaderInt16,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -302,6 +302,14 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
|
|||||||
block_pool.ReleaseContents();
|
block_pool.ReleaseContents();
|
||||||
inst_pool.ReleaseContents();
|
inst_pool.ReleaseContents();
|
||||||
|
|
||||||
|
if (compute_key == 0xa71733ca || compute_key == 0xa55ad01d) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (compute_key == 4248155022) {
|
||||||
|
printf("test\n");
|
||||||
|
}
|
||||||
|
|
||||||
// Recompile shader to IR.
|
// Recompile shader to IR.
|
||||||
try {
|
try {
|
||||||
LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
|
LOG_INFO(Render_Vulkan, "Compiling cs shader {:#x}", compute_key);
|
||||||
|
@ -23,7 +23,7 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
|||||||
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
|
: instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_},
|
||||||
liverpool{liverpool_}, memory{Core::Memory::Instance()},
|
liverpool{liverpool_}, memory{Core::Memory::Instance()},
|
||||||
pipeline_cache{instance, scheduler, liverpool},
|
pipeline_cache{instance, scheduler, liverpool},
|
||||||
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 1_GB, BufferType::Upload} {
|
vertex_index_buffer{instance, scheduler, VertexIndexFlags, 2_GB, BufferType::Upload} {
|
||||||
if (!Config::nullGpu()) {
|
if (!Config::nullGpu()) {
|
||||||
liverpool->BindRasterizer(this);
|
liverpool->BindRasterizer(this);
|
||||||
}
|
}
|
||||||
@ -91,6 +91,7 @@ void Rasterizer::DispatchDirect() {
|
|||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
scheduler.EndRendering();
|
||||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
|
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
|
||||||
cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
|
cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
|
||||||
}
|
}
|
||||||
|
@ -54,9 +54,7 @@ void Scheduler::EndRendering() {
|
|||||||
for (size_t i = 0; i < render_state.num_color_attachments; ++i) {
|
for (size_t i = 0; i < render_state.num_color_attachments; ++i) {
|
||||||
barriers.push_back(vk::ImageMemoryBarrier{
|
barriers.push_back(vk::ImageMemoryBarrier{
|
||||||
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
|
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
|
||||||
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite |
|
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
|
||||||
vk::AccessFlagBits::eColorAttachmentRead |
|
|
||||||
vk::AccessFlagBits::eColorAttachmentWrite,
|
|
||||||
.oldLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
.oldLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||||
.newLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
.newLayout = vk::ImageLayout::eColorAttachmentOptimal,
|
||||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
@ -226,7 +226,7 @@ void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
|
|||||||
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
|
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
|
||||||
auto& watch = previous_watches[wait_cursor];
|
auto& watch = previous_watches[wait_cursor];
|
||||||
wait_bound = watch.upper_bound;
|
wait_bound = watch.upper_bound;
|
||||||
scheduler.Wait(watch.tick);
|
//scheduler.Wait(watch.tick);
|
||||||
++wait_cursor;
|
++wait_cursor;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -48,9 +48,9 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool IsIdentityMapping(u32 dst_sel, u32 num_components) {
|
bool IsIdentityMapping(u32 dst_sel, u32 num_components) {
|
||||||
return (num_components == 1 && (dst_sel == 0b100 || dst_sel == 0b001000000100)) ||
|
return (num_components == 1 && dst_sel == 0b100) ||
|
||||||
(num_components == 2 && (dst_sel == 0b101100 || dst_sel == 0b001000101100)) ||
|
(num_components == 2 && dst_sel == 0b101100) ||
|
||||||
(num_components == 3 && (dst_sel == 0b110101100 || dst_sel == 0b001110101100)) ||
|
(num_components == 3 && dst_sel == 0b110101100) ||
|
||||||
(num_components == 4 && dst_sel == 0b111110101100);
|
(num_components == 4 && dst_sel == 0b111110101100);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user