mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-25 11:34:55 +00:00
Merge da38e0c2f3
into 23710f397e
This commit is contained in:
commit
ac65e57708
@ -239,6 +239,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
case 3:
|
||||
const u32 count = header->type3.NumWords();
|
||||
const PM4ItOpcode opcode = header->type3.opcode;
|
||||
const auto predicate = header->type3.predicate;
|
||||
if (predicate == PM4Predicate::PredEnable) {
|
||||
LOG_DEBUG(Render_Vulkan, "PM4 command {} is predicated",
|
||||
magic_enum::enum_name(opcode));
|
||||
}
|
||||
switch (opcode) {
|
||||
case PM4ItOpcode::Nop: {
|
||||
const auto* nop = reinterpret_cast<const PM4CmdNop*>(header);
|
||||
@ -394,7 +399,25 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::SetPredication: {
|
||||
LOG_WARNING(Render_Vulkan, "Unimplemented IT_SET_PREDICATION");
|
||||
const auto* predication = reinterpret_cast<const PM4CmdSetPredication*>(header);
|
||||
if (predication->continue_bit.Value()) {
|
||||
LOG_WARNING(Render_Vulkan, "unhandled continue bit in predication command");
|
||||
}
|
||||
if (predication->pred_op.Value() == PredicateOperation::Clear) {
|
||||
if (rasterizer) {
|
||||
rasterizer->EndPredication();
|
||||
}
|
||||
} else if (predication->pred_op.Value() == PredicateOperation::Zpass) {
|
||||
if (rasterizer) {
|
||||
rasterizer->StartPredication(
|
||||
predication->Address<VAddr>(),
|
||||
predication->action.Value() == Predication::DrawIfVisible,
|
||||
predication->hint.Value() == PredicationHint::Wait);
|
||||
}
|
||||
} else {
|
||||
LOG_WARNING(Render_Vulkan, "unhandled predicate operation {}",
|
||||
magic_enum::enum_name(predication->pred_op.Value()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::IndexType: {
|
||||
@ -595,6 +618,24 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
// immediately
|
||||
regs.cp_strmout_cntl.offset_update_done = 1;
|
||||
}
|
||||
|
||||
if (event->event_index.Value() == EventIndex::ZpassDone) {
|
||||
if (event->event_type.Value() == EventType::PixelPipeStatControl) {
|
||||
|
||||
} else if (event->event_type.Value() == EventType::PixelPipeStatDump) {
|
||||
if ((event->Address<u64>() & 0x8) == 0) {
|
||||
// occlusion query start
|
||||
if (rasterizer) {
|
||||
rasterizer->StartOcclusionQuery(event->Address<VAddr>());
|
||||
}
|
||||
} else {
|
||||
// occlusion query end
|
||||
if (rasterizer) {
|
||||
rasterizer->EndOcclusionQuery(event->Address<VAddr>() & ~0xF);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::EventWriteEos: {
|
||||
|
@ -415,6 +415,13 @@ struct PM4CmdEventWrite {
|
||||
BitField<20, 1, u32> inv_l2; ///< Send WBINVL2 op to the TC L2 cache when EVENT_INDEX = 0111
|
||||
};
|
||||
u32 address[];
|
||||
|
||||
template <typename T>
|
||||
T Address() const {
|
||||
ASSERT(event_index.Value() >= EventIndex::ZpassDone &&
|
||||
event_index.Value() <= EventIndex::SampleStreamoutStatSx);
|
||||
return std::bit_cast<T>((u64(address[1]) << 32u) | u64(address[0]));
|
||||
}
|
||||
};
|
||||
|
||||
struct PM4CmdEventWriteEop {
|
||||
@ -1104,4 +1111,43 @@ struct PM4CmdMemSemaphore {
|
||||
}
|
||||
};
|
||||
|
||||
/// Action field of the IT_SET_PREDICATION packet: selects whether draws are
/// kept or skipped depending on the predicate's visibility result.
enum class Predication : u32 {
    DrawIfNotVisible = 0,
    DrawIfVisible = 1,
};
|
||||
|
||||
/// Hint field of the IT_SET_PREDICATION packet: Wait stalls until the
/// predicate result is available, Draw allows proceeding without it.
enum class PredicationHint : u32 {
    Wait = 0,
    Draw = 1,
};
|
||||
|
||||
/// Predicate source selected by the IT_SET_PREDICATION packet.
enum class PredicateOperation : u32 {
    Clear = 0,     ///< Cancels any active predication
    Zpass = 1,     ///< Predicate on an occlusion (Z-pass) query result
    PrimCount = 2, ///< Predicate on a primitive count
    // All other encodings are reserved.
};
|
||||
|
||||
struct PM4CmdSetPredication {
|
||||
PM4Type3Header header;
|
||||
union {
|
||||
BitField<4, 28, u32> start_address_lo;
|
||||
u32 raw1;
|
||||
};
|
||||
union {
|
||||
BitField<0, 8, u32> start_address_hi;
|
||||
BitField<8, 1, Predication> action;
|
||||
BitField<12, 1, PredicationHint> hint;
|
||||
BitField<16, 3, PredicateOperation> pred_op;
|
||||
BitField<31, 1, u32> continue_bit;
|
||||
u32 raw2;
|
||||
};
|
||||
|
||||
template <typename T = u64>
|
||||
T Address() const {
|
||||
return std::bit_cast<T>(u64(start_address_lo.Value()) << 4 | u64(start_address_hi.Value())
|
||||
<< 32);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace AmdGpu
|
||||
|
@ -212,7 +212,8 @@ bool Instance::CreateDevice() {
|
||||
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
|
||||
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
|
||||
vk::PhysicalDevicePortabilitySubsetFeaturesKHR,
|
||||
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
|
||||
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT,
|
||||
vk::PhysicalDeviceConditionalRenderingFeaturesEXT>();
|
||||
features = feature_chain.get().features;
|
||||
|
||||
const vk::StructureChain properties_chain = physical_device.getProperties2<
|
||||
@ -283,6 +284,7 @@ bool Instance::CreateDevice() {
|
||||
LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}",
|
||||
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax);
|
||||
}
|
||||
conditional_rendering = add_extension(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
|
||||
const bool calibrated_timestamps =
|
||||
TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
|
||||
|
||||
@ -420,6 +422,9 @@ bool Instance::CreateDevice() {
|
||||
.shaderImageFloat32AtomicMinMax =
|
||||
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax,
|
||||
},
|
||||
vk::PhysicalDeviceConditionalRenderingFeaturesEXT{
|
||||
.conditionalRendering = true,
|
||||
},
|
||||
#ifdef __APPLE__
|
||||
portability_features,
|
||||
#endif
|
||||
@ -452,6 +457,9 @@ bool Instance::CreateDevice() {
|
||||
if (!shader_atomic_float2) {
|
||||
device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
|
||||
}
|
||||
if (!conditional_rendering) {
|
||||
device_chain.unlink<vk::PhysicalDeviceConditionalRenderingFeaturesEXT>();
|
||||
}
|
||||
|
||||
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
|
||||
if (device_result != vk::Result::eSuccess) {
|
||||
|
@ -191,6 +191,11 @@ public:
|
||||
return !portability_subset || portability_features.tessellationPointMode;
|
||||
}
|
||||
|
||||
/// Returns true when VK_EXT_conditional_rendering is supported by the device
|
||||
bool IsConditionalRenderingSupported() const {
|
||||
return conditional_rendering;
|
||||
}
|
||||
|
||||
/// Returns the vendor ID of the physical device
|
||||
u32 GetVendorID() const {
|
||||
return properties.vendorID;
|
||||
@ -374,6 +379,7 @@ private:
|
||||
bool amd_gcn_shader{};
|
||||
bool amd_shader_trinary_minmax{};
|
||||
bool shader_atomic_float2{};
|
||||
bool conditional_rendering{};
|
||||
bool portability_subset{};
|
||||
};
|
||||
|
||||
|
@ -17,6 +17,10 @@
|
||||
#undef MemoryBarrier
|
||||
#endif
|
||||
|
||||
namespace {
/// Capacity of the occlusion query pool and of the matching results buffer
/// (one u32 result slot per query). constexpr guarantees compile-time
/// initialization; the anonymous namespace keeps it file-local.
constexpr int OCCLUSION_QUERIES_COUNT = 256;
} // namespace
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
|
||||
@ -38,11 +42,25 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
|
||||
: instance{instance_}, scheduler{scheduler_}, page_manager{this},
|
||||
buffer_cache{instance, scheduler, *this, liverpool_, texture_cache, page_manager},
|
||||
texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_},
|
||||
memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} {
|
||||
memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool},
|
||||
occlusion_query_buffer{instance,
|
||||
scheduler,
|
||||
VideoCore::MemoryUsage::DeviceLocal,
|
||||
0,
|
||||
vk::BufferUsageFlagBits::eConditionalRenderingEXT |
|
||||
vk::BufferUsageFlagBits::eTransferDst,
|
||||
sizeof(u32) * OCCLUSION_QUERIES_COUNT} {
|
||||
if (!Config::nullGpu()) {
|
||||
liverpool->BindRasterizer(this);
|
||||
}
|
||||
memory->SetRasterizer(this);
|
||||
occlusion_query_pool = Check<"occlusion query pool">(instance.GetDevice().createQueryPool({
|
||||
.queryType = vk::QueryType::eOcclusion,
|
||||
.queryCount = OCCLUSION_QUERIES_COUNT,
|
||||
}));
|
||||
instance.GetDevice().resetQueryPool(occlusion_query_pool, 0, OCCLUSION_QUERIES_COUNT);
|
||||
Vulkan::SetObjectName(instance.GetDevice(), occlusion_query_buffer.Handle(),
|
||||
"OcclusionQueryBuffer:{:#x}", sizeof(u32) * OCCLUSION_QUERIES_COUNT);
|
||||
}
|
||||
|
||||
Rasterizer::~Rasterizer() = default;
|
||||
@ -302,6 +320,9 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
|
||||
|
||||
if (active_predication) {
|
||||
cmdbuf.beginConditionalRenderingEXT(&*active_predication);
|
||||
}
|
||||
if (is_indexed) {
|
||||
cmdbuf.drawIndexed(regs.num_indices, regs.num_instances.NumInstances(), 0,
|
||||
s32(vertex_offset), instance_offset);
|
||||
@ -309,7 +330,9 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
||||
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), vertex_offset,
|
||||
instance_offset);
|
||||
}
|
||||
|
||||
if (active_predication) {
|
||||
cmdbuf.endConditionalRenderingEXT();
|
||||
}
|
||||
ResetBindings();
|
||||
}
|
||||
|
||||
@ -354,6 +377,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
|
||||
|
||||
if (active_predication) {
|
||||
cmdbuf.beginConditionalRenderingEXT(&*active_predication);
|
||||
}
|
||||
if (is_indexed) {
|
||||
ASSERT(sizeof(VkDrawIndexedIndirectCommand) == stride);
|
||||
|
||||
@ -373,7 +399,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
|
||||
cmdbuf.drawIndirect(buffer->Handle(), base, max_count, stride);
|
||||
}
|
||||
}
|
||||
|
||||
if (active_predication) {
|
||||
cmdbuf.endConditionalRenderingEXT();
|
||||
}
|
||||
ResetBindings();
|
||||
}
|
||||
|
||||
@ -1263,4 +1291,102 @@ void Rasterizer::ScopedMarkerInsertColor(const std::string_view& str, const u32
|
||||
(f32)(color & 0xff) / 255.0f, (f32)((color >> 24) & 0xff) / 255.0f})});
|
||||
}
|
||||
|
||||
void Rasterizer::StartPredication(VAddr addr, bool draw_if_visible, bool wait_for_result) {
    // Predication is silently ignored when VK_EXT_conditional_rendering is absent.
    if (!instance.IsConditionalRenderingSupported()) {
        return;
    }

    ASSERT(!active_predication);
    ASSERT(occlusion_index_mapping.contains(addr));

    const auto query_index = occlusion_index_mapping[addr];
    LOG_DEBUG(Render_Vulkan,
              "addr = {:#x}, index = {}, draw_if_visible = {}, "
              "wait_for_result = {}",
              addr, query_index, draw_if_visible, wait_for_result);

    scheduler.EndRendering();
    const auto cmdbuf = scheduler.CommandBuffer();

    // Resolve the occlusion query result into this query's slot of the
    // predication buffer; eWait blocks until the result lands, ePartial
    // accepts whatever is available.
    const auto result_flags =
        wait_for_result ? vk::QueryResultFlagBits::eWait : vk::QueryResultFlagBits::ePartial;
    cmdbuf.copyQueryPoolResults(occlusion_query_pool, query_index, 1,
                                occlusion_query_buffer.Handle(), query_index * sizeof(u32),
                                sizeof(u32), result_flags);

    // Order the copy above against both later copies into the same slot and
    // the conditional-rendering read performed by subsequent draws.
    const vk::BufferMemoryBarrier2 copy_barrier{
        .srcStageMask = vk::PipelineStageFlagBits2::eCopy,
        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eCopy,
        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
        .buffer = occlusion_query_buffer.Handle(),
        .offset = query_index * sizeof(u32),
        .size = sizeof(u32),
    };
    const vk::MemoryBarrier2 read_barrier{
        .srcStageMask = vk::PipelineStageFlagBits2::eCopy,
        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eConditionalRenderingEXT,
        .dstAccessMask = vk::AccessFlagBits2::eConditionalRenderingReadEXT,
    };
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
        .memoryBarrierCount = 1,
        .pMemoryBarriers = &read_barrier,
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &copy_barrier,
    });

    ScopeMarkerBegin("gfx:{}:predication", fmt::ptr(reinterpret_cast<const void*>(addr)));
    // NOTE(review): eInverted is selected when draw_if_visible — verify this
    // mapping against the SET_PREDICATION semantics and the stored query value.
    active_predication = vk::ConditionalRenderingBeginInfoEXT{
        .buffer = occlusion_query_buffer.Handle(),
        .offset = query_index * sizeof(u32),
        .flags = draw_if_visible ? vk::ConditionalRenderingFlagBitsEXT::eInverted
                                 : vk::ConditionalRenderingFlagsEXT(),
    };
}
|
||||
|
||||
void Rasterizer::EndPredication() {
    // Nothing to do unless StartPredication successfully armed predication.
    if (!active_predication) {
        return;
    }

    LOG_DEBUG(Render_Vulkan, "");

    scheduler.EndRendering();
    ScopeMarkerEnd();
    active_predication.reset();
}
|
||||
|
||||
void Rasterizer::StartOcclusionQuery(VAddr addr) {
    LOG_DEBUG(Render_Vulkan, "addr = {:#x}, index = {}", addr, occlusion_current_index);

    scheduler.EndRendering();
    const auto cmdbuf = scheduler.CommandBuffer();
    // A pool slot must be reset before it can be begun again.
    cmdbuf.resetQueryPool(occlusion_query_pool, occlusion_current_index, 1);
    ScopeMarkerBegin("gfx:{}:occlusionQuery", fmt::ptr(reinterpret_cast<const void*>(addr)));
    cmdbuf.beginQuery(occlusion_query_pool, occlusion_current_index, vk::QueryControlFlags());

    // Record which slot serves this guest address so EndOcclusionQuery and
    // StartPredication can find it later.
    occlusion_index_mapping.insert_or_assign(addr, occlusion_current_index);

    // Advance the ring cursor, wrapping to slot 0 once the pool is exhausted.
    occlusion_current_index = (occlusion_current_index + 1) % OCCLUSION_QUERIES_COUNT;
}
|
||||
|
||||
void Rasterizer::EndOcclusionQuery(VAddr addr) {
    // The query must have been opened by StartOcclusionQuery for this address.
    ASSERT(occlusion_index_mapping.contains(addr));

    const auto query_index = occlusion_index_mapping[addr];
    LOG_DEBUG(Render_Vulkan, "addr = {:#x}, index = {}", addr, query_index);

    scheduler.EndRendering();
    scheduler.CommandBuffer().endQuery(occlusion_query_pool, query_index);
    ScopeMarkerEnd();
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -55,6 +55,11 @@ public:
|
||||
void ScopedMarkerInsertColor(const std::string_view& str, const u32 color,
|
||||
bool from_guest = false);
|
||||
|
||||
void StartPredication(VAddr addr, bool discard_if_zero, bool wait_for_result);
|
||||
void EndPredication();
|
||||
void StartOcclusionQuery(VAddr addr);
|
||||
void EndOcclusionQuery(VAddr addr);
|
||||
|
||||
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
|
||||
u32 ReadDataFromGds(u32 gsd_offset);
|
||||
bool InvalidateMemory(VAddr addr, u64 size);
|
||||
@ -122,6 +127,11 @@ private:
|
||||
boost::icl::interval_set<VAddr> mapped_ranges;
|
||||
std::shared_mutex mapped_ranges_mutex;
|
||||
PipelineCache pipeline_cache;
|
||||
vk::QueryPool occlusion_query_pool;
|
||||
u32 occlusion_current_index{};
|
||||
std::map<VAddr, u32> occlusion_index_mapping;
|
||||
VideoCore::Buffer occlusion_query_buffer;
|
||||
std::optional<vk::ConditionalRenderingBeginInfoEXT> active_predication;
|
||||
|
||||
boost::container::static_vector<
|
||||
std::pair<VideoCore::ImageId, VideoCore::TextureCache::RenderTargetDesc>, 8>
|
||||
|
Loading…
Reference in New Issue
Block a user