This commit is contained in:
Marcin Mikołajczyk 2025-06-04 07:22:20 +02:00 committed by GitHub
commit ac65e57708
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 242 additions and 5 deletions

View File

@ -239,6 +239,11 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
case 3:
const u32 count = header->type3.NumWords();
const PM4ItOpcode opcode = header->type3.opcode;
const auto predicate = header->type3.predicate;
if (predicate == PM4Predicate::PredEnable) {
LOG_DEBUG(Render_Vulkan, "PM4 command {} is predicated",
magic_enum::enum_name(opcode));
}
switch (opcode) {
case PM4ItOpcode::Nop: {
const auto* nop = reinterpret_cast<const PM4CmdNop*>(header);
@ -394,7 +399,25 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
break;
}
case PM4ItOpcode::SetPredication: {
LOG_WARNING(Render_Vulkan, "Unimplemented IT_SET_PREDICATION");
const auto* predication = reinterpret_cast<const PM4CmdSetPredication*>(header);
if (predication->continue_bit.Value()) {
LOG_WARNING(Render_Vulkan, "unhandled continue bit in predication command");
}
if (predication->pred_op.Value() == PredicateOperation::Clear) {
if (rasterizer) {
rasterizer->EndPredication();
}
} else if (predication->pred_op.Value() == PredicateOperation::Zpass) {
if (rasterizer) {
rasterizer->StartPredication(
predication->Address<VAddr>(),
predication->action.Value() == Predication::DrawIfVisible,
predication->hint.Value() == PredicationHint::Wait);
}
} else {
LOG_WARNING(Render_Vulkan, "unhandled predicate operation {}",
magic_enum::enum_name(predication->pred_op.Value()));
}
break;
}
case PM4ItOpcode::IndexType: {
@ -595,6 +618,24 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
// immediately
regs.cp_strmout_cntl.offset_update_done = 1;
}
if (event->event_index.Value() == EventIndex::ZpassDone) {
if (event->event_type.Value() == EventType::PixelPipeStatControl) {
} else if (event->event_type.Value() == EventType::PixelPipeStatDump) {
if ((event->Address<u64>() & 0x8) == 0) {
// occlusion query start
if (rasterizer) {
rasterizer->StartOcclusionQuery(event->Address<VAddr>());
}
} else {
// occlusion query end
if (rasterizer) {
rasterizer->EndOcclusionQuery(event->Address<VAddr>() & ~0xF);
}
}
}
}
break;
}
case PM4ItOpcode::EventWriteEos: {

View File

@ -415,6 +415,13 @@ struct PM4CmdEventWrite {
BitField<20, 1, u32> inv_l2; ///< Send WBINVL2 op to the TC L2 cache when EVENT_INDEX = 0111
};
u32 address[];
template <typename T>
T Address() const {
ASSERT(event_index.Value() >= EventIndex::ZpassDone &&
event_index.Value() <= EventIndex::SampleStreamoutStatSx);
return std::bit_cast<T>((u64(address[1]) << 32u) | u64(address[0]));
}
};
struct PM4CmdEventWriteEop {
@ -1104,4 +1111,43 @@ struct PM4CmdMemSemaphore {
}
};
// Draw action selected by IT_SET_PREDICATION: whether predicated draws execute
// when the predicate source reports visible or not-visible.
enum class Predication : u32 {
    DrawIfNotVisible = 0,
    DrawIfVisible = 1,
};
// Hint for IT_SET_PREDICATION: Wait stalls until the predicate value is final,
// Draw proceeds with whatever value is currently available.
enum class PredicationHint : u32 {
    Wait = 0,
    Draw = 1,
};
// Predicate source selected by IT_SET_PREDICATION.
enum class PredicateOperation : u32 {
    Clear = 0,     ///< Clear any active predication
    Zpass = 1,     ///< Predicate on an occlusion (zpass) query result
    PrimCount = 2, ///< Predicate on a primitive count result
    // other values are reserved
};
// Body of the PM4 IT_SET_PREDICATION type-3 packet.
struct PM4CmdSetPredication {
    PM4Type3Header header;
    union {
        // Bits [31:4] of the predicate source address; the low 4 bits are
        // implicitly zero (the source must be 16-byte aligned).
        BitField<4, 28, u32> start_address_lo;
        u32 raw1;
    };
    union {
        BitField<0, 8, u32> start_address_hi;        ///< Bits [39:32] of the source address
        BitField<8, 1, Predication> action;          ///< Draw-if-visible vs draw-if-not-visible
        BitField<12, 1, PredicationHint> hint;       ///< Wait for final value vs draw immediately
        BitField<16, 3, PredicateOperation> pred_op; ///< Clear / Zpass / PrimCount
        BitField<31, 1, u32> continue_bit;           ///< CONTINUE flag; currently unhandled by the
                                                     ///< command processor (warning only)
        u32 raw2;
    };
    // Reassembles the 40-bit predicate source address: the low field is shifted
    // back into bits [31:4], the high field supplies bits [39:32].
    template <typename T = u64>
    T Address() const {
        return std::bit_cast<T>(u64(start_address_lo.Value()) << 4 | u64(start_address_hi.Value())
                                                                         << 32);
    }
};
} // namespace AmdGpu

View File

@ -212,7 +212,8 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
vk::PhysicalDevicePortabilitySubsetFeaturesKHR,
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT,
vk::PhysicalDeviceConditionalRenderingFeaturesEXT>();
features = feature_chain.get().features;
const vk::StructureChain properties_chain = physical_device.getProperties2<
@ -283,6 +284,7 @@ bool Instance::CreateDevice() {
LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}",
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax);
}
conditional_rendering = add_extension(VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME);
const bool calibrated_timestamps =
TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
@ -420,6 +422,9 @@ bool Instance::CreateDevice() {
.shaderImageFloat32AtomicMinMax =
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax,
},
vk::PhysicalDeviceConditionalRenderingFeaturesEXT{
.conditionalRendering = true,
},
#ifdef __APPLE__
portability_features,
#endif
@ -452,6 +457,9 @@ bool Instance::CreateDevice() {
if (!shader_atomic_float2) {
device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
}
if (!conditional_rendering) {
device_chain.unlink<vk::PhysicalDeviceConditionalRenderingFeaturesEXT>();
}
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
if (device_result != vk::Result::eSuccess) {

View File

@ -191,6 +191,11 @@ public:
return !portability_subset || portability_features.tessellationPointMode;
}
/// Returns true when VK_EXT_conditional_rendering is supported by the device.
/// The flag is set during device creation when the extension is enabled.
bool IsConditionalRenderingSupported() const {
    return conditional_rendering;
}
/// Returns the vendor ID of the physical device
u32 GetVendorID() const {
return properties.vendorID;
@ -374,6 +379,7 @@ private:
bool amd_gcn_shader{};
bool amd_shader_trinary_minmax{};
bool shader_atomic_float2{};
bool conditional_rendering{};
bool portability_subset{};
};

View File

@ -17,6 +17,10 @@
#undef MemoryBarrier
#endif
namespace {
const int OCCLUSION_QUERIES_COUNT = 256;
}
namespace Vulkan {
static Shader::PushData MakeUserData(const AmdGpu::Liverpool::Regs& regs) {
@ -38,11 +42,25 @@ Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_,
: instance{instance_}, scheduler{scheduler_}, page_manager{this},
buffer_cache{instance, scheduler, *this, liverpool_, texture_cache, page_manager},
texture_cache{instance, scheduler, buffer_cache, page_manager}, liverpool{liverpool_},
memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool} {
memory{Core::Memory::Instance()}, pipeline_cache{instance, scheduler, liverpool},
occlusion_query_buffer{instance,
scheduler,
VideoCore::MemoryUsage::DeviceLocal,
0,
vk::BufferUsageFlagBits::eConditionalRenderingEXT |
vk::BufferUsageFlagBits::eTransferDst,
sizeof(u32) * OCCLUSION_QUERIES_COUNT} {
if (!Config::nullGpu()) {
liverpool->BindRasterizer(this);
}
memory->SetRasterizer(this);
occlusion_query_pool = Check<"occlusion query pool">(instance.GetDevice().createQueryPool({
.queryType = vk::QueryType::eOcclusion,
.queryCount = OCCLUSION_QUERIES_COUNT,
}));
instance.GetDevice().resetQueryPool(occlusion_query_pool, 0, OCCLUSION_QUERIES_COUNT);
Vulkan::SetObjectName(instance.GetDevice(), occlusion_query_buffer.Handle(),
"OcclusionQueryBuffer:{:#x}", sizeof(u32) * OCCLUSION_QUERIES_COUNT);
}
Rasterizer::~Rasterizer() = default;
@ -302,6 +320,9 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
if (active_predication) {
cmdbuf.beginConditionalRenderingEXT(&*active_predication);
}
if (is_indexed) {
cmdbuf.drawIndexed(regs.num_indices, regs.num_instances.NumInstances(), 0,
s32(vertex_offset), instance_offset);
@ -309,7 +330,9 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), vertex_offset,
instance_offset);
}
if (active_predication) {
cmdbuf.endConditionalRenderingEXT();
}
ResetBindings();
}
@ -354,6 +377,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
if (active_predication) {
cmdbuf.beginConditionalRenderingEXT(&*active_predication);
}
if (is_indexed) {
ASSERT(sizeof(VkDrawIndexedIndirectCommand) == stride);
@ -373,7 +399,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
cmdbuf.drawIndirect(buffer->Handle(), base, max_count, stride);
}
}
if (active_predication) {
cmdbuf.endConditionalRenderingEXT();
}
ResetBindings();
}
@ -1263,4 +1291,102 @@ void Rasterizer::ScopedMarkerInsertColor(const std::string_view& str, const u32
(f32)(color & 0xff) / 255.0f, (f32)((color >> 24) & 0xff) / 255.0f})});
}
// Arms GPU predication for subsequent draws: resolves the occlusion query that
// was previously recorded at `addr` into the predication buffer and stores a
// ConditionalRenderingBeginInfoEXT that Draw/DrawIndirect wrap around each draw.
void Rasterizer::StartPredication(VAddr addr, bool draw_if_visible, bool wait_for_result) {
    // Without VK_EXT_conditional_rendering predication is silently ignored and
    // draws execute unconditionally.
    if (!instance.IsConditionalRenderingSupported()) {
        return;
    }
    ASSERT(!active_predication);

    // Single lookup instead of contains() + operator[]: operator[] would
    // default-insert slot 0 for an unknown address when ASSERT compiles out.
    const auto it = occlusion_index_mapping.find(addr);
    ASSERT(it != occlusion_index_mapping.end());
    const u32 index = it->second;
    LOG_DEBUG(Render_Vulkan,
              "addr = {:#x}, index = {}, draw_if_visible = {}, "
              "wait_for_result = {}",
              addr, index, draw_if_visible, wait_for_result);

    scheduler.EndRendering();
    const auto cmdbuf = scheduler.CommandBuffer();

    // Resolve the query into the slot's u32 in the predication buffer. eWait
    // blocks until the result is final; ePartial accepts whatever value is
    // currently available so the draw does not stall.
    cmdbuf.copyQueryPoolResults(occlusion_query_pool, index, 1, occlusion_query_buffer.Handle(),
                                index * sizeof(u32), sizeof(u32),
                                wait_for_result ? vk::QueryResultFlagBits::eWait
                                                : vk::QueryResultFlagBits::ePartial);

    // WAW guard: orders this copy against any later copy into the same slot.
    const auto pre_barrier = vk::BufferMemoryBarrier2{
        .srcStageMask = vk::PipelineStageFlagBits2::eCopy,
        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eCopy,
        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
        .buffer = occlusion_query_buffer.Handle(),
        .offset = index * sizeof(u32),
        .size = sizeof(u32),
    };
    // Makes the copied value visible to the conditional-rendering read stage.
    const vk::MemoryBarrier2 ib_barrier{
        .srcStageMask = vk::PipelineStageFlagBits2::eCopy,
        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eConditionalRenderingEXT,
        .dstAccessMask = vk::AccessFlagBits2::eConditionalRenderingReadEXT,
    };
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
        .memoryBarrierCount = 1,
        .pMemoryBarriers = &ib_barrier,
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &pre_barrier,
    });

    ScopeMarkerBegin("gfx:{}:predication", fmt::ptr(reinterpret_cast<const void*>(addr)));
    // NOTE(review): eInverted makes Vulkan execute when the buffer value is
    // zero. Pairing draw_if_visible with eInverted therefore draws when the
    // zpass sample count is 0 — this looks reversed; confirm against hardware
    // semantics of SET_PREDICATION before changing it.
    vk::ConditionalRenderingBeginInfoEXT conditional_rendering_info{
        .buffer = occlusion_query_buffer.Handle(),
        .offset = index * sizeof(u32),
        .flags = draw_if_visible ? vk::ConditionalRenderingFlagBitsEXT::eInverted
                                 : vk::ConditionalRenderingFlagsEXT(),
    };
    active_predication = conditional_rendering_info;
}
// Disarms predication; subsequent draws execute unconditionally. Safe to call
// when no predication is active.
void Rasterizer::EndPredication() {
    if (!active_predication.has_value()) {
        return;
    }
    LOG_DEBUG(Render_Vulkan, "");
    scheduler.EndRendering();
    ScopeMarkerEnd();
    active_predication.reset();
}
// Begins an occlusion query in the next free ring slot and records the
// addr -> slot mapping so EndOcclusionQuery / StartPredication can find it.
void Rasterizer::StartOcclusionQuery(VAddr addr) {
    const u32 slot = occlusion_current_index;
    LOG_DEBUG(Render_Vulkan, "addr = {:#x}, index = {}", addr, slot);
    scheduler.EndRendering();
    const auto cmdbuf = scheduler.CommandBuffer();
    // Each slot must be reset before reuse; the pool is only bulk-reset once at creation.
    cmdbuf.resetQueryPool(occlusion_query_pool, slot, 1);
    ScopeMarkerBegin("gfx:{}:occlusionQuery", fmt::ptr(reinterpret_cast<const void*>(addr)));
    cmdbuf.beginQuery(occlusion_query_pool, slot, vk::QueryControlFlags());
    occlusion_index_mapping.insert_or_assign(addr, slot);
    // Advance the ring cursor, wrapping back to slot 0 at the end of the pool.
    occlusion_current_index = (slot + 1) % OCCLUSION_QUERIES_COUNT;
}
// Ends the occlusion query previously started for `addr` (looked up via the
// addr -> slot mapping recorded by StartOcclusionQuery).
void Rasterizer::EndOcclusionQuery(VAddr addr) {
    // Single lookup instead of contains() + operator[]: operator[] would
    // default-insert slot 0 for an unknown address when ASSERT compiles out.
    const auto it = occlusion_index_mapping.find(addr);
    ASSERT(it != occlusion_index_mapping.end());
    const u32 index = it->second;
    LOG_DEBUG(Render_Vulkan, "addr = {:#x}, index = {}", addr, index);
    scheduler.EndRendering();
    const auto cmdbuf = scheduler.CommandBuffer();
    cmdbuf.endQuery(occlusion_query_pool, index);
    ScopeMarkerEnd();
}
} // namespace Vulkan

View File

@ -55,6 +55,11 @@ public:
void ScopedMarkerInsertColor(const std::string_view& str, const u32 color,
bool from_guest = false);
void StartPredication(VAddr addr, bool discard_if_zero, bool wait_for_result);
void EndPredication();
void StartOcclusionQuery(VAddr addr);
void EndOcclusionQuery(VAddr addr);
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
u32 ReadDataFromGds(u32 gsd_offset);
bool InvalidateMemory(VAddr addr, u64 size);
@ -122,6 +127,11 @@ private:
boost::icl::interval_set<VAddr> mapped_ranges;
std::shared_mutex mapped_ranges_mutex;
PipelineCache pipeline_cache;
vk::QueryPool occlusion_query_pool;
u32 occlusion_current_index{};
std::map<VAddr, u32> occlusion_index_mapping;
VideoCore::Buffer occlusion_query_buffer;
std::optional<vk::ConditionalRenderingBeginInfoEXT> active_predication;
boost::container::static_vector<
std::pair<VideoCore::ImageId, VideoCore::TextureCache::RenderTargetDesc>, 8>