video_core: cpu flip is propagated via gpu thread now

This commit is contained in:
psucien 2024-08-13 11:29:13 +02:00
parent d1a033b6af
commit 2beff82c1b
7 changed files with 66 additions and 12 deletions

View File

@ -9,6 +9,7 @@
#include "core/libraries/error_codes.h" #include "core/libraries/error_codes.h"
#include "core/libraries/kernel/time_management.h" #include "core/libraries/kernel/time_management.h"
#include "core/libraries/videoout/driver.h" #include "core/libraries/videoout/driver.h"
#include "core/platform.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/renderer_vulkan.h"
extern std::unique_ptr<Vulkan::RendererVulkan> renderer; extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
@ -202,9 +203,26 @@ std::chrono::microseconds VideoOutDriver::Flip(const Request& req) {
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
bool is_eop /*= false*/) { bool is_eop /*= false*/) {
bool flip_result = true;
if (!is_eop) {
// Before processing the flip we need to ask GPU thread to flush command list as at this
// point VO surface is ready to be presented, and we will need have an actual state of
// Vulkan image at the time of frame presentation.
liverpool->SendCommand([=, this]() {
renderer->FlushDraw();
SubmitFlipInternal(port, index, flip_arg, is_eop);
});
} else {
flip_result = SubmitFlipInternal(port, index, flip_arg, is_eop);
}
return flip_result;
}
bool VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg,
bool is_eop /*= false*/) {
Vulkan::Frame* frame; Vulkan::Frame* frame;
if (index == -1) { if (index == -1) {
frame = renderer->PrepareBlankFrame(); frame = renderer->PrepareBlankFrame(is_eop);
} else { } else {
const auto& buffer = port->buffer_slots[index]; const auto& buffer = port->buffer_slots[index];
const auto& group = port->groups[buffer.group_index]; const auto& group = port->groups[buffer.group_index];

View File

@ -102,6 +102,7 @@ private:
}; };
std::chrono::microseconds Flip(const Request& req); std::chrono::microseconds Flip(const Request& req);
bool SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false);
void PresentThread(std::stop_token token); void PresentThread(std::stop_token token);
std::mutex mutex; std::mutex mutex;

View File

@ -35,7 +35,7 @@ void Liverpool::Process(std::stop_token stoken) {
{ {
std::unique_lock lk{submit_mutex}; std::unique_lock lk{submit_mutex};
Common::CondvarWait(submit_cv, lk, stoken, Common::CondvarWait(submit_cv, lk, stoken,
[this] { return num_submits != 0 || submit_done; }); [this] { return num_commands || num_submits || submit_done; });
} }
if (stoken.stop_requested()) { if (stoken.stop_requested()) {
break; break;
@ -45,7 +45,23 @@ void Liverpool::Process(std::stop_token stoken) {
int qid = -1; int qid = -1;
while (num_submits) { while (num_submits || num_commands) {
// Process incoming commands with high priority
while (num_commands) {
Common::UniqueFunction<void> callback{};
{
std::unique_lock lk{submit_mutex};
callback = std::move(command_queue.back());
command_queue.pop();
}
callback();
--num_commands;
}
qid = (qid + 1) % NumTotalQueues; qid = (qid + 1) % NumTotalQueues;
auto& queue = mapped_queues[qid]; auto& queue = mapped_queues[qid];
@ -219,7 +235,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
// In the case of HW, render target memory has alignment as color block operates on // In the case of HW, render target memory has alignment as color block operates on
// tiles. There is no information of actual resource extents stored in CB context // tiles. There is no information of actual resource extents stored in CB context
// regs, so any deduction of it from slices/pitch will lead to a larger surface created. // regs, so any deduction of it from slices/pitch will lead to a larger surface created.
// The same applies to the depth targets. Fortunatelly, the guest always sends // The same applies to the depth targets. Fortunately, the guest always sends
// a trailing NOP packet right after the context regs setup, so we can use the heuristic // a trailing NOP packet right after the context regs setup, so we can use the heuristic
// below and extract the hint to determine actual resource dims. // below and extract the hint to determine actual resource dims.

View File

@ -11,10 +11,12 @@
#include <span> #include <span>
#include <thread> #include <thread>
#include <queue> #include <queue>
#include "common/assert.h" #include "common/assert.h"
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/polyfill_thread.h" #include "common/polyfill_thread.h"
#include "common/types.h" #include "common/types.h"
#include "common/unique_function.h"
#include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/resource.h"
@ -1025,6 +1027,13 @@ public:
rasterizer = rasterizer_; rasterizer = rasterizer_;
} }
/// Enqueues a callback for execution on the Liverpool (GPU) thread and wakes it up.
/// Commands posted here are drained with high priority by the processing loop.
void SendCommand(Common::UniqueFunction<void>&& func) {
    std::scoped_lock guard{submit_mutex};
    // Publish the counter and the payload under the same lock the consumer's
    // wait-predicate uses, so the wakeup cannot be missed.
    ++num_commands;
    command_queue.emplace(std::move(func));
    submit_cv.notify_one();
}
private: private:
struct Task { struct Task {
struct promise_type { struct promise_type {
@ -1093,9 +1102,11 @@ private:
Libraries::VideoOut::VideoOutPort* vo_port{}; Libraries::VideoOut::VideoOutPort* vo_port{};
std::jthread process_thread{}; std::jthread process_thread{};
std::atomic<u32> num_submits{}; std::atomic<u32> num_submits{};
std::atomic<u32> num_commands{};
std::atomic<bool> submit_done{}; std::atomic<bool> submit_done{};
std::mutex submit_mutex; std::mutex submit_mutex;
std::condition_variable_any submit_cv; std::condition_variable_any submit_cv;
std::queue<Common::UniqueFunction<void>> command_queue{};
}; };
static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08);

View File

@ -48,13 +48,14 @@ public:
VAddr cpu_address, bool is_eop) { VAddr cpu_address, bool is_eop) {
const auto info = VideoCore::ImageInfo{attribute, cpu_address}; const auto info = VideoCore::ImageInfo{attribute, cpu_address};
const auto image_id = texture_cache.FindImage(info); const auto image_id = texture_cache.FindImage(info);
texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler);
auto& image = texture_cache.GetImage(image_id); auto& image = texture_cache.GetImage(image_id);
return PrepareFrameInternal(image, is_eop); return PrepareFrameInternal(image, is_eop);
} }
/// Builds a presentable frame from the special null image (used when no valid
/// video-out buffer is available). The EOP flag is forwarded to frame preparation.
Frame* PrepareBlankFrame(bool is_eop) {
    auto& null_image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID);
    return PrepareFrameInternal(null_image, is_eop);
}
VideoCore::Image& RegisterVideoOutSurface( VideoCore::Image& RegisterVideoOutSurface(
@ -75,6 +76,11 @@ public:
void Present(Frame* frame); void Present(Frame* frame);
void RecreateFrame(Frame* frame, u32 width, u32 height); void RecreateFrame(Frame* frame, u32 width, u32 height);
void FlushDraw() {
SubmitInfo info{};
draw_scheduler.Flush(info);
}
private: private:
Frame* PrepareFrameInternal(VideoCore::Image& image, bool is_eop = true); Frame* PrepareFrameInternal(VideoCore::Image& image, bool is_eop = true);
Frame* GetRenderFrame(); Frame* GetRenderFrame();

View File

@ -223,7 +223,7 @@ ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info,
return RegisterImageView(image_id, view_info); return RegisterImageView(image_id, view_info);
} }
void TextureCache::RefreshImage(Image& image) { void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) {
// Mark image as validated. // Mark image as validated.
image.flags &= ~ImageFlagBits::CpuModified; image.flags &= ~ImageFlagBits::CpuModified;
@ -269,8 +269,10 @@ void TextureCache::RefreshImage(Image& image) {
return; return;
} }
scheduler.EndRendering(); auto* sched_ptr = custom_scheduler ? custom_scheduler : &scheduler;
const auto cmdbuf = scheduler.CommandBuffer(); sched_ptr->EndRendering();
const auto cmdbuf = sched_ptr->CommandBuffer();
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf); image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits::eTransferWrite, cmdbuf);
const VAddr image_addr = image.info.guest_address; const VAddr image_addr = image.info.guest_address;

View File

@ -59,17 +59,17 @@ public:
const ImageViewInfo& view_info); const ImageViewInfo& view_info);
/// Updates image contents if it was modified by CPU. /// Updates image contents if it was modified by CPU.
void UpdateImage(ImageId image_id) { void UpdateImage(ImageId image_id, Vulkan::Scheduler* custom_scheduler = nullptr) {
Image& image = slot_images[image_id]; Image& image = slot_images[image_id];
if (False(image.flags & ImageFlagBits::CpuModified)) { if (False(image.flags & ImageFlagBits::CpuModified)) {
return; return;
} }
RefreshImage(image); RefreshImage(image, custom_scheduler);
TrackImage(image, image_id); TrackImage(image, image_id);
} }
/// Reuploads image contents. /// Reuploads image contents.
void RefreshImage(Image& image); void RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler = nullptr);
/// Retrieves the sampler that matches the provided S# descriptor. /// Retrieves the sampler that matches the provided S# descriptor.
[[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler); [[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler);