mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-12-10 21:58:45 +00:00
Move presentation to separate thread/improve sync (#303)
* video_out: Move presentation to separate thread * liverpool: Better sync for CPU flips * driver: Make flip blocking * videoout: Proper flip rate and vblank management * config: Add vblank divider option * clang format * videoout: added `sceVideoOutWaitVblank` * clang format * vk_scheduler: Silly merge conflict * externals: Add renderdoc API * clang format * reuse * rdoc: manual capture trigger * clang fmt --------- Co-authored-by: psucien <168137814+psucien@users.noreply.github.com>
This commit is contained in:
@@ -5,8 +5,10 @@
|
||||
#include "common/debug.h"
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "common/thread.h"
|
||||
#include "core/libraries/videoout/driver.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/amdgpu/pm4_cmds.h"
|
||||
#include "video_core/renderdoc.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
@@ -32,12 +34,15 @@ void Liverpool::Process(std::stop_token stoken) {
|
||||
while (!stoken.stop_requested()) {
|
||||
{
|
||||
std::unique_lock lk{submit_mutex};
|
||||
Common::CondvarWait(submit_cv, lk, stoken, [this] { return num_submits != 0; });
|
||||
Common::CondvarWait(submit_cv, lk, stoken,
|
||||
[this] { return num_submits != 0 || submit_done; });
|
||||
}
|
||||
if (stoken.stop_requested()) {
|
||||
break;
|
||||
}
|
||||
|
||||
VideoCore::StartCapture();
|
||||
|
||||
int qid = -1;
|
||||
|
||||
while (num_submits) {
|
||||
@@ -48,11 +53,9 @@ void Liverpool::Process(std::stop_token stoken) {
|
||||
Task::Handle task{};
|
||||
{
|
||||
std::scoped_lock lock{queue.m_access};
|
||||
|
||||
if (queue.submits.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
task = queue.submits.front();
|
||||
}
|
||||
task.resume();
|
||||
@@ -64,9 +67,20 @@ void Liverpool::Process(std::stop_token stoken) {
|
||||
queue.submits.pop();
|
||||
|
||||
--num_submits;
|
||||
std::scoped_lock lock2{submit_mutex};
|
||||
submit_cv.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
if (submit_done) {
|
||||
VideoCore::EndCapture();
|
||||
|
||||
if (rasterizer) {
|
||||
rasterizer->Flush();
|
||||
}
|
||||
submit_done = false;
|
||||
}
|
||||
|
||||
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GpuIdle);
|
||||
}
|
||||
}
|
||||
@@ -365,8 +379,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
const auto* write_data = reinterpret_cast<const PM4CmdWriteData*>(header);
|
||||
ASSERT(write_data->dst_sel.Value() == 2 || write_data->dst_sel.Value() == 5);
|
||||
const u32 data_size = (header->type3.count.Value() - 2) * 4;
|
||||
u64* address = write_data->Address<u64*>();
|
||||
if (!write_data->wr_one_addr.Value()) {
|
||||
std::memcpy(write_data->Address<void*>(), write_data->data, data_size);
|
||||
std::memcpy(address, write_data->data, data_size);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
@@ -379,6 +394,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
case PM4ItOpcode::WaitRegMem: {
|
||||
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
|
||||
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
|
||||
// Optimization: VO label waits are special because the emulator
|
||||
// will write to the label when presentation is finished. So if
|
||||
// there are no other submits to yield to we can sleep the thread
|
||||
// instead and allow other tasks to run.
|
||||
const u64* wait_addr = wait_reg_mem->Address<u64*>();
|
||||
if (vo_port->IsVoLabel(wait_addr) && num_submits == 1) {
|
||||
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
|
||||
}
|
||||
while (!wait_reg_mem->Test()) {
|
||||
TracyFiberLeave;
|
||||
co_yield {};
|
||||
@@ -511,7 +534,7 @@ void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
|
||||
|
||||
auto task = ProcessGraphics(dcb, ccb);
|
||||
{
|
||||
std::unique_lock lock{queue.m_access};
|
||||
std::scoped_lock lock{queue.m_access};
|
||||
queue.submits.emplace(task.handle);
|
||||
}
|
||||
|
||||
@@ -526,7 +549,7 @@ void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
|
||||
|
||||
const auto& task = ProcessCompute(acb, vqid);
|
||||
{
|
||||
std::unique_lock lock{queue.m_access};
|
||||
std::scoped_lock lock{queue.m_access};
|
||||
queue.submits.emplace(task.handle);
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <array>
|
||||
#include <condition_variable>
|
||||
#include <coroutine>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <span>
|
||||
#include <thread>
|
||||
@@ -21,6 +22,10 @@ namespace Vulkan {
|
||||
class Rasterizer;
|
||||
}
|
||||
|
||||
namespace Libraries::VideoOut {
|
||||
struct VideoOutPort;
|
||||
}
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
#define GFX6_3D_REG_INDEX(field_name) (offsetof(AmdGpu::Liverpool::Regs, field_name) / sizeof(u32))
|
||||
@@ -991,10 +996,25 @@ public:
|
||||
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
|
||||
void SubmitAsc(u32 vqid, std::span<const u32> acb);
|
||||
|
||||
void SubmitDone() noexcept {
|
||||
std::scoped_lock lk{submit_mutex};
|
||||
submit_done = true;
|
||||
submit_cv.notify_one();
|
||||
}
|
||||
|
||||
void WaitGpuIdle() noexcept {
|
||||
std::unique_lock lk{submit_mutex};
|
||||
submit_cv.wait(lk, [this] { return num_submits == 0; });
|
||||
}
|
||||
|
||||
bool IsGpuIdle() const {
|
||||
return num_submits == 0;
|
||||
}
|
||||
|
||||
void SetVoPort(Libraries::VideoOut::VideoOutPort* port) {
|
||||
vo_port = port;
|
||||
}
|
||||
|
||||
void BindRasterizer(Vulkan::Rasterizer* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
@@ -1059,8 +1079,10 @@ private:
|
||||
} cblock{};
|
||||
|
||||
Vulkan::Rasterizer* rasterizer{};
|
||||
Libraries::VideoOut::VideoOutPort* vo_port{};
|
||||
std::jthread process_thread{};
|
||||
std::atomic<u32> num_submits{};
|
||||
std::atomic<bool> submit_done{};
|
||||
std::mutex submit_mutex;
|
||||
std::condition_variable_any submit_cv;
|
||||
};
|
||||
|
||||
@@ -404,8 +404,9 @@ struct PM4CmdWaitRegMem {
|
||||
u32 mask;
|
||||
u32 poll_interval;
|
||||
|
||||
u32* Address() const {
|
||||
return reinterpret_cast<u32*>((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo);
|
||||
template <typename T = u32*>
|
||||
T Address() const {
|
||||
return reinterpret_cast<T>((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo);
|
||||
}
|
||||
|
||||
bool Test() const {
|
||||
@@ -464,8 +465,8 @@ struct PM4CmdWriteData {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T* Address() const {
|
||||
return reinterpret_cast<T*>(addr64);
|
||||
T Address() const {
|
||||
return reinterpret_cast<T>(addr64);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -494,8 +495,9 @@ struct PM4CmdEventWriteEos {
|
||||
BitField<16, 16, u32> size; ///< Number of DWs to read from the GDS
|
||||
};
|
||||
|
||||
u32* Address() const {
|
||||
return reinterpret_cast<u32*>(address_lo | u64(address_hi) << 32);
|
||||
template <typename T = u32*>
|
||||
T Address() const {
|
||||
return reinterpret_cast<T>(address_lo | u64(address_hi) << 32);
|
||||
}
|
||||
|
||||
u32 DataDWord() const {
|
||||
|
||||
Reference in New Issue
Block a user