Merge branch 'shadps4-emu:main' into mapmemory-assert-remove

This commit is contained in:
Stephen Miller 2025-05-15 06:11:26 -05:00 committed by GitHub
commit 88b1e0da8a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 365 additions and 54 deletions

View File

@ -54,9 +54,9 @@ else()
endif()
if (ARCHITECTURE STREQUAL "x86_64")
# Target the same CPU architecture as the PS4, to maintain the same level of compatibility.
# Exclude SSE4a as it is only available on AMD CPUs.
add_compile_options(-march=btver2 -mtune=generic -mno-sse4a)
# Target x86-64-v3 CPU architecture as this is a good balance between supporting performance critical
# instructions like AVX2 and maintaining support for older CPUs.
add_compile_options(-march=x86-64-v3)
endif()
if (APPLE AND ARCHITECTURE STREQUAL "x86_64" AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64")

View File

@ -21,9 +21,9 @@ SPDX-License-Identifier: GPL-2.0-or-later
- A processor with at least 4 cores and 6 threads
- Above 2.5 GHz frequency
- A CPU supporting the following instruction sets: MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, F16C, CLMUL, AES, BMI1, MOVBE, XSAVE, ABM
- A CPU supporting the x86-64-v3 baseline.
- **Intel**: Haswell generation or newer
- **AMD**: Jaguar generation or newer
- **AMD**: Excavator generation or newer
- **Apple**: Rosetta 2 on macOS 15.4 or newer
### GPU
@ -55,4 +55,4 @@ To configure the emulator, you can go through the interface and go to "settings"
You can also configure the emulator by editing the `config.toml` file located in the `user` folder created after the application is started (Mostly useful if you are using the SDL version).
Some settings may be related to more technical development and debugging.\
For more information on this, see [**Debugging**](https://github.com/shadps4-emu/shadPS4/blob/main/documents/Debugging/Debugging.md#configuration).
For more information on this, see [**Debugging**](https://github.com/shadps4-emu/shadPS4/blob/main/documents/Debugging/Debugging.md#configuration).

View File

@ -179,7 +179,7 @@ s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t add
auto* wait_reg_mem = reinterpret_cast<PM4CmdWaitRegMem*>(cmdbuf);
wait_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5};
wait_reg_mem->raw = (is_mem << 4u) | (cmp_func & 7u);
wait_reg_mem->poll_addr_lo = u32(addr & addr_mask);
wait_reg_mem->poll_addr_lo_raw = u32(addr & addr_mask);
wait_reg_mem->poll_addr_hi = u32(addr >> 32u);
wait_reg_mem->ref = ref;
wait_reg_mem->mask = mask;

View File

@ -12,12 +12,25 @@
namespace Libraries::Kernel {
extern boost::asio::io_context io_context;
extern void KernelSignalRequest();
static constexpr auto HrTimerSpinlockThresholdUs = 1200u;
// Events are uniquely identified by id and filter.
bool EqueueInternal::AddEvent(EqueueEvent& event) {
std::scoped_lock lock{m_mutex};
event.time_added = std::chrono::steady_clock::now();
if (event.event.filter == SceKernelEvent::Filter::Timer ||
event.event.filter == SceKernelEvent::Filter::HrTimer) {
// HrTimer events are offset by the threshold of time at the end that we spinlock for
// greater accuracy.
const auto offset =
event.event.filter == SceKernelEvent::Filter::HrTimer ? HrTimerSpinlockThresholdUs : 0u;
event.timer_interval = std::chrono::microseconds(event.event.data - offset);
}
const auto& it = std::ranges::find(m_events, event);
if (it != m_events.cend()) {
@ -29,6 +42,47 @@ bool EqueueInternal::AddEvent(EqueueEvent& event) {
return true;
}
// Arms (or re-arms) the asio steady_timer backing a Timer/HrTimer event and
// queues the given callback to fire when the event's interval elapses.
// Returns false if no event with the given (id, filter) pair exists in the queue.
bool EqueueInternal::ScheduleEvent(u64 id, s16 filter,
                                   void (*callback)(SceKernelEqueue, const SceKernelEvent&)) {
    std::scoped_lock lock{m_mutex};

    // Events are uniquely identified by (ident, filter).
    const auto& it = std::ranges::find_if(m_events, [id, filter](auto& ev) {
        return ev.event.ident == id && ev.event.filter == filter;
    });
    if (it == m_events.cend()) {
        return false;
    }

    const auto& event = *it;
    // Only timer-type events carry a timer_interval and may be scheduled.
    ASSERT(event.event.filter == SceKernelEvent::Filter::Timer ||
           event.event.filter == SceKernelEvent::Filter::HrTimer);

    if (!it->timer) {
        // First arming: timer starts counting timer_interval from now.
        it->timer = std::make_unique<boost::asio::steady_timer>(io_context, event.timer_interval);
    } else {
        // If the timer already exists we are scheduling a reoccurrence after the next period.
        // Set the expiration time to the previous occurrence plus the period, so periodic
        // events do not drift by the callback's processing latency.
        it->timer->expires_at(it->timer->expiry() + event.timer_interval);
    }

    // NOTE(review): the lambda captures `this` raw; assumes the queue outlives the
    // pending wait (RemoveEvent cancels the timer) — confirm against io_context lifetime.
    it->timer->async_wait(
        [this, event_data = event.event, callback](const boost::system::error_code& ec) {
            if (ec) {
                if (ec != boost::system::errc::operation_canceled) {
                    LOG_ERROR(Kernel_Event, "Timer callback error: {}", ec.message());
                } else {
                    // Timer was cancelled (removed) before it triggered
                    LOG_DEBUG(Kernel_Event, "Timer cancelled");
                }
                return;
            }
            callback(this, event_data);
        });

    // Wake the kernel worker so the io_context processes the newly queued wait.
    KernelSignalRequest();

    return true;
}
bool EqueueInternal::RemoveEvent(u64 id, s16 filter) {
bool has_found = false;
std::scoped_lock lock{m_mutex};
@ -152,18 +206,14 @@ int EqueueInternal::WaitForSmallTimer(SceKernelEvent* ev, int num, u32 micros) {
return count;
}
extern boost::asio::io_context io_context;
extern void KernelSignalRequest();
bool EqueueInternal::EventExists(u64 id, s16 filter) {
std::scoped_lock lock{m_mutex};
static constexpr auto HrTimerSpinlockThresholdUs = 1200u;
const auto& it = std::ranges::find_if(m_events, [id, filter](auto& ev) {
return ev.event.ident == id && ev.event.filter == filter;
});
static void SmallTimerCallback(const boost::system::error_code& error, SceKernelEqueue eq,
SceKernelEvent kevent) {
static EqueueEvent event;
event.event = kevent;
event.event.data = HrTimerSpinlockThresholdUs;
eq->AddSmallTimer(event);
eq->TriggerEvent(kevent.ident, SceKernelEvent::Filter::HrTimer, kevent.udata);
return it != m_events.cend();
}
int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name) {
@ -243,6 +293,14 @@ int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int
return ORBIS_OK;
}
// Fired when an HrTimer's asio wait elapses: hands the final
// HrTimerSpinlockThresholdUs microseconds over to the small-timer spin path
// for accuracy, then triggers the event on the queue.
// NOTE(review): `event` is function-local static, so concurrent callbacks from
// different queues would share and clobber it — confirm AddSmallTimer copies
// the event and that callbacks cannot run concurrently.
static void HrTimerCallback(SceKernelEqueue eq, const SceKernelEvent& kevent) {
    static EqueueEvent event;
    event.event = kevent;
    // Remaining time to burn in the spinlock path.
    event.event.data = HrTimerSpinlockThresholdUs;
    eq->AddSmallTimer(event);
    eq->TriggerEvent(kevent.ident, SceKernelEvent::Filter::HrTimer, kevent.udata);
}
s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec* ts, void* udata) {
if (eq == nullptr) {
return ORBIS_KERNEL_ERROR_EBADF;
@ -273,17 +331,10 @@ s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec*
return eq->AddSmallTimer(event) ? ORBIS_OK : ORBIS_KERNEL_ERROR_ENOMEM;
}
event.timer = std::make_unique<boost::asio::steady_timer>(
io_context, std::chrono::microseconds(total_us - HrTimerSpinlockThresholdUs));
event.timer->async_wait(std::bind(SmallTimerCallback, std::placeholders::_1, eq, event.event));
if (!eq->AddEvent(event)) {
if (!eq->AddEvent(event) ||
!eq->ScheduleEvent(id, SceKernelEvent::Filter::HrTimer, HrTimerCallback)) {
return ORBIS_KERNEL_ERROR_ENOMEM;
}
KernelSignalRequest();
return ORBIS_OK;
}
@ -300,6 +351,57 @@ int PS4_SYSV_ABI sceKernelDeleteHRTimerEvent(SceKernelEqueue eq, int id) {
}
}
// Fired when a Timer event's asio wait elapses: triggers the event and,
// unless it was added as one-shot, re-arms it for the next period.
// The EventExists check guards against the event having been removed
// between expiry and callback execution.
static void TimerCallback(SceKernelEqueue eq, const SceKernelEvent& kevent) {
    if (eq->EventExists(kevent.ident, kevent.filter)) {
        eq->TriggerEvent(kevent.ident, SceKernelEvent::Filter::Timer, kevent.udata);
        if (!(kevent.flags & SceKernelEvent::Flags::OneShot)) {
            // Reschedule the event for its next period.
            eq->ScheduleEvent(kevent.ident, kevent.filter, TimerCallback);
        }
    }
}
// Registers a periodic timer event on the queue, replacing any existing timer
// event with the same id. `usec` is the period in microseconds.
// Returns ORBIS_KERNEL_ERROR_EBADF for a null queue and
// ORBIS_KERNEL_ERROR_ENOMEM if the event cannot be added or scheduled.
int PS4_SYSV_ABI sceKernelAddTimerEvent(SceKernelEqueue eq, int id, SceKernelUseconds usec,
                                        void* udata) {
    if (eq == nullptr) {
        return ORBIS_KERNEL_ERROR_EBADF;
    }

    EqueueEvent event{};
    event.event.ident = static_cast<u64>(id);
    event.event.filter = SceKernelEvent::Filter::Timer;
    event.event.flags = SceKernelEvent::Flags::Add;
    event.event.fflags = 0;
    event.event.data = usec; // period; converted to timer_interval inside AddEvent
    event.event.udata = udata;

    // Re-adding an existing timer id replaces the old event.
    if (eq->EventExists(event.event.ident, event.event.filter)) {
        eq->RemoveEvent(id, SceKernelEvent::Filter::Timer);
        LOG_DEBUG(Kernel_Event,
                  "Timer event already exists, removing it: queue name={}, queue id={}",
                  eq->GetName(), event.event.ident);
    }

    // NOTE(review): logged before AddEvent/ScheduleEvent can still fail below.
    LOG_DEBUG(Kernel_Event, "Added timing event: queue name={}, queue id={}, usec={}, pointer={:x}",
              eq->GetName(), event.event.ident, usec, reinterpret_cast<uintptr_t>(udata));

    if (!eq->AddEvent(event) ||
        !eq->ScheduleEvent(id, SceKernelEvent::Filter::Timer, TimerCallback)) {
        return ORBIS_KERNEL_ERROR_ENOMEM;
    }

    return ORBIS_OK;
}
// Removes a previously added timer event from the queue.
// Returns EBADF for a null queue, ENOENT when no matching timer event exists.
int PS4_SYSV_ABI sceKernelDeleteTimerEvent(SceKernelEqueue eq, int id) {
    if (eq == nullptr) {
        return ORBIS_KERNEL_ERROR_EBADF;
    }
    if (!eq->RemoveEvent(id, SceKernelEvent::Filter::Timer)) {
        return ORBIS_KERNEL_ERROR_ENOENT;
    }
    return ORBIS_OK;
}
int PS4_SYSV_ABI sceKernelAddUserEvent(SceKernelEqueue eq, int id) {
if (eq == nullptr) {
return ORBIS_KERNEL_ERROR_EBADF;
@ -380,6 +482,8 @@ void RegisterEventQueue(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("WDszmSbWuDk", "libkernel", 1, "libkernel", 1, 1, sceKernelAddUserEventEdge);
LIB_FUNCTION("R74tt43xP6k", "libkernel", 1, "libkernel", 1, 1, sceKernelAddHRTimerEvent);
LIB_FUNCTION("J+LF6LwObXU", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteHRTimerEvent);
LIB_FUNCTION("57ZK+ODEXWY", "libkernel", 1, "libkernel", 1, 1, sceKernelAddTimerEvent);
LIB_FUNCTION("YWQFUyXIVdU", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteTimerEvent);
LIB_FUNCTION("F6e0kwo4cnk", "libkernel", 1, "libkernel", 1, 1, sceKernelTriggerUserEvent);
LIB_FUNCTION("LJDwdSNTnDg", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteUserEvent);
LIB_FUNCTION("mJ7aghmgvfc", "libkernel", 1, "libkernel", 1, 1, sceKernelGetEventId);

View File

@ -21,6 +21,9 @@ namespace Libraries::Kernel {
class EqueueInternal;
struct EqueueEvent;
using SceKernelUseconds = u32;
using SceKernelEqueue = EqueueInternal*;
struct SceKernelEvent {
enum Filter : s16 {
None = 0,
@ -77,6 +80,7 @@ struct EqueueEvent {
SceKernelEvent event;
void* data = nullptr;
std::chrono::steady_clock::time_point time_added;
std::chrono::microseconds timer_interval;
std::unique_ptr<boost::asio::steady_timer> timer;
void ResetTriggerState() {
@ -133,6 +137,8 @@ public:
}
bool AddEvent(EqueueEvent& event);
bool ScheduleEvent(u64 id, s16 filter,
void (*callback)(SceKernelEqueue, const SceKernelEvent&));
bool RemoveEvent(u64 id, s16 filter);
int WaitForEvents(SceKernelEvent* ev, int num, u32 micros);
bool TriggerEvent(u64 ident, s16 filter, void* trigger_data);
@ -152,6 +158,8 @@ public:
int WaitForSmallTimer(SceKernelEvent* ev, int num, u32 micros);
bool EventExists(u64 id, s16 filter);
private:
std::string m_name;
std::mutex m_mutex;
@ -160,9 +168,6 @@ private:
std::condition_variable m_cond;
};
using SceKernelUseconds = u32;
using SceKernelEqueue = EqueueInternal*;
u64 PS4_SYSV_ABI sceKernelGetEventData(const SceKernelEvent* ev);
void RegisterEventQueue(Core::Loader::SymbolsResolver* sym);

View File

@ -290,6 +290,12 @@ int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut
directMemoryEndOut);
}
// Queries whether `addr` lies inside a stack mapping; on success writes the
// stack's bounds (or zeroes for non-stack mappings) through start/end.
// Thin wrapper forwarding to MemoryManager::IsStack.
int PS4_SYSV_ABI sceKernelIsStack(void* addr, void** start, void** end) {
    LOG_DEBUG(Kernel_Vmm, "called, addr = {}", fmt::ptr(addr));
    const auto query_addr = std::bit_cast<VAddr>(addr);
    return Core::Memory::Instance()->IsStack(query_addr, start, end);
}
s32 PS4_SYSV_ABI sceKernelBatchMap(OrbisKernelBatchMapEntry* entries, int numEntries,
int* numEntriesOut) {
return sceKernelBatchMap2(entries, numEntries, numEntriesOut,
@ -636,6 +642,7 @@ void RegisterMemory(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("7oxv3PPCumo", "libkernel", 1, "libkernel", 1, 1, sceKernelReserveVirtualRange);
LIB_FUNCTION("BC+OG5m9+bw", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemoryType);
LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize);
LIB_FUNCTION("yDBwVAolDgg", "libkernel", 1, "libkernel", 1, 1, sceKernelIsStack);
LIB_FUNCTION("NcaWUxfMNIQ", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedDirectMemory);
LIB_FUNCTION("L-Q3LEjIbgA", "libkernel", 1, "libkernel", 1, 1, sceKernelMapDirectMemory);
LIB_FUNCTION("WFcfL2lzido", "libkernel", 1, "libkernel", 1, 1, sceKernelQueryMemoryProtection);

View File

@ -158,6 +158,7 @@ void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func[]);
int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut,
void** directMemoryStartOut,
void** directMemoryEndOut);
int PS4_SYSV_ABI sceKernelIsStack(void* addr, void** start, void** end);
s32 PS4_SYSV_ABI sceKernelBatchMap(OrbisKernelBatchMapEntry* entries, int numEntries,
int* numEntriesOut);

View File

@ -315,7 +315,7 @@ int PS4_SYSV_ABI sceKernelPollEventFlag(OrbisKernelEventFlag ef, u64 bitPattern,
auto result = ef->Poll(bitPattern, wait, clear, pResultPat);
if (result != ORBIS_OK && result != ORBIS_KERNEL_ERROR_EBUSY) {
LOG_ERROR(Kernel_Event, "returned {}", result);
LOG_DEBUG(Kernel_Event, "returned {:#x}", result);
}
return result;
@ -361,7 +361,7 @@ int PS4_SYSV_ABI sceKernelWaitEventFlag(OrbisKernelEventFlag ef, u64 bitPattern,
u32 result = ef->Wait(bitPattern, wait, clear, pResultPat, pTimeout);
if (result != ORBIS_OK && result != ORBIS_KERNEL_ERROR_ETIMEDOUT) {
LOG_ERROR(Kernel_Event, "returned {:#x}", result);
LOG_DEBUG(Kernel_Event, "returned {:#x}", result);
}
return result;

View File

@ -380,8 +380,7 @@ s32 PS4_SYSV_ABI sceNgs2GeomApply(const OrbisNgs2GeomListenerWork* listener,
s32 PS4_SYSV_ABI sceNgs2PanInit(OrbisNgs2PanWork* work, const float* aSpeakerAngle, float unitAngle,
u32 numSpeakers) {
LOG_ERROR(Lib_Ngs2, "aSpeakerAngle = {}, unitAngle = {}, numSpeakers = {}", *aSpeakerAngle,
unitAngle, numSpeakers);
LOG_ERROR(Lib_Ngs2, "unitAngle = {}, numSpeakers = {}", unitAngle, numSpeakers);
return ORBIS_OK;
}

View File

@ -950,4 +950,33 @@ int MemoryManager::GetDirectMemoryType(PAddr addr, int* directMemoryTypeOut,
return ORBIS_OK;
}
// Checks whether `addr` belongs to a stack VMA. On success writes the stack's
// [base, base+size) bounds through start/end when the mapping is a stack, and
// zeroes otherwise. Returns EINVAL when no VMA covers the address and EACCES
// when the covering VMA is free or does not contain the address.
int MemoryManager::IsStack(VAddr addr, void** start, void** end) {
    const auto it = FindVMA(addr);
    if (it == vma_map.end()) {
        return ORBIS_KERNEL_ERROR_EINVAL;
    }

    const VirtualMemoryArea& area = it->second;
    if (!area.Contains(addr, 0) || area.IsFree()) {
        // Address does not fall inside a live mapping.
        return ORBIS_KERNEL_ERROR_EACCES;
    }

    // Non-stack mappings report zeroed bounds rather than an error.
    uintptr_t bounds_begin = 0;
    uintptr_t bounds_end = 0;
    if (area.type == VMAType::Stack) {
        bounds_begin = area.base;
        bounds_end = area.base + area.size;
    }

    if (start != nullptr) {
        *start = reinterpret_cast<void*>(bounds_begin);
    }
    if (end != nullptr) {
        *end = reinterpret_cast<void*>(bounds_end);
    }
    return ORBIS_OK;
}
} // namespace Core

View File

@ -223,6 +223,8 @@ public:
void InvalidateMemory(VAddr addr, u64 size) const;
int IsStack(VAddr addr, void** start, void** end);
private:
VMAHandle FindVMA(VAddr target) {
return std::prev(vma_map.upper_bound(target));

View File

@ -53,7 +53,7 @@ void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
ir.SetScalarReg(dst_reg + i, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
}
}
@ -75,7 +75,7 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
ir.SetScalarReg(dst_reg++, ir.ReadConstBuffer(vsharp, index));
ir.SetScalarReg(dst_reg + i, ir.ReadConstBuffer(vsharp, index));
}
}

View File

@ -989,13 +989,22 @@ void Translator::V_CMP_NE_U64(const GcnInst& inst) {
}
};
const IR::U1 src0{get_src(inst.src[0])};
ASSERT(inst.src[1].field == OperandField::ConstZero); // src0 != 0
auto op = [&inst, this](auto x) {
switch (inst.src[1].field) {
case OperandField::ConstZero:
return x;
case OperandField::SignedConstIntNeg:
return ir.LogicalNot(x);
default:
UNREACHABLE_MSG("unhandled V_CMP_NE_U64 source argument {}", u32(inst.src[1].field));
}
};
switch (inst.dst[1].field) {
case OperandField::VccLo:
ir.SetVcc(src0);
ir.SetVcc(op(src0));
break;
case OperandField::ScalarGPR:
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), src0);
ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), op(src0));
break;
default:
UNREACHABLE();

View File

@ -584,7 +584,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
break;
}
case PM4ItOpcode::EventWrite: {
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
LOG_DEBUG(Render_Vulkan,
"Encountered EventWrite: event_type = {}, event_index = {}",
magic_enum::enum_name(event->event_type.Value()),
magic_enum::enum_name(event->event_index.Value()));
if (event->event_type.Value() == EventType::SoVgtStreamoutFlush) {
// TODO: handle proper synchronization, for now signal that update is done
// immediately
regs.cp_strmout_cntl.offset_update_done = 1;
}
break;
}
case PM4ItOpcode::EventWriteEos: {
@ -696,10 +705,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const u64* wait_addr = wait_reg_mem->Address<u64*>();
if (vo_port->IsVoLabel(wait_addr) &&
num_submits == mapped_queues[GfxQueueId].submits.size()) {
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(regs.reg_array); });
break;
}
while (!wait_reg_mem->Test()) {
while (!wait_reg_mem->Test(regs.reg_array)) {
YIELD_GFX();
}
break;
@ -732,6 +741,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
}
break;
}
case PM4ItOpcode::StrmoutBufferUpdate: {
const auto* strmout = reinterpret_cast<const PM4CmdStrmoutBufferUpdate*>(header);
LOG_WARNING(Render_Vulkan,
"Unimplemented IT_STRMOUT_BUFFER_UPDATE, update_memory = {}, "
"source_select = {}, buffer_select = {}",
strmout->update_memory.Value(),
magic_enum::enum_name(strmout->source_select.Value()),
strmout->buffer_select.Value());
break;
}
default:
UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
static_cast<u32>(opcode), count);
@ -866,8 +885,9 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
}
case PM4ItOpcode::SetQueueReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetQueueReg*>(header);
UNREACHABLE_MSG("Encountered compute SetQueueReg: vqid = {}, reg_offset = {:#x}",
set_data->vqid.Value(), set_data->reg_offset.Value());
LOG_WARNING(Render, "Encountered compute SetQueueReg: vqid = {}, reg_offset = {:#x}",
set_data->vqid.Value(), set_data->reg_offset.Value());
break;
}
case PM4ItOpcode::DispatchDirect: {
const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
@ -934,7 +954,7 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
case PM4ItOpcode::WaitRegMem: {
const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
while (!wait_reg_mem->Test()) {
while (!wait_reg_mem->Test(regs.reg_array)) {
YIELD_ASC(vqid);
}
break;

View File

@ -1175,6 +1175,14 @@ struct Liverpool {
BitField<22, 2, u32> onchip;
};
// CP_STRMOUT_CNTL register: bit 0 signals that a streamout buffer offset
// update has completed (polled via WaitRegMem after StrmoutBufferUpdate).
union StreamOutControl {
    u32 raw;
    struct {
        u32 offset_update_done : 1;
        u32 : 31;
    };
};
union StreamOutConfig {
u32 raw;
struct {
@ -1378,7 +1386,9 @@ struct Liverpool {
AaConfig aa_config;
INSERT_PADDING_WORDS(0xA318 - 0xA2F8 - 1);
ColorBuffer color_buffers[NumColorBuffers];
INSERT_PADDING_WORDS(0xC242 - 0xA390);
INSERT_PADDING_WORDS(0xC03F - 0xA390);
StreamOutControl cp_strmout_cntl;
INSERT_PADDING_WORDS(0xC242 - 0xC040);
PrimitiveType primitive_type;
INSERT_PADDING_WORDS(0xC24C - 0xC243);
u32 num_indices;
@ -1668,6 +1678,7 @@ static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319);
static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A);
static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381);
static_assert(GFX6_3D_REG_INDEX(cp_strmout_cntl) == 0xC03F);
static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242);
static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D);
static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250);

View File

@ -246,6 +246,46 @@ struct PM4CmdNop {
};
};
// Source of the offset written by the STRMOUT_BUFFER_UPDATE packet.
enum class SourceSelect : u32 {
    BufferOffset = 0,               // use the packet's inline buffer_offset field
    VgtStrmoutBufferFilledSize = 1, // read back from VGT_STRMOUT_BUFFER_FILLED_SIZE
    SrcAddress = 2,                 // read from the packet's source memory address
    None = 3,
};
// IT_STRMOUT_BUFFER_UPDATE packet: updates a streamout buffer's offset and/or
// stores the current offset to memory. Addresses are dword-aligned; the low
// 2 bits of each address word carry endian-swap control instead.
struct PM4CmdStrmoutBufferUpdate {
    PM4Type3Header header;
    union {
        BitField<0, 1, u32> update_memory;         // 1 = store offset to DstAddress()
        BitField<1, 2, SourceSelect> source_select; // where the new offset comes from
        BitField<8, 2, u32> buffer_select;          // which streamout buffer (0-3)
        u32 control;
    };
    union {
        BitField<2, 30, u32> dst_address_lo;
        BitField<0, 2, u32> swap_dst;
    };
    u32 dst_address_hi;
    union {
        u32 buffer_offset; // used when source_select == BufferOffset
        BitField<2, 30, u32> src_address_lo;
        BitField<0, 2, u32> swap_src;
    };
    u32 src_address_hi;

    // NOTE(review): PM4CmdWaitRegMem::Address() rebuilds its byte address as
    // (poll_addr_lo << 2); these two combine the 30-bit lo field without the
    // << 2 — confirm BitField::Value() semantics or whether the shift is missing.
    template <typename T = u64>
    T DstAddress() const {
        ASSERT(update_memory.Value() == 1);
        return reinterpret_cast<T>(dst_address_lo.Value() | u64(dst_address_hi & 0xFFFF) << 32);
    }

    template <typename T = u64>
    T SrcAddress() const {
        ASSERT(source_select.Value() == SourceSelect::SrcAddress);
        return reinterpret_cast<T>(src_address_lo.Value() | u64(src_address_hi & 0xFFFF) << 32);
    }
};
struct PM4CmdDrawIndexOffset2 {
PM4Type3Header header;
u32 max_size; ///< Maximum number of indices
@ -303,6 +343,80 @@ static u64 GetGpuClock64() {
return static_cast<u64>(ticks);
}
// VGT_EVENT_INITIATOR.EVENT_TYPE — event codes written by EVENT_WRITE packets.
// Values follow the GFX6/Sea Islands register numbering; gaps are reserved.
enum class EventType : u32 {
    SampleStreamoutStats1 = 1,
    SampleStreamoutStats2 = 2,
    SampleStreamoutStats3 = 3,
    CacheFlushTs = 4,
    ContextDone = 5,
    CacheFlush = 6,
    CsPartialFlush = 7,
    VgtStreamoutSync = 8,
    VgtStreamoutReset = 10,
    EndOfPipeIncrDe = 11,
    EndOfPipeIbEnd = 12,
    RstPixCnt = 13,
    VsPartialFlush = 15,
    PsPartialFlush = 16,
    FlushHsOutput = 17,
    FlushLsOutput = 18,
    CacheFlushAndInvTsEvent = 20,
    ZpassDone = 21,
    CacheFlushAndInvEvent = 22,
    PerfcounterStart = 23,
    PerfcounterStop = 24,
    PipelineStatStart = 25,
    PipelineStatStop = 26,
    PerfcounterSample = 27,
    FlushEsOutput = 28,
    FlushGsOutput = 29,
    SamplePipelineStat = 30,
    SoVgtStreamoutFlush = 31,
    SampleStreamoutStats = 32,
    ResetVtxCnt = 33,
    VgtFlush = 36,
    ScSendDbVpz = 39,
    BottomOfPipeTs = 40,
    DbCacheFlushAndInv = 42,
    FlushAndInvDbDataTs = 43,
    FlushAndInvDbMeta = 44,
    FlushAndInvCbDataTs = 45,
    FlushAndInvCbMeta = 46,
    CsDone = 47,
    PsDone = 48,
    FlushAndInvCbPixelData = 49,
    ThreadTraceStart = 51,
    ThreadTraceStop = 52,
    ThreadTraceFlush = 54,
    ThreadTraceFinish = 55,
    PixelPipeStatControl = 56,
    PixelPipeStatDump = 57,
    PixelPipeStatReset = 58,
};
// EVENT_WRITE packet EVENT_INDEX field — selects how the event payload
// (if any) is interpreted by the command processor.
enum class EventIndex : u32 {
    Other = 0,
    ZpassDone = 1,
    SamplePipelineStat = 2,
    SampleStreamoutStatSx = 3,
    CsVsPsPartialFlush = 4,
    EopReserved = 5, // end-of-pipe events use EVENT_WRITE_EOP instead
    EosReserved = 6, // end-of-shader events use EVENT_WRITE_EOS instead
    CacheFlush = 7,
};
// IT_EVENT_WRITE packet: injects an event into the graphics pipeline.
// A trailing address payload is only present for event indices that sample
// counters to memory (hence the flexible array member).
struct PM4CmdEventWrite {
    PM4Type3Header header;
    union {
        u32 event_control;
        BitField<0, 6, EventType> event_type; ///< Event type written to VGT_EVENT_INITIATOR
        BitField<8, 4, EventIndex> event_index; ///< Event index
        BitField<20, 1, u32> inv_l2; ///< Send WBINVL2 op to the TC L2 cache when EVENT_INDEX = 0111
    };
    u32 address[];
};
struct PM4CmdEventWriteEop {
PM4Type3Header header;
union {
@ -474,7 +588,12 @@ struct PM4CmdWaitRegMem {
BitField<8, 1, Engine> engine;
u32 raw;
};
u32 poll_addr_lo;
union {
BitField<0, 16, u32> reg;
BitField<2, 30, u32> poll_addr_lo;
BitField<0, 2, u32> swap;
u32 poll_addr_lo_raw;
};
u32 poll_addr_hi;
u32 ref;
u32 mask;
@ -482,31 +601,36 @@ struct PM4CmdWaitRegMem {
template <typename T = u32*>
T Address() const {
return std::bit_cast<T>((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo);
return std::bit_cast<T>((uintptr_t(poll_addr_hi) << 32) | (poll_addr_lo << 2));
}
bool Test() const {
u32 Reg() const {
return reg.Value();
}
bool Test(const std::array<u32, Liverpool::NumRegs>& regs) const {
u32 value = mem_space.Value() == MemSpace::Memory ? *Address() : regs[Reg()];
switch (function.Value()) {
case Function::Always: {
return true;
}
case Function::LessThan: {
return (*Address() & mask) < ref;
return (value & mask) < ref;
}
case Function::LessThanEqual: {
return (*Address() & mask) <= ref;
return (value & mask) <= ref;
}
case Function::Equal: {
return (*Address() & mask) == ref;
return (value & mask) == ref;
}
case Function::NotEqual: {
return (*Address() & mask) != ref;
return (value & mask) != ref;
}
case Function::GreaterThanEqual: {
return (*Address() & mask) >= ref;
return (value & mask) >= ref;
}
case Function::GreaterThan: {
return (*Address() & mask) > ref;
return (value & mask) > ref;
}
case Function::Reserved:
[[fallthrough]];