Merge branch 'shadps4-emu:main' into mapmemory-assert-remove

Stephen Miller 2025-05-15 06:11:26 -05:00 committed by GitHub
commit 88b1e0da8a
GPG Key ID: B5690EEEBB952194
16 changed files with 365 additions and 54 deletions

View File

@@ -54,9 +54,9 @@ else()
 endif()
 if (ARCHITECTURE STREQUAL "x86_64")
-    # Target the same CPU architecture as the PS4, to maintain the same level of compatibility.
-    # Exclude SSE4a as it is only available on AMD CPUs.
-    add_compile_options(-march=btver2 -mtune=generic -mno-sse4a)
+    # Target x86-64-v3 CPU architecture as this is a good balance between supporting performance critical
+    # instructions like AVX2 and maintaining support for older CPUs.
+    add_compile_options(-march=x86-64-v3)
 endif()
 if (APPLE AND ARCHITECTURE STREQUAL "x86_64" AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64")

View File

@@ -21,9 +21,9 @@ SPDX-License-Identifier: GPL-2.0-or-later
 - A processor with at least 4 cores and 6 threads
 - Above 2.5 GHz frequency
-- A CPU supporting the following instruction sets: MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, F16C, CLMUL, AES, BMI1, MOVBE, XSAVE, ABM
+- A CPU supporting the x86-64-v3 baseline.
 - **Intel**: Haswell generation or newer
-- **AMD**: Jaguar generation or newer
+- **AMD**: Excavator generation or newer
 - **Apple**: Rosetta 2 on macOS 15.4 or newer
 ### GPU
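
Note on the new requirement: the x86-64-v3 level bundles AVX, AVX2, BMI1/BMI2, FMA, F16C, LZCNT, MOVBE and XSAVE on top of the older SSE baseline. A minimal, illustrative runtime probe (not part of this change) can approximate the check with the GCC/Clang cpu-support builtins; the chosen feature-name strings and the omission of F16C/LZCNT/MOVBE/XSAVE are assumptions of this sketch:

#include <cstdio>

// Rough runtime probe for the headline x86-64-v3 features (AVX, AVX2, BMI1/2, FMA).
// Illustrative sketch only: it does not cover F16C, LZCNT, MOVBE or XSAVE, which the
// full v3 level also requires.
static bool CpuLooksLikeX86_64_v3() {
    __builtin_cpu_init();
    return __builtin_cpu_supports("avx") && __builtin_cpu_supports("avx2") &&
           __builtin_cpu_supports("bmi") && __builtin_cpu_supports("bmi2") &&
           __builtin_cpu_supports("fma");
}

int main() {
    std::printf("x86-64-v3 capable (approximate): %s\n", CpuLooksLikeX86_64_v3() ? "yes" : "no");
    return 0;
}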

View File

@@ -179,7 +179,7 @@ s32 PS4_SYSV_ABI sceGnmComputeWaitOnAddress(u32* cmdbuf, u32 size, uintptr_t add
     auto* wait_reg_mem = reinterpret_cast<PM4CmdWaitRegMem*>(cmdbuf);
     wait_reg_mem->header = PM4Type3Header{PM4ItOpcode::WaitRegMem, 5};
     wait_reg_mem->raw = (is_mem << 4u) | (cmp_func & 7u);
-    wait_reg_mem->poll_addr_lo = u32(addr & addr_mask);
+    wait_reg_mem->poll_addr_lo_raw = u32(addr & addr_mask);
     wait_reg_mem->poll_addr_hi = u32(addr >> 32u);
     wait_reg_mem->ref = ref;
     wait_reg_mem->mask = mask;

View File

@@ -12,12 +12,25 @@
 namespace Libraries::Kernel {

+extern boost::asio::io_context io_context;
+extern void KernelSignalRequest();
+
+static constexpr auto HrTimerSpinlockThresholdUs = 1200u;
+
 // Events are uniquely identified by id and filter.
 bool EqueueInternal::AddEvent(EqueueEvent& event) {
     std::scoped_lock lock{m_mutex};

     event.time_added = std::chrono::steady_clock::now();
+    if (event.event.filter == SceKernelEvent::Filter::Timer ||
+        event.event.filter == SceKernelEvent::Filter::HrTimer) {
+        // HrTimer events are offset by the threshold of time at the end that we spinlock for
+        // greater accuracy.
+        const auto offset =
+            event.event.filter == SceKernelEvent::Filter::HrTimer ? HrTimerSpinlockThresholdUs : 0u;
+        event.timer_interval = std::chrono::microseconds(event.event.data - offset);
+    }

     const auto& it = std::ranges::find(m_events, event);
     if (it != m_events.cend()) {
@@ -29,6 +42,47 @@ bool EqueueInternal::AddEvent(EqueueEvent& event) {
     return true;
 }

+bool EqueueInternal::ScheduleEvent(u64 id, s16 filter,
+                                   void (*callback)(SceKernelEqueue, const SceKernelEvent&)) {
+    std::scoped_lock lock{m_mutex};
+
+    const auto& it = std::ranges::find_if(m_events, [id, filter](auto& ev) {
+        return ev.event.ident == id && ev.event.filter == filter;
+    });
+    if (it == m_events.cend()) {
+        return false;
+    }
+
+    const auto& event = *it;
+    ASSERT(event.event.filter == SceKernelEvent::Filter::Timer ||
+           event.event.filter == SceKernelEvent::Filter::HrTimer);
+
+    if (!it->timer) {
+        it->timer = std::make_unique<boost::asio::steady_timer>(io_context, event.timer_interval);
+    } else {
+        // If the timer already exists we are scheduling a reoccurrence after the next period.
+        // Set the expiration time to the previous occurrence plus the period.
+        it->timer->expires_at(it->timer->expiry() + event.timer_interval);
+    }
+
+    it->timer->async_wait(
+        [this, event_data = event.event, callback](const boost::system::error_code& ec) {
+            if (ec) {
+                if (ec != boost::system::errc::operation_canceled) {
+                    LOG_ERROR(Kernel_Event, "Timer callback error: {}", ec.message());
+                } else {
+                    // Timer was cancelled (removed) before it triggered
+                    LOG_DEBUG(Kernel_Event, "Timer cancelled");
+                }
+                return;
+            }
+            callback(this, event_data);
+        });
+
+    KernelSignalRequest();
+
+    return true;
+}
+
 bool EqueueInternal::RemoveEvent(u64 id, s16 filter) {
     bool has_found = false;
     std::scoped_lock lock{m_mutex};
@@ -152,18 +206,14 @@ int EqueueInternal::WaitForSmallTimer(SceKernelEvent* ev, int num, u32 micros) {
     return count;
 }

-extern boost::asio::io_context io_context;
-extern void KernelSignalRequest();
-
-static constexpr auto HrTimerSpinlockThresholdUs = 1200u;
-
-static void SmallTimerCallback(const boost::system::error_code& error, SceKernelEqueue eq,
-                               SceKernelEvent kevent) {
-    static EqueueEvent event;
-    event.event = kevent;
-    event.event.data = HrTimerSpinlockThresholdUs;
-    eq->AddSmallTimer(event);
-    eq->TriggerEvent(kevent.ident, SceKernelEvent::Filter::HrTimer, kevent.udata);
+bool EqueueInternal::EventExists(u64 id, s16 filter) {
+    std::scoped_lock lock{m_mutex};
+
+    const auto& it = std::ranges::find_if(m_events, [id, filter](auto& ev) {
+        return ev.event.ident == id && ev.event.filter == filter;
+    });
+
+    return it != m_events.cend();
 }

 int PS4_SYSV_ABI sceKernelCreateEqueue(SceKernelEqueue* eq, const char* name) {
@@ -243,6 +293,14 @@ int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int
     return ORBIS_OK;
 }

+static void HrTimerCallback(SceKernelEqueue eq, const SceKernelEvent& kevent) {
+    static EqueueEvent event;
+    event.event = kevent;
+    event.event.data = HrTimerSpinlockThresholdUs;
+    eq->AddSmallTimer(event);
+    eq->TriggerEvent(kevent.ident, SceKernelEvent::Filter::HrTimer, kevent.udata);
+}
+
 s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec* ts, void* udata) {
     if (eq == nullptr) {
         return ORBIS_KERNEL_ERROR_EBADF;
@@ -273,17 +331,10 @@ s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec*
         return eq->AddSmallTimer(event) ? ORBIS_OK : ORBIS_KERNEL_ERROR_ENOMEM;
     }

-    event.timer = std::make_unique<boost::asio::steady_timer>(
-        io_context, std::chrono::microseconds(total_us - HrTimerSpinlockThresholdUs));
-
-    event.timer->async_wait(std::bind(SmallTimerCallback, std::placeholders::_1, eq, event.event));
-
-    if (!eq->AddEvent(event)) {
+    if (!eq->AddEvent(event) ||
+        !eq->ScheduleEvent(id, SceKernelEvent::Filter::HrTimer, HrTimerCallback)) {
         return ORBIS_KERNEL_ERROR_ENOMEM;
     }

-    KernelSignalRequest();
-
     return ORBIS_OK;
 }
@@ -300,6 +351,57 @@ int PS4_SYSV_ABI sceKernelDeleteHRTimerEvent(SceKernelEqueue eq, int id) {
     }
 }

+static void TimerCallback(SceKernelEqueue eq, const SceKernelEvent& kevent) {
+    if (eq->EventExists(kevent.ident, kevent.filter)) {
+        eq->TriggerEvent(kevent.ident, SceKernelEvent::Filter::Timer, kevent.udata);
+        if (!(kevent.flags & SceKernelEvent::Flags::OneShot)) {
+            // Reschedule the event for its next period.
+            eq->ScheduleEvent(kevent.ident, kevent.filter, TimerCallback);
+        }
+    }
+}
+
+int PS4_SYSV_ABI sceKernelAddTimerEvent(SceKernelEqueue eq, int id, SceKernelUseconds usec,
+                                        void* udata) {
+    if (eq == nullptr) {
+        return ORBIS_KERNEL_ERROR_EBADF;
+    }
+
+    EqueueEvent event{};
+    event.event.ident = static_cast<u64>(id);
+    event.event.filter = SceKernelEvent::Filter::Timer;
+    event.event.flags = SceKernelEvent::Flags::Add;
+    event.event.fflags = 0;
+    event.event.data = usec;
+    event.event.udata = udata;
+
+    if (eq->EventExists(event.event.ident, event.event.filter)) {
+        eq->RemoveEvent(id, SceKernelEvent::Filter::Timer);
+        LOG_DEBUG(Kernel_Event,
+                  "Timer event already exists, removing it: queue name={}, queue id={}",
+                  eq->GetName(), event.event.ident);
+    }
+
+    LOG_DEBUG(Kernel_Event, "Added timing event: queue name={}, queue id={}, usec={}, pointer={:x}",
+              eq->GetName(), event.event.ident, usec, reinterpret_cast<uintptr_t>(udata));
+
+    if (!eq->AddEvent(event) ||
+        !eq->ScheduleEvent(id, SceKernelEvent::Filter::Timer, TimerCallback)) {
+        return ORBIS_KERNEL_ERROR_ENOMEM;
+    }
+
+    return ORBIS_OK;
+}
+
+int PS4_SYSV_ABI sceKernelDeleteTimerEvent(SceKernelEqueue eq, int id) {
+    if (eq == nullptr) {
+        return ORBIS_KERNEL_ERROR_EBADF;
+    }
+
+    return eq->RemoveEvent(id, SceKernelEvent::Filter::Timer) ? ORBIS_OK
+                                                              : ORBIS_KERNEL_ERROR_ENOENT;
+}
+
 int PS4_SYSV_ABI sceKernelAddUserEvent(SceKernelEqueue eq, int id) {
     if (eq == nullptr) {
         return ORBIS_KERNEL_ERROR_EBADF;
@@ -380,6 +482,8 @@ void RegisterEventQueue(Core::Loader::SymbolsResolver* sym) {
     LIB_FUNCTION("WDszmSbWuDk", "libkernel", 1, "libkernel", 1, 1, sceKernelAddUserEventEdge);
     LIB_FUNCTION("R74tt43xP6k", "libkernel", 1, "libkernel", 1, 1, sceKernelAddHRTimerEvent);
     LIB_FUNCTION("J+LF6LwObXU", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteHRTimerEvent);
+    LIB_FUNCTION("57ZK+ODEXWY", "libkernel", 1, "libkernel", 1, 1, sceKernelAddTimerEvent);
+    LIB_FUNCTION("YWQFUyXIVdU", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteTimerEvent);
     LIB_FUNCTION("F6e0kwo4cnk", "libkernel", 1, "libkernel", 1, 1, sceKernelTriggerUserEvent);
     LIB_FUNCTION("LJDwdSNTnDg", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteUserEvent);
     LIB_FUNCTION("mJ7aghmgvfc", "libkernel", 1, "libkernel", 1, 1, sceKernelGetEventId);

View File

@@ -21,6 +21,9 @@ namespace Libraries::Kernel {
 class EqueueInternal;
 struct EqueueEvent;

+using SceKernelUseconds = u32;
+using SceKernelEqueue = EqueueInternal*;
+
 struct SceKernelEvent {
     enum Filter : s16 {
         None = 0,
@@ -77,6 +80,7 @@ struct EqueueEvent {
     SceKernelEvent event;
     void* data = nullptr;
     std::chrono::steady_clock::time_point time_added;
+    std::chrono::microseconds timer_interval;
     std::unique_ptr<boost::asio::steady_timer> timer;

     void ResetTriggerState() {
@@ -133,6 +137,8 @@ public:
     }

     bool AddEvent(EqueueEvent& event);
+    bool ScheduleEvent(u64 id, s16 filter,
+                       void (*callback)(SceKernelEqueue, const SceKernelEvent&));
     bool RemoveEvent(u64 id, s16 filter);
     int WaitForEvents(SceKernelEvent* ev, int num, u32 micros);
     bool TriggerEvent(u64 ident, s16 filter, void* trigger_data);
@@ -152,6 +158,8 @@ public:
     int WaitForSmallTimer(SceKernelEvent* ev, int num, u32 micros);

+    bool EventExists(u64 id, s16 filter);
+
 private:
     std::string m_name;
     std::mutex m_mutex;
@@ -160,9 +168,6 @@ private:
     std::condition_variable m_cond;
 };

-using SceKernelUseconds = u32;
-using SceKernelEqueue = EqueueInternal*;
-
 u64 PS4_SYSV_ABI sceKernelGetEventData(const SceKernelEvent* ev);

 void RegisterEventQueue(Core::Loader::SymbolsResolver* sym);

View File

@@ -290,6 +290,12 @@ int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut
                                         directMemoryEndOut);
 }

+int PS4_SYSV_ABI sceKernelIsStack(void* addr, void** start, void** end) {
+    LOG_DEBUG(Kernel_Vmm, "called, addr = {}", fmt::ptr(addr));
+    auto* memory = Core::Memory::Instance();
+    return memory->IsStack(std::bit_cast<VAddr>(addr), start, end);
+}
+
 s32 PS4_SYSV_ABI sceKernelBatchMap(OrbisKernelBatchMapEntry* entries, int numEntries,
                                    int* numEntriesOut) {
     return sceKernelBatchMap2(entries, numEntries, numEntriesOut,
@@ -636,6 +642,7 @@ void RegisterMemory(Core::Loader::SymbolsResolver* sym) {
     LIB_FUNCTION("7oxv3PPCumo", "libkernel", 1, "libkernel", 1, 1, sceKernelReserveVirtualRange);
     LIB_FUNCTION("BC+OG5m9+bw", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemoryType);
     LIB_FUNCTION("pO96TwzOm5E", "libkernel", 1, "libkernel", 1, 1, sceKernelGetDirectMemorySize);
+    LIB_FUNCTION("yDBwVAolDgg", "libkernel", 1, "libkernel", 1, 1, sceKernelIsStack);
     LIB_FUNCTION("NcaWUxfMNIQ", "libkernel", 1, "libkernel", 1, 1, sceKernelMapNamedDirectMemory);
     LIB_FUNCTION("L-Q3LEjIbgA", "libkernel", 1, "libkernel", 1, 1, sceKernelMapDirectMemory);
     LIB_FUNCTION("WFcfL2lzido", "libkernel", 1, "libkernel", 1, 1, sceKernelQueryMemoryProtection);

View File

@@ -158,6 +158,7 @@ void PS4_SYSV_ABI _sceKernelRtldSetApplicationHeapAPI(void* func[]);
 int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut,
                                               void** directMemoryStartOut,
                                               void** directMemoryEndOut);
+int PS4_SYSV_ABI sceKernelIsStack(void* addr, void** start, void** end);
 s32 PS4_SYSV_ABI sceKernelBatchMap(OrbisKernelBatchMapEntry* entries, int numEntries,
                                    int* numEntriesOut);

View File

@@ -315,7 +315,7 @@ int PS4_SYSV_ABI sceKernelPollEventFlag(OrbisKernelEventFlag ef, u64 bitPattern,
     auto result = ef->Poll(bitPattern, wait, clear, pResultPat);

     if (result != ORBIS_OK && result != ORBIS_KERNEL_ERROR_EBUSY) {
-        LOG_ERROR(Kernel_Event, "returned {}", result);
+        LOG_DEBUG(Kernel_Event, "returned {:#x}", result);
     }

     return result;
@@ -361,7 +361,7 @@ int PS4_SYSV_ABI sceKernelWaitEventFlag(OrbisKernelEventFlag ef, u64 bitPattern,
     u32 result = ef->Wait(bitPattern, wait, clear, pResultPat, pTimeout);

     if (result != ORBIS_OK && result != ORBIS_KERNEL_ERROR_ETIMEDOUT) {
-        LOG_ERROR(Kernel_Event, "returned {:#x}", result);
+        LOG_DEBUG(Kernel_Event, "returned {:#x}", result);
     }

     return result;

View File

@@ -380,8 +380,7 @@ s32 PS4_SYSV_ABI sceNgs2GeomApply(const OrbisNgs2GeomListenerWork* listener,
 s32 PS4_SYSV_ABI sceNgs2PanInit(OrbisNgs2PanWork* work, const float* aSpeakerAngle, float unitAngle,
                                 u32 numSpeakers) {
-    LOG_ERROR(Lib_Ngs2, "aSpeakerAngle = {}, unitAngle = {}, numSpeakers = {}", *aSpeakerAngle,
-              unitAngle, numSpeakers);
+    LOG_ERROR(Lib_Ngs2, "unitAngle = {}, numSpeakers = {}", unitAngle, numSpeakers);
     return ORBIS_OK;
 }

View File

@@ -950,4 +950,33 @@ int MemoryManager::GetDirectMemoryType(PAddr addr, int* directMemoryTypeOut,
     return ORBIS_OK;
 }

+int MemoryManager::IsStack(VAddr addr, void** start, void** end) {
+    auto vma_handle = FindVMA(addr);
+    if (vma_handle == vma_map.end()) {
+        return ORBIS_KERNEL_ERROR_EINVAL;
+    }
+
+    const VirtualMemoryArea& vma = vma_handle->second;
+    if (!vma.Contains(addr, 0) || vma.IsFree()) {
+        return ORBIS_KERNEL_ERROR_EACCES;
+    }
+
+    auto stack_start = 0ul;
+    auto stack_end = 0ul;
+    if (vma.type == VMAType::Stack) {
+        stack_start = vma.base;
+        stack_end = vma.base + vma.size;
+    }
+
+    if (start != nullptr) {
+        *start = reinterpret_cast<void*>(stack_start);
+    }
+
+    if (end != nullptr) {
+        *end = reinterpret_cast<void*>(stack_end);
+    }
+
+    return ORBIS_OK;
+}
+
 } // namespace Core
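
For reference, a caller-side sketch of the new call, assuming the sceKernelIsStack declaration and ORBIS_* constants from the libkernel headers in this commit are in scope (the helper name is hypothetical):

#include <cstdint>

// Hypothetical helper built on the new API: returns true when `addr` falls inside a
// stack mapping, using the start/end outputs that MemoryManager::IsStack fills in.
static bool AddressIsOnStack(void* addr) {
    void* start = nullptr;
    void* end = nullptr;
    if (sceKernelIsStack(addr, &start, &end) != ORBIS_OK) {
        return false; // unmapped or otherwise inaccessible address
    }
    // For mappings that are not stacks the implementation reports an empty range.
    const auto a = reinterpret_cast<std::uintptr_t>(addr);
    const auto s = reinterpret_cast<std::uintptr_t>(start);
    const auto e = reinterpret_cast<std::uintptr_t>(end);
    return s != e && a >= s && a < e;
}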

View File

@@ -223,6 +223,8 @@ public:
     void InvalidateMemory(VAddr addr, u64 size) const;

+    int IsStack(VAddr addr, void** start, void** end);
+
 private:
     VMAHandle FindVMA(VAddr target) {
         return std::prev(vma_map.upper_bound(target));

View File

@@ -53,7 +53,7 @@ void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
         ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
     IR::ScalarReg dst_reg{inst.dst[0].code};
     for (u32 i = 0; i < num_dwords; i++) {
-        ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
+        ir.SetScalarReg(dst_reg + i, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
     }
 }
@@ -75,7 +75,7 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
     IR::ScalarReg dst_reg{inst.dst[0].code};
     for (u32 i = 0; i < num_dwords; i++) {
         const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
-        ir.SetScalarReg(dst_reg++, ir.ReadConstBuffer(vsharp, index));
+        ir.SetScalarReg(dst_reg + i, ir.ReadConstBuffer(vsharp, index));
     }
 }

View File

@@ -989,13 +989,22 @@ void Translator::V_CMP_NE_U64(const GcnInst& inst) {
         }
     };
     const IR::U1 src0{get_src(inst.src[0])};
-    ASSERT(inst.src[1].field == OperandField::ConstZero); // src0 != 0
+    auto op = [&inst, this](auto x) {
+        switch (inst.src[1].field) {
+        case OperandField::ConstZero:
+            return x;
+        case OperandField::SignedConstIntNeg:
+            return ir.LogicalNot(x);
+        default:
+            UNREACHABLE_MSG("unhandled V_CMP_NE_U64 source argument {}", u32(inst.src[1].field));
+        }
+    };
     switch (inst.dst[1].field) {
     case OperandField::VccLo:
-        ir.SetVcc(src0);
+        ir.SetVcc(op(src0));
         break;
     case OperandField::ScalarGPR:
-        ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), src0);
+        ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), op(src0));
         break;
     default:
         UNREACHABLE();

View File

@@ -584,7 +584,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
             break;
         }
         case PM4ItOpcode::EventWrite: {
-            // const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
+            const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
+            LOG_DEBUG(Render_Vulkan,
+                      "Encountered EventWrite: event_type = {}, event_index = {}",
+                      magic_enum::enum_name(event->event_type.Value()),
+                      magic_enum::enum_name(event->event_index.Value()));
+            if (event->event_type.Value() == EventType::SoVgtStreamoutFlush) {
+                // TODO: handle proper synchronization, for now signal that update is done
+                // immediately
+                regs.cp_strmout_cntl.offset_update_done = 1;
+            }
             break;
         }
         case PM4ItOpcode::EventWriteEos: {
@@ -696,10 +705,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
             const u64* wait_addr = wait_reg_mem->Address<u64*>();
             if (vo_port->IsVoLabel(wait_addr) &&
                 num_submits == mapped_queues[GfxQueueId].submits.size()) {
-                vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(); });
+                vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(regs.reg_array); });
                 break;
             }
-            while (!wait_reg_mem->Test()) {
+            while (!wait_reg_mem->Test(regs.reg_array)) {
                 YIELD_GFX();
             }
             break;
@@ -732,6 +741,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
             }
             break;
         }
+        case PM4ItOpcode::StrmoutBufferUpdate: {
+            const auto* strmout = reinterpret_cast<const PM4CmdStrmoutBufferUpdate*>(header);
+            LOG_WARNING(Render_Vulkan,
+                        "Unimplemented IT_STRMOUT_BUFFER_UPDATE, update_memory = {}, "
+                        "source_select = {}, buffer_select = {}",
+                        strmout->update_memory.Value(),
+                        magic_enum::enum_name(strmout->source_select.Value()),
+                        strmout->buffer_select.Value());
+            break;
+        }
         default:
             UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
                             static_cast<u32>(opcode), count);
@@ -866,8 +885,9 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
         }
         case PM4ItOpcode::SetQueueReg: {
             const auto* set_data = reinterpret_cast<const PM4CmdSetQueueReg*>(header);
-            UNREACHABLE_MSG("Encountered compute SetQueueReg: vqid = {}, reg_offset = {:#x}",
-                            set_data->vqid.Value(), set_data->reg_offset.Value());
+            LOG_WARNING(Render, "Encountered compute SetQueueReg: vqid = {}, reg_offset = {:#x}",
+                        set_data->vqid.Value(), set_data->reg_offset.Value());
+            break;
         }
         case PM4ItOpcode::DispatchDirect: {
             const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
@@ -934,7 +954,7 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
         case PM4ItOpcode::WaitRegMem: {
             const auto* wait_reg_mem = reinterpret_cast<const PM4CmdWaitRegMem*>(header);
             ASSERT(wait_reg_mem->engine.Value() == PM4CmdWaitRegMem::Engine::Me);
-            while (!wait_reg_mem->Test()) {
+            while (!wait_reg_mem->Test(regs.reg_array)) {
                 YIELD_ASC(vqid);
             }
             break;

View File

@@ -1175,6 +1175,14 @@ struct Liverpool {
         BitField<22, 2, u32> onchip;
     };

+    union StreamOutControl {
+        u32 raw;
+        struct {
+            u32 offset_update_done : 1;
+            u32 : 31;
+        };
+    };
+
     union StreamOutConfig {
         u32 raw;
         struct {
@@ -1378,7 +1386,9 @@
         AaConfig aa_config;
         INSERT_PADDING_WORDS(0xA318 - 0xA2F8 - 1);
         ColorBuffer color_buffers[NumColorBuffers];
-        INSERT_PADDING_WORDS(0xC242 - 0xA390);
+        INSERT_PADDING_WORDS(0xC03F - 0xA390);
+        StreamOutControl cp_strmout_cntl;
+        INSERT_PADDING_WORDS(0xC242 - 0xC040);
         PrimitiveType primitive_type;
         INSERT_PADDING_WORDS(0xC24C - 0xC243);
         u32 num_indices;
@@ -1668,6 +1678,7 @@ static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318);
 static_assert(GFX6_3D_REG_INDEX(color_buffers[0].pitch) == 0xA319);
 static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A);
 static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381);
+static_assert(GFX6_3D_REG_INDEX(cp_strmout_cntl) == 0xC03F);
 static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242);
 static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D);
 static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250);
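
The padding arithmetic above just splits the old gap between the last color buffer register and primitive_type so that cp_strmout_cntl lands at register index 0xC03F without shifting anything after it; a quick standalone check of that bookkeeping (illustrative only):

// (0xC03F - 0xA390) padding words, one word for cp_strmout_cntl at 0xC03F,
// then (0xC242 - 0xC040) padding words: together they cover exactly the
// original (0xC242 - 0xA390) gap, so every later register keeps its index.
static_assert((0xC03F - 0xA390) + 1 + (0xC242 - 0xC040) == 0xC242 - 0xA390);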

View File

@@ -246,6 +246,46 @@ struct PM4CmdNop {
     };
 };

+enum class SourceSelect : u32 {
+    BufferOffset = 0,
+    VgtStrmoutBufferFilledSize = 1,
+    SrcAddress = 2,
+    None = 3,
+};
+
+struct PM4CmdStrmoutBufferUpdate {
+    PM4Type3Header header;
+    union {
+        BitField<0, 1, u32> update_memory;
+        BitField<1, 2, SourceSelect> source_select;
+        BitField<8, 2, u32> buffer_select;
+        u32 control;
+    };
+    union {
+        BitField<2, 30, u32> dst_address_lo;
+        BitField<0, 2, u32> swap_dst;
+    };
+    u32 dst_address_hi;
+    union {
+        u32 buffer_offset;
+        BitField<2, 30, u32> src_address_lo;
+        BitField<0, 2, u32> swap_src;
+    };
+    u32 src_address_hi;
+
+    template <typename T = u64>
+    T DstAddress() const {
+        ASSERT(update_memory.Value() == 1);
+        return reinterpret_cast<T>(dst_address_lo.Value() | u64(dst_address_hi & 0xFFFF) << 32);
+    }
+
+    template <typename T = u64>
+    T SrcAddress() const {
+        ASSERT(source_select.Value() == SourceSelect::SrcAddress);
+        return reinterpret_cast<T>(src_address_lo.Value() | u64(src_address_hi & 0xFFFF) << 32);
+    }
+};
+
 struct PM4CmdDrawIndexOffset2 {
     PM4Type3Header header;
     u32 max_size; ///< Maximum number of indices
@@ -303,6 +343,80 @@ static u64 GetGpuClock64() {
     return static_cast<u64>(ticks);
 }

+// VGT_EVENT_INITIATOR.EVENT_TYPE
+enum class EventType : u32 {
+    SampleStreamoutStats1 = 1,
+    SampleStreamoutStats2 = 2,
+    SampleStreamoutStats3 = 3,
+    CacheFlushTs = 4,
+    ContextDone = 5,
+    CacheFlush = 6,
+    CsPartialFlush = 7,
+    VgtStreamoutSync = 8,
+    VgtStreamoutReset = 10,
+    EndOfPipeIncrDe = 11,
+    EndOfPipeIbEnd = 12,
+    RstPixCnt = 13,
+    VsPartialFlush = 15,
+    PsPartialFlush = 16,
+    FlushHsOutput = 17,
+    FlushLsOutput = 18,
+    CacheFlushAndInvTsEvent = 20,
+    ZpassDone = 21,
+    CacheFlushAndInvEvent = 22,
+    PerfcounterStart = 23,
+    PerfcounterStop = 24,
+    PipelineStatStart = 25,
+    PipelineStatStop = 26,
+    PerfcounterSample = 27,
+    FlushEsOutput = 28,
+    FlushGsOutput = 29,
+    SamplePipelineStat = 30,
+    SoVgtStreamoutFlush = 31,
+    SampleStreamoutStats = 32,
+    ResetVtxCnt = 33,
+    VgtFlush = 36,
+    ScSendDbVpz = 39,
+    BottomOfPipeTs = 40,
+    DbCacheFlushAndInv = 42,
+    FlushAndInvDbDataTs = 43,
+    FlushAndInvDbMeta = 44,
+    FlushAndInvCbDataTs = 45,
+    FlushAndInvCbMeta = 46,
+    CsDone = 47,
+    PsDone = 48,
+    FlushAndInvCbPixelData = 49,
+    ThreadTraceStart = 51,
+    ThreadTraceStop = 52,
+    ThreadTraceFlush = 54,
+    ThreadTraceFinish = 55,
+    PixelPipeStatControl = 56,
+    PixelPipeStatDump = 57,
+    PixelPipeStatReset = 58,
+};
+
+enum class EventIndex : u32 {
+    Other = 0,
+    ZpassDone = 1,
+    SamplePipelineStat = 2,
+    SampleStreamoutStatSx = 3,
+    CsVsPsPartialFlush = 4,
+    EopReserved = 5,
+    EosReserved = 6,
+    CacheFlush = 7,
+};
+
+struct PM4CmdEventWrite {
+    PM4Type3Header header;
+    union {
+        u32 event_control;
+        BitField<0, 6, EventType> event_type; ///< Event type written to VGT_EVENT_INITIATOR
+        BitField<8, 4, EventIndex> event_index; ///< Event index
+        BitField<20, 1, u32> inv_l2; ///< Send WBINVL2 op to the TC L2 cache when EVENT_INDEX = 0111
+    };
+    u32 address[];
+};
+
 struct PM4CmdEventWriteEop {
     PM4Type3Header header;
     union {
@@ -474,7 +588,12 @@ struct PM4CmdWaitRegMem {
         BitField<8, 1, Engine> engine;
         u32 raw;
     };
-    u32 poll_addr_lo;
+    union {
+        BitField<0, 16, u32> reg;
+        BitField<2, 30, u32> poll_addr_lo;
+        BitField<0, 2, u32> swap;
+        u32 poll_addr_lo_raw;
+    };
     u32 poll_addr_hi;
     u32 ref;
     u32 mask;
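
With this union, the DWORD after the WAIT_REG_MEM header is read two ways: in register space the low 16 bits are a register offset, and in memory space bits [31:2] are a DWORD-aligned address while bits [1:0] select the swap function. A small illustrative decode of the same packing (hypothetical helper, independent of the emulator's BitField class):

#include <cstdint>

// Mirrors the union above: one raw DWORD, two interpretations.
struct WaitRegMemPollWord {
    uint32_t raw; // corresponds to poll_addr_lo_raw

    uint32_t RegOffset() const { // register space: bits [15:0]
        return raw & 0xFFFFu;
    }
    uint32_t AddrLo() const { // memory space: DWORD-aligned low address bits
        return raw & ~0x3u;   // same value as (BitField<2, 30>::Value() << 2)
    }
    uint32_t Swap() const { // memory space: bits [1:0]
        return raw & 0x3u;
    }
};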
@@ -482,31 +601,36 @@
     template <typename T = u32*>
     T Address() const {
-        return std::bit_cast<T>((uintptr_t(poll_addr_hi) << 32) | poll_addr_lo);
+        return std::bit_cast<T>((uintptr_t(poll_addr_hi) << 32) | (poll_addr_lo << 2));
     }

-    bool Test() const {
+    u32 Reg() const {
+        return reg.Value();
+    }
+
+    bool Test(const std::array<u32, Liverpool::NumRegs>& regs) const {
+        u32 value = mem_space.Value() == MemSpace::Memory ? *Address() : regs[Reg()];
         switch (function.Value()) {
         case Function::Always: {
             return true;
         }
         case Function::LessThan: {
-            return (*Address() & mask) < ref;
+            return (value & mask) < ref;
         }
         case Function::LessThanEqual: {
-            return (*Address() & mask) <= ref;
+            return (value & mask) <= ref;
         }
         case Function::Equal: {
-            return (*Address() & mask) == ref;
+            return (value & mask) == ref;
         }
         case Function::NotEqual: {
-            return (*Address() & mask) != ref;
+            return (value & mask) != ref;
         }
         case Function::GreaterThanEqual: {
-            return (*Address() & mask) >= ref;
+            return (value & mask) >= ref;
         }
         case Function::GreaterThan: {
-            return (*Address() & mask) > ref;
+            return (value & mask) > ref;
         }
         case Function::Reserved:
             [[fallthrough]];