diff --git a/CMakeLists.txt b/CMakeLists.txt index 503ee7e6c..08cc41036 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -309,6 +309,8 @@ set(CORE src/core/aerolib/stubs.cpp src/core/file_format/pkg_type.h src/core/file_format/psf.cpp src/core/file_format/psf.h + src/core/file_format/playgo_chunk.cpp + src/core/file_format/playgo_chunk.h src/core/file_format/trp.cpp src/core/file_format/trp.h src/core/file_format/splash.h diff --git a/src/common/config.cpp b/src/common/config.cpp index 98c4b123b..57f40b212 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -230,7 +230,7 @@ void load(const std::filesystem::path& path) { auto general = generalResult.unwrap(); isNeo = toml::find_or(general, "isPS4Pro", false); - isFullscreen = toml::find_or(general, "Fullscreen", true); + isFullscreen = toml::find_or(general, "Fullscreen", false); logFilter = toml::find_or(general, "logFilter", ""); logType = toml::find_or(general, "logType", "sync"); isShowSplash = toml::find_or(general, "showSplash", true); diff --git a/src/common/path_util.cpp b/src/common/path_util.cpp index ba615f6df..d34aa8a12 100644 --- a/src/common/path_util.cpp +++ b/src/common/path_util.cpp @@ -72,8 +72,9 @@ static auto UserPaths = [] { create_path(PathType::GameDataDir, user_dir / GAMEDATA_DIR); create_path(PathType::TempDataDir, user_dir / TEMPDATA_DIR); create_path(PathType::SysModuleDir, user_dir / SYSMODULES_DIR); + create_path(PathType::DownloadDir, user_dir / DOWNLOAD_DIR); create_path(PathType::CapturesDir, user_dir / CAPTURES_DIR); - + return paths; }(); diff --git a/src/common/path_util.h b/src/common/path_util.h index 52c343368..263edd46e 100644 --- a/src/common/path_util.h +++ b/src/common/path_util.h @@ -18,6 +18,7 @@ enum class PathType { TempDataDir, // Where game temp data is stored. GameDataDir, // Where game data is stored. SysModuleDir, // Where system modules are stored. + DownloadDir, // Where downloads/temp files are stored. 
CapturesDir, // Where rdoc captures are stored. }; @@ -32,6 +33,7 @@ constexpr auto SAVEDATA_DIR = "savedata"; constexpr auto GAMEDATA_DIR = "data"; constexpr auto TEMPDATA_DIR = "temp"; constexpr auto SYSMODULES_DIR = "sys_modules"; +constexpr auto DOWNLOAD_DIR = "download"; constexpr auto CAPTURES_DIR = "captures"; // Filenames diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index 2a9cf5e29..42318822b 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -285,20 +285,24 @@ static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGe const auto slot = GetTcbKey(); #if defined(_WIN32) - // The following logic is based on the wine implementation of TlsGetValue - // https://github.com/wine-mirror/wine/blob/a27b9551/dlls/kernelbase/thread.c#L719 + // The following logic is based on the Kernel32.dll asm of TlsGetValue static constexpr u32 TlsSlotsOffset = 0x1480; static constexpr u32 TlsExpansionSlotsOffset = 0x1780; static constexpr u32 TlsMinimumAvailable = 64; - const u32 teb_offset = slot < TlsMinimumAvailable ? TlsSlotsOffset : TlsExpansionSlotsOffset; - const u32 tls_index = slot < TlsMinimumAvailable ? slot : slot - TlsMinimumAvailable; - // Load the pointer to the table of TLS slots. c.putSeg(gs); - c.mov(dst, ptr[reinterpret_cast(teb_offset)]); - // Load the pointer to our buffer. - c.mov(dst, qword[dst + tls_index * sizeof(LPVOID)]); + if (slot < TlsMinimumAvailable) { + // Load the pointer to TLS slots. + c.mov(dst, ptr[reinterpret_cast(TlsSlotsOffset + slot * sizeof(LPVOID))]); + } else { + const u32 tls_index = slot - TlsMinimumAvailable; + + // Load the pointer to the table of TLS expansion slots. + c.mov(dst, ptr[reinterpret_cast(TlsExpansionSlotsOffset)]); + // Load the pointer to our buffer. 
+ c.mov(dst, qword[dst + tls_index * sizeof(LPVOID)]); + } #elif defined(__APPLE__) // The following logic is based on the Darwin implementation of _os_tsd_get_direct, used by // pthread_getspecific https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L89-L96 diff --git a/src/core/file_format/playgo_chunk.cpp b/src/core/file_format/playgo_chunk.cpp new file mode 100644 index 000000000..43d8a4ded --- /dev/null +++ b/src/core/file_format/playgo_chunk.cpp @@ -0,0 +1,16 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/io_file.h" + +#include "playgo_chunk.h" + +bool PlaygoChunk::Open(const std::filesystem::path& filepath) { + Common::FS::IOFile file(filepath, Common::FS::FileAccessMode::Read); + if (!file.IsOpen()) { + return false; + } + file.Read(playgoHeader); + + return true; +} \ No newline at end of file diff --git a/src/core/file_format/playgo_chunk.h b/src/core/file_format/playgo_chunk.h new file mode 100644 index 000000000..d17d24bf9 --- /dev/null +++ b/src/core/file_format/playgo_chunk.h @@ -0,0 +1,31 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once +#include +#include "common/types.h" + +struct PlaygoHeader { + u32 magic; + + u16 version_major; + u16 version_minor; + u16 image_count; + u16 chunk_count; + u16 mchunk_count; + u16 scenario_count; + // TODO fill the rest +}; +class PlaygoChunk { +public: + PlaygoChunk() = default; + ~PlaygoChunk() = default; + + bool Open(const std::filesystem::path& filepath); + PlaygoHeader GetPlaygoHeader() const { + return playgoHeader; + } + +private: + PlaygoHeader playgoHeader{}; // value-initialized: all fields are zero unless Open() fills them in +}; \ No newline at end of file diff --git a/src/core/file_sys/fs.cpp b/src/core/file_sys/fs.cpp index 2f57c9f34..3177770b0 100644 --- a/src/core/file_sys/fs.cpp +++ b/src/core/file_sys/fs.cpp @@ -26,23 +26,27 @@ void MntPoints::UnmountAll() { } std::filesystem::path 
MntPoints::GetHostPath(const std::string& guest_directory) { - const MntPair* mount = GetMount(guest_directory); + // Evil games like Turok2 pass repeated slashes e.g /app0//game.kpf; collapse every run into one '/' + auto corrected_path = guest_directory; + size_t pos = corrected_path.find("//"); + while (pos != std::string::npos) { + corrected_path.replace(pos, 2, "/"); + pos = corrected_path.find("//", pos); // rescan from pos: the replacement can form a new "//" right here + } + + const MntPair* mount = GetMount(corrected_path); if (!mount) { - return guest_directory; + return ""; } // Nothing to do if getting the mount itself. - if (guest_directory == mount->mount) { + if (corrected_path == mount->mount) { return mount->host_path; } // Remove device (e.g /app0) from path to retrieve relative path. - u32 pos = mount->mount.size() + 1; - // Evil games like Turok2 pass double slashes e.g /app0//game.kpf - if (guest_directory[pos] == '/') { - pos++; - } - const auto rel_path = std::string_view(guest_directory).substr(pos); + pos = mount->mount.size() + 1; + const auto rel_path = std::string_view(corrected_path).substr(pos); const auto host_path = mount->host_path / rel_path; if (!NeedsCaseInsensiveSearch) { return host_path; diff --git a/src/core/libraries/app_content/app_content.cpp b/src/core/libraries/app_content/app_content.cpp index 7e9cf7a21..882f99e49 100644 --- a/src/core/libraries/app_content/app_content.cpp +++ b/src/core/libraries/app_content/app_content.cpp @@ -198,13 +198,9 @@ int PS4_SYSV_ABI sceAppContentTemporaryDataMount() { int PS4_SYSV_ABI sceAppContentTemporaryDataMount2(OrbisAppContentTemporaryDataOption option, OrbisAppContentMountPoint* mountPoint) { - if (std::string_view(mountPoint->data).empty()) // causing issues with save_data. 
+ if (mountPoint == nullptr) return ORBIS_APP_CONTENT_ERROR_PARAMETER; - auto* param_sfo = Common::Singleton::Instance(); - std::string id(param_sfo->GetString("CONTENT_ID"), 7, 9); - const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::TempDataDir) / id; - auto* mnt = Common::Singleton::Instance(); - mnt->Mount(mount_dir, mountPoint->data); + strncpy(mountPoint->data, "/temp0", 16); LOG_INFO(Lib_AppContent, "sceAppContentTemporaryDataMount2: option = {}, mountPoint = {}", option, mountPoint->data); return ORBIS_OK; diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index 8734b9649..4a42b0d6f 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -53,6 +53,9 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) { if (std::string_view{path} == "/dev/stdout") { return 2002; } + if (std::string_view{path} == "/dev/urandom") { + return 2003; + } u32 handle = h->CreateHandle(); auto* file = h->GetFile(handle); if (directory) { @@ -113,6 +116,9 @@ int PS4_SYSV_ABI sceKernelClose(int d) { if (d < 3) { // d probably hold an error code return ORBIS_KERNEL_ERROR_EPERM; } + if (d == 2003) { // dev/urandom case + return SCE_OK; + } auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); if (file == nullptr) { @@ -223,6 +229,13 @@ s64 PS4_SYSV_ABI posix_lseek(int d, s64 offset, int whence) { } s64 PS4_SYSV_ABI sceKernelRead(int d, void* buf, size_t nbytes) { + if (d == 2003) // dev urandom case + { + auto rbuf = static_cast(buf); + for (size_t i = 0; i < nbytes; i++) + rbuf[i] = std::rand() & 0xFF; + return nbytes; + } auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); if (file == nullptr) { @@ -460,6 +473,7 @@ s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) { } void fileSystemSymbolsRegister(Core::Loader::SymbolsResolver* sym) { + std::srand(std::time(nullptr)); 
LIB_FUNCTION("1G3lF1Gg1k8", "libkernel", 1, "libkernel", 1, 1, sceKernelOpen); LIB_FUNCTION("wuCroIGjt2g", "libScePosix", 1, "libkernel", 1, 1, posix_open); LIB_FUNCTION("UK2Tl2DWUns", "libkernel", 1, "libkernel", 1, 1, sceKernelClose); diff --git a/src/core/libraries/kernel/libkernel.cpp b/src/core/libraries/kernel/libkernel.cpp index f44d928bb..a7f619f1a 100644 --- a/src/core/libraries/kernel/libkernel.cpp +++ b/src/core/libraries/kernel/libkernel.cpp @@ -405,6 +405,9 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("VOx8NGmHXTs", "libkernel", 1, "libkernel", 1, 1, sceKernelGetCpumode); LIB_FUNCTION("Xjoosiw+XPI", "libkernel", 1, "libkernel", 1, 1, sceKernelUuidCreate); + LIB_FUNCTION("2SKEx6bSq-4", "libkernel", 1, "libkernel", 1, 1, sceKernelBatchMap); + LIB_FUNCTION("kBJzF8x4SyE", "libkernel", 1, "libkernel", 1, 1, sceKernelBatchMap2); + // equeue LIB_FUNCTION("D0OdFMjp46I", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateEqueue); LIB_FUNCTION("jpFjmgAC5AE", "libkernel", 1, "libkernel", 1, 1, sceKernelDeleteEqueue); diff --git a/src/core/libraries/kernel/memory_management.cpp b/src/core/libraries/kernel/memory_management.cpp index cdee3f465..988b69d0c 100644 --- a/src/core/libraries/kernel/memory_management.cpp +++ b/src/core/libraries/kernel/memory_management.cpp @@ -3,6 +3,7 @@ #include #include "common/alignment.h" +#include "common/assert.h" #include "common/logging/log.h" #include "common/singleton.h" #include "core/libraries/error_codes.h" @@ -225,4 +226,52 @@ int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut directMemoryEndOut); } +s32 PS4_SYSV_ABI sceKernelBatchMap(OrbisKernelBatchMapEntry* entries, int numEntries, + int* numEntriesOut) { + return sceKernelBatchMap2(entries, numEntries, numEntriesOut, + MemoryFlags::SCE_KERNEL_MAP_FIXED); // 0x10, 0x410? 
+} + +int PS4_SYSV_ABI sceKernelMunmap(void* addr, size_t len); + +s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEntries, + int* numEntriesOut, int flags) { + int processed = 0; + int result = 0; + for (int i = 0; i < numEntries; i++) { + if (entries == nullptr || entries[i].length == 0 || entries[i].operation > 4) { + result = ORBIS_KERNEL_ERROR_EINVAL; + break; // break and assign a value to numEntriesOut. + } + + if (entries[i].operation == MemoryOpTypes::ORBIS_KERNEL_MAP_OP_MAP_DIRECT) { + result = sceKernelMapNamedDirectMemory(&entries[i].start, entries[i].length, + entries[i].protection, flags, + static_cast(entries[i].offset), 0, ""); + LOG_INFO( + Kernel_Vmm, + "BatchMap: entry = {}, operation = {}, len = {:#x}, offset = {:#x}, type = {}, " + "result = {}", + i, entries[i].operation, entries[i].length, entries[i].offset, (u8)entries[i].type, + result); + + if (result == 0) + processed++; + } else if (entries[i].operation == MemoryOpTypes::ORBIS_KERNEL_MAP_OP_UNMAP) { + result = sceKernelMunmap(entries[i].start, entries[i].length); + LOG_INFO(Kernel_Vmm, "BatchMap: entry = {}, operation = {}, len = {:#x}, result = {}", + i, entries[i].operation, entries[i].length, result); + + if (result == 0) + processed++; + } else { + UNREACHABLE_MSG("called: Unimplemented Operation = {}", entries[i].operation); + } + } + if (numEntriesOut != NULL) { // can be zero. do not return an error code. 
+ *numEntriesOut = processed; + } + return result; +} + } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/memory_management.h b/src/core/libraries/kernel/memory_management.h index 2a17f6ed8..cc89dfa7d 100644 --- a/src/core/libraries/kernel/memory_management.h +++ b/src/core/libraries/kernel/memory_management.h @@ -6,7 +6,7 @@ #include "common/bit_field.h" #include "common/types.h" -constexpr u64 SCE_KERNEL_MAIN_DMEM_SIZE = 5376_MB; // ~ 6GB +constexpr u64 SCE_KERNEL_MAIN_DMEM_SIZE = 6_GB; // ~ 6GB namespace Libraries::Kernel { @@ -31,6 +31,14 @@ enum MemoryProtection : u32 { SCE_KERNEL_PROT_GPU_RW = 0x30 // Permit reads/writes from the GPU }; +enum MemoryOpTypes : u32 { + ORBIS_KERNEL_MAP_OP_MAP_DIRECT = 0, + ORBIS_KERNEL_MAP_OP_UNMAP = 1, + ORBIS_KERNEL_MAP_OP_PROTECT = 2, + ORBIS_KERNEL_MAP_OP_MAP_FLEXIBLE = 3, + ORBIS_KERNEL_MAP_OP_TYPE_PROTECT = 4 +}; + struct OrbisQueryInfo { uintptr_t start; uintptr_t end; @@ -53,6 +61,16 @@ struct OrbisVirtualQueryInfo { std::array name; }; +struct OrbisKernelBatchMapEntry { + void* start; + off_t offset; + size_t length; + char protection; + char type; + short reserved; + int operation; +}; + u64 PS4_SYSV_ABI sceKernelGetDirectMemorySize(); int PS4_SYSV_ABI sceKernelAllocateDirectMemory(s64 searchStart, s64 searchEnd, u64 len, u64 alignment, int memoryType, s64* physAddrOut); @@ -85,4 +103,9 @@ int PS4_SYSV_ABI sceKernelGetDirectMemoryType(u64 addr, int* directMemoryTypeOut void** directMemoryStartOut, void** directMemoryEndOut); +s32 PS4_SYSV_ABI sceKernelBatchMap(OrbisKernelBatchMapEntry* entries, int numEntries, + int* numEntriesOut); +s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEntries, + int* numEntriesOut, int flags); + } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index e536412fd..3e9e1994c 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ 
b/src/core/libraries/kernel/thread_management.cpp @@ -785,7 +785,22 @@ int PS4_SYSV_ABI posix_pthread_mutex_destroy(ScePthreadMutex* mutex) { int PS4_SYSV_ABI posix_pthread_cond_wait(ScePthreadCond* cond, ScePthreadMutex* mutex) { int result = scePthreadCondWait(cond, mutex); if (result < 0) { - UNREACHABLE(); + int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP + ? result + -SCE_KERNEL_ERROR_UNKNOWN + : POSIX_EOTHER; + return rt; + } + return result; +} + +int PS4_SYSV_ABI posix_pthread_cond_timedwait(ScePthreadCond* cond, ScePthreadMutex* mutex, + u64 usec) { + int result = scePthreadCondTimedwait(cond, mutex, usec); + if (result < 0) { + int rt = result > SCE_KERNEL_ERROR_UNKNOWN && result <= SCE_KERNEL_ERROR_ESTOP + ? result + -SCE_KERNEL_ERROR_UNKNOWN + : POSIX_EOTHER; + return rt; } return result; } @@ -1321,10 +1336,23 @@ int PS4_SYSV_ABI posix_sem_wait(sem_t* sem) { return sem_wait(sem); } +int PS4_SYSV_ABI posix_sem_timedwait(sem_t* sem, const timespec* t) { +#ifndef __APPLE__ + return sem_timedwait(sem, t); +#else + LOG_ERROR(Kernel_Pthread, "Apple doesn't support sem_timedwait yet"); + return 0; // unsupported for apple yet +#endif +} + int PS4_SYSV_ABI posix_sem_post(sem_t* sem) { return sem_post(sem); } +int PS4_SYSV_ABI posix_sem_destroy(sem_t* sem) { + return sem_destroy(sem); +} + int PS4_SYSV_ABI posix_sem_getvalue(sem_t* sem, int* sval) { return sem_getvalue(sem, sval); } @@ -1350,6 +1378,11 @@ int PS4_SYSV_ABI scePthreadOnce(int* once_control, void (*init_routine)(void)) { UNREACHABLE(); } +[[noreturn]] void PS4_SYSV_ABI posix_pthread_exit(void* value_ptr) { + pthread_exit(value_ptr); + UNREACHABLE(); +} + int PS4_SYSV_ABI scePthreadGetthreadid() { return (int)(size_t)g_pthread_self; } @@ -1383,6 +1416,26 @@ int PS4_SYSV_ABI posix_pthread_condattr_setclock(ScePthreadCondattr* attr, clock return SCE_OK; } +int PS4_SYSV_ABI posix_pthread_getschedparam(ScePthread thread, int* policy, + SceKernelSchedParam* param) { + 
return scePthreadGetschedparam(thread, policy, param); +} + +int PS4_SYSV_ABI posix_pthread_setschedparam(ScePthread thread, int policy, + const SceKernelSchedParam* param) { + return scePthreadSetschedparam(thread, policy, param); +} + +int PS4_SYSV_ABI posix_pthread_attr_getschedpolicy(const ScePthreadAttr* attr, int* policy) { + return scePthreadAttrGetschedpolicy(attr, policy); +} + +int PS4_SYSV_ABI scePthreadRename(ScePthread thread, const char* name) { + thread->name = name; + LOG_INFO(Kernel_Pthread, "scePthreadRename: name = {}", thread->name); + return SCE_OK; +} + void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("lZzFeSxPl08", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setcancelstate); LIB_FUNCTION("0TyVk4MSLt0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_init); @@ -1401,11 +1454,13 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("4qGrR6eoP9Y", "libkernel", 1, "libkernel", 1, 1, scePthreadDetach); LIB_FUNCTION("3PtV6p3QNX4", "libkernel", 1, "libkernel", 1, 1, scePthreadEqual); LIB_FUNCTION("3kg7rT0NQIs", "libkernel", 1, "libkernel", 1, 1, scePthreadExit); + LIB_FUNCTION("FJrT5LuUBAU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_exit); LIB_FUNCTION("7Xl257M4VNI", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_equal); LIB_FUNCTION("h9CcP3J0oVM", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_join); LIB_FUNCTION("EI-5-jlq2dE", "libkernel", 1, "libkernel", 1, 1, scePthreadGetthreadid); LIB_FUNCTION("1tKyG7RlMJo", "libkernel", 1, "libkernel", 1, 1, scePthreadGetprio); LIB_FUNCTION("W0Hpm2X0uPE", "libkernel", 1, "libkernel", 1, 1, scePthreadSetprio); + LIB_FUNCTION("GBUY7ywdULE", "libkernel", 1, "libkernel", 1, 1, scePthreadRename); LIB_FUNCTION("aI+OeCz8xrQ", "libkernel", 1, "libkernel", 1, 1, scePthreadSelf); LIB_FUNCTION("EotR8a3ASf4", "libkernel", 1, "libkernel", 1, 1, posix_pthread_self); @@ -1462,6 +1517,7 @@ void 
pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy); LIB_FUNCTION("Op8TBGY5KHg", "libkernel", 1, "libkernel", 1, 1, posix_pthread_cond_wait); LIB_FUNCTION("Op8TBGY5KHg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_wait); + LIB_FUNCTION("27bAgiJmOh0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_timedwait); LIB_FUNCTION("mkx2fVhNMsg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_cond_broadcast); LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init); LIB_FUNCTION("mDmgMOGVUqg", "libScePosix", 1, "libkernel", 1, 1, @@ -1476,6 +1532,8 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("EjllaAqAPZo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_condattr_setclock); LIB_FUNCTION("Z4QosVuAsA0", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_once); + LIB_FUNCTION("RtLRV-pBTTY", "libScePosix", 1, "libkernel", 1, 1, + posix_pthread_attr_getschedpolicy); // openorbis weird functions LIB_FUNCTION("7H0iTOciTLo", "libkernel", 1, "libkernel", 1, 1, posix_pthread_mutex_lock); @@ -1490,9 +1548,13 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("+U1R4WtXvoc", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_detach); LIB_FUNCTION("CBNtXOoef-E", "libScePosix", 1, "libkernel", 1, 1, posix_sched_get_priority_max); LIB_FUNCTION("m0iS6jNsXds", "libScePosix", 1, "libkernel", 1, 1, posix_sched_get_priority_min); + LIB_FUNCTION("FIs3-UQT9sg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_getschedparam); + LIB_FUNCTION("Xs9hdiD7sAA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setschedparam); LIB_FUNCTION("pDuPEf3m4fI", "libScePosix", 1, "libkernel", 1, 1, posix_sem_init); LIB_FUNCTION("YCV5dGGBcCo", "libScePosix", 1, "libkernel", 1, 1, posix_sem_wait); + LIB_FUNCTION("w5IHyvahg-o", "libScePosix", 1, "libkernel", 1, 1, 
posix_sem_timedwait); LIB_FUNCTION("IKP8typ0QUk", "libScePosix", 1, "libkernel", 1, 1, posix_sem_post); + LIB_FUNCTION("cDW233RAwWo", "libScePosix", 1, "libkernel", 1, 1, posix_sem_destroy); LIB_FUNCTION("Bq+LRV-N6Hk", "libScePosix", 1, "libkernel", 1, 1, posix_sem_getvalue); // libs RwlockSymbolsRegister(sym); diff --git a/src/core/libraries/kernel/threads/semaphore.cpp b/src/core/libraries/kernel/threads/semaphore.cpp index bfa6a68db..370dba445 100644 --- a/src/core/libraries/kernel/threads/semaphore.cpp +++ b/src/core/libraries/kernel/threads/semaphore.cpp @@ -41,7 +41,6 @@ public: AddWaiter(waiter); // Perform the wait. - std::exchange(lk, std::unique_lock{waiter.mutex}); return waiter.Wait(lk, timeout); } @@ -59,10 +58,9 @@ public: it++; continue; } - std::scoped_lock lk2{waiter.mutex}; + it = wait_list.erase(it); token_count -= waiter.need_count; waiter.cv.notify_one(); - it = wait_list.erase(it); } return true; @@ -84,7 +82,6 @@ public: public: struct WaitingThread : public ListBaseHook { - std::mutex mutex; std::string name; std::condition_variable cv; u32 priority; diff --git a/src/core/libraries/playgo/playgo.cpp b/src/core/libraries/playgo/playgo.cpp index 1a335a2a3..a3af8b4c9 100644 --- a/src/core/libraries/playgo/playgo.cpp +++ b/src/core/libraries/playgo/playgo.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "common/logging/log.h" #include "common/singleton.h" #include "core/libraries/error_codes.h" @@ -8,8 +9,6 @@ #include "playgo.h" namespace Libraries::PlayGo { -// this lib is used to play as the game is being installed. -// can be skipped by just returning and assigning the correct values. 
s32 PS4_SYSV_ABI sceDbgPlayGoRequestNextChunk() { LOG_ERROR(Lib_PlayGo, "(STUBBED)called"); @@ -52,9 +51,16 @@ s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoCh uint32_t numberOfEntries, OrbisPlayGoLocus* outLoci) { LOG_ERROR(Lib_PlayGo, "(STUBBED)called handle = {}, chunkIds = {}, numberOfEntries = {}", handle, *chunkIds, numberOfEntries); - // assign all now so that scePlayGoGetLocus is not called again for every single entry - std::fill(outLoci, outLoci + numberOfEntries, - OrbisPlayGoLocusValue::ORBIS_PLAYGO_LOCUS_LOCAL_FAST); + + auto* playgo = Common::Singleton::Instance(); + + for (uint32_t i = 0; i < numberOfEntries; i++) { + if (chunkIds[i] <= playgo->GetPlaygoHeader().mchunk_count) { + outLoci[i] = OrbisPlayGoLocusValue::ORBIS_PLAYGO_LOCUS_LOCAL_FAST; + } else { + return ORBIS_PLAYGO_ERROR_BAD_CHUNK_ID; + } + } return ORBIS_OK; } @@ -70,7 +76,7 @@ s32 PS4_SYSV_ABI scePlayGoGetProgress(OrbisPlayGoHandle handle, const OrbisPlayG s32 PS4_SYSV_ABI scePlayGoGetToDoList(OrbisPlayGoHandle handle, OrbisPlayGoToDo* outTodoList, u32 numberOfEntries, u32* outEntries) { LOG_ERROR(Lib_PlayGo, "(STUBBED)called"); - if (handle != shadMagic) + if (handle != 1) return ORBIS_PLAYGO_ERROR_BAD_HANDLE; if (outTodoList == nullptr) return ORBIS_PLAYGO_ERROR_BAD_POINTER; @@ -88,7 +94,7 @@ s32 PS4_SYSV_ABI scePlayGoInitialize(OrbisPlayGoInitParams* param) { } s32 PS4_SYSV_ABI scePlayGoOpen(OrbisPlayGoHandle* outHandle, const void* param) { - *outHandle = shadMagic; + *outHandle = 1; LOG_INFO(Lib_PlayGo, "(STUBBED)called"); return ORBIS_OK; } @@ -141,4 +147,4 @@ void RegisterlibScePlayGo(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("MPe0EeBGM-E", "libScePlayGo", 1, "libScePlayGo", 1, 0, scePlayGoTerminate); }; -} // namespace Libraries::PlayGo \ No newline at end of file +} // namespace Libraries::PlayGo diff --git a/src/core/libraries/save_data/savedata.cpp b/src/core/libraries/save_data/savedata.cpp index db6d0964d..a8519ab79 100644 --- 
a/src/core/libraries/save_data/savedata.cpp +++ b/src/core/libraries/save_data/savedata.cpp @@ -186,21 +186,23 @@ int PS4_SYSV_ABI sceSaveDataDirNameSearch(const OrbisSaveDataDirNameSearchCond* if (!mount_dir.empty() && std::filesystem::exists(mount_dir)) { if (cond->dirName == nullptr) { // look for all dirs if no dir is provided. for (int i = 0; const auto& entry : std::filesystem::directory_iterator(mount_dir)) { - if (std::filesystem::is_directory(entry.path())) { + if (std::filesystem::is_directory(entry.path()) && + entry.path().filename().string() != "sdmemory") { + // sceSaveDataDirNameSearch does not search for dataMemory1/2 dirs. i++; result->dirNamesNum = 0; // why is it 1024? is it max? // copy dir name to be used by sceSaveDataMount in read mode. strncpy(result->dirNames[i].data, entry.path().filename().string().c_str(), 32); result->hitNum = i + 1; - result->dirNamesNum = i + 1; // to confirm - result->setNum = i + 1; // to confirm + result->dirNamesNum = i + 1; + result->setNum = i + 1; } } } else { // Need a game to test. 
strncpy(result->dirNames[0].data, cond->dirName->data, 32); result->hitNum = 1; - result->dirNamesNum = 1; // to confirm - result->setNum = 1; // to confirm + result->dirNamesNum = 1; + result->setNum = 1; } } else { result->hitNum = 0; @@ -321,7 +323,7 @@ int PS4_SYSV_ABI sceSaveDataGetSaveDataCount() { int PS4_SYSV_ABI sceSaveDataGetSaveDataMemory(const u32 userId, void* buf, const size_t bufSize, const int64_t offset) { const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / - std::to_string(userId) / game_serial / "save_mem1.sav"; + std::to_string(userId) / game_serial / "sdmemory/save_mem1.sav"; Common::FS::IOFile file(mount_dir, Common::FS::FileAccessMode::Read); if (!file.IsOpen()) { @@ -336,7 +338,7 @@ int PS4_SYSV_ABI sceSaveDataGetSaveDataMemory(const u32 userId, void* buf, const int PS4_SYSV_ABI sceSaveDataGetSaveDataMemory2(OrbisSaveDataMemoryGet2* getParam) { const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / - std::to_string(getParam->userId) / game_serial; + std::to_string(getParam->userId) / game_serial / "sdmemory"; if (getParam == nullptr) return ORBIS_SAVE_DATA_ERROR_PARAMETER; if (getParam->data != nullptr) { @@ -604,7 +606,7 @@ int PS4_SYSV_ABI sceSaveDataSetSaveDataMemory(const u32 userId, const void* buf, const size_t bufSize, const int64_t offset) { LOG_INFO(Lib_SaveData, "called"); const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / - std::to_string(userId) / game_serial / "save_mem1.sav"; + std::to_string(userId) / game_serial / "sdmemory/save_mem1.sav"; Common::FS::IOFile file(mount_dir, Common::FS::FileAccessMode::Write); file.Seek(offset); @@ -616,7 +618,7 @@ int PS4_SYSV_ABI sceSaveDataSetSaveDataMemory(const u32 userId, const void* buf, int PS4_SYSV_ABI sceSaveDataSetSaveDataMemory2(const OrbisSaveDataMemorySet2* setParam) { LOG_INFO(Lib_SaveData, "called: dataNum = {}, slotId= {}", setParam->dataNum, setParam->slotId); const auto& 
mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / - std::to_string(setParam->userId) / game_serial; + std::to_string(setParam->userId) / game_serial / "sdmemory"; if (setParam->data != nullptr) { Common::FS::IOFile file(mount_dir / "save_mem2.sav", Common::FS::FileAccessMode::Write); if (!file.IsOpen()) @@ -644,7 +646,7 @@ int PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory(u32 userId, size_t memorySize, LOG_INFO(Lib_SaveData, "called:userId = {}, memorySize = {}", userId, memorySize); const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / - std::to_string(userId) / game_serial; + std::to_string(userId) / game_serial / "sdmemory"; if (std::filesystem::exists(mount_dir)) { return ORBIS_SAVE_DATA_ERROR_EXISTS; @@ -663,7 +665,7 @@ int PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory2(const OrbisSaveDataMemorySetup2 LOG_INFO(Lib_SaveData, "called"); // if (setupParam->option == 1) { // check this later. const auto& mount_dir = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir) / - std::to_string(setupParam->userId) / game_serial; + std::to_string(setupParam->userId) / game_serial / "sdmemory"; if (std::filesystem::exists(mount_dir) && std::filesystem::exists(mount_dir / "save_mem2.sav")) { Common::FS::IOFile file(mount_dir / "save_mem2.sav", Common::FS::FileAccessMode::Read); diff --git a/src/core/linker.cpp b/src/core/linker.cpp index 2e47d17d6..e4cbe5739 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -320,11 +320,15 @@ void Linker::InitTlsForThread(bool is_primary) { static constexpr size_t TlsAllocAlign = 0x20; const size_t total_tls_size = Common::AlignUp(static_tls_size, TlsAllocAlign) + TcbSize; + // If sceKernelMapNamedFlexibleMemory is being called from libkernel and addr = 0 + // it automatically places mappings in system reserved area instead of managed. + static constexpr VAddr KernelAllocBase = 0x880000000ULL; + // The kernel module has a few different paths for TLS allocation. 
// For SDK < 1.7 it allocates both main and secondary thread blocks using libc mspace/malloc. // In games compiled with newer SDK, the main thread gets mapped from flexible memory, // with addr = 0, so system managed area. Here we will only implement the latter. - void* addr_out{}; + void* addr_out{reinterpret_cast(KernelAllocBase)}; if (is_primary) { const size_t tls_aligned = Common::AlignUp(total_tls_size, 16_KB); const int ret = Libraries::Kernel::sceKernelMapNamedFlexibleMemory( diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 9326ccaad..f2607bffd 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -4,7 +4,6 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/debug.h" -#include "common/scope_exit.h" #include "core/libraries/error_codes.h" #include "core/libraries/kernel/memory_management.h" #include "core/memory.h" @@ -55,7 +54,7 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, free_addr = alignment > 0 ? Common::AlignUp(free_addr, alignment) : free_addr; // Add the allocated region to the list and commit its pages. - auto& area = AddDmemAllocation(free_addr, size); + auto& area = CarveDmemArea(free_addr, size); area.memory_type = memory_type; area.is_free = false; return free_addr; @@ -100,29 +99,32 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem alignment = alignment > 0 ? alignment : 16_KB; VAddr mapped_addr = alignment > 0 ? Common::AlignUp(virtual_addr, alignment) : virtual_addr; + // Fixed mapping means the virtual address must exactly match the provided one. + if (True(flags & MemoryMapFlags::Fixed)) { + const auto& vma = FindVMA(mapped_addr)->second; + // If the VMA is mapped, unmap the region first. 
+ if (vma.IsMapped()) { + ASSERT_MSG(vma.base == mapped_addr && vma.size == size, + "Region must match when reserving a mapped region"); + UnmapMemory(mapped_addr, size); + } + const size_t remaining_size = vma.base + vma.size - mapped_addr; + ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size); + } + // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { - auto it = FindVMA(mapped_addr); - // If the VMA is free and contains the requested mapping we are done. - if (it->second.type == VMAType::Free && it->second.Contains(virtual_addr, size)) { - mapped_addr = virtual_addr; - } else { - // Search for the first free VMA that fits our mapping. - while (it->second.type != VMAType::Free || it->second.size < size) { - it++; - } - ASSERT(it != vma_map.end()); - const auto& vma = it->second; - mapped_addr = alignment > 0 ? Common::AlignUp(vma.base, alignment) : vma.base; - } + mapped_addr = SearchFree(mapped_addr, size, alignment); } // Add virtual memory area - auto& new_vma = AddMapping(mapped_addr, size); + const auto new_vma_handle = CarveVMA(mapped_addr, size); + auto& new_vma = new_vma_handle->second; new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); new_vma.prot = MemoryProt::NoAccess; new_vma.name = ""; new_vma.type = VMAType::Reserved; + MergeAdjacent(vma_map, new_vma_handle); *out_addr = std::bit_cast(mapped_addr); return ORBIS_OK; @@ -132,6 +134,9 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M MemoryMapFlags flags, VMAType type, std::string_view name, bool is_exec, PAddr phys_addr, u64 alignment) { std::scoped_lock lk{mutex}; + + // Certain games perform flexible mappings on loop to determine + // the available flexible memory size. Questionable but we need to handle this. 
if (type == VMAType::Flexible && flexible_usage + size > total_flexible_size) { return SCE_KERNEL_ERROR_ENOMEM; } @@ -140,91 +145,63 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M // flag so we will take the branch that searches for free (or reserved) mappings. virtual_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr; alignment = alignment > 0 ? alignment : 16_KB; - VAddr mapped_addr = alignment > 0 ? Common::AlignUp(virtual_addr, alignment) : virtual_addr; - SCOPE_EXIT { - auto& new_vma = AddMapping(mapped_addr, size); - new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); - new_vma.prot = prot; - new_vma.name = name; - new_vma.type = type; - - if (type == VMAType::Direct) { - new_vma.phys_base = phys_addr; - MapVulkanMemory(mapped_addr, size); - } - if (type == VMAType::Flexible) { - flexible_usage += size; - } - }; // Fixed mapping means the virtual address must exactly match the provided one. - if (True(flags & MemoryMapFlags::Fixed) && True(flags & MemoryMapFlags::NoOverwrite)) { + if (True(flags & MemoryMapFlags::Fixed)) { // This should return SCE_KERNEL_ERROR_ENOMEM but shouldn't normally happen. const auto& vma = FindVMA(mapped_addr)->second; const size_t remaining_size = vma.base + vma.size - mapped_addr; - ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size); + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); } // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { - auto it = FindVMA(mapped_addr); - // If the VMA is free and contains the requested mapping we are done. - if (it->second.type == VMAType::Free && it->second.Contains(virtual_addr, size)) { - mapped_addr = virtual_addr; - } else { - // Search for the first free VMA that fits our mapping. 
- while (it->second.type != VMAType::Free || it->second.size < size) { - it++; - } - ASSERT(it != vma_map.end()); - const auto& vma = it->second; - mapped_addr = alignment > 0 ? Common::AlignUp(vma.base, alignment) : vma.base; - } + mapped_addr = SearchFree(mapped_addr, size, alignment); } // Perform the mapping. *out_addr = impl.Map(mapped_addr, size, alignment, phys_addr, is_exec); TRACK_ALLOC(*out_addr, size, "VMEM"); + + auto& new_vma = CarveVMA(mapped_addr, size)->second; + new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); + new_vma.prot = prot; + new_vma.name = name; + new_vma.type = type; + + if (type == VMAType::Direct) { + new_vma.phys_base = phys_addr; + MapVulkanMemory(mapped_addr, size); + } + if (type == VMAType::Flexible) { + flexible_usage += size; + } + return ORBIS_OK; } int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot, MemoryMapFlags flags, uintptr_t fd, size_t offset) { - if (virtual_addr == 0) { - virtual_addr = impl.SystemManagedVirtualBase(); - } else { - LOG_INFO(Kernel_Vmm, "Virtual addr {:#x} with size {:#x}", virtual_addr, size); - } - - VAddr mapped_addr = 0; + VAddr mapped_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr; const size_t size_aligned = Common::AlignUp(size, 16_KB); // Find first free area to map the file. 
if (False(flags & MemoryMapFlags::Fixed)) { - auto it = FindVMA(virtual_addr); - while (it->second.type != VMAType::Free || it->second.size < size_aligned) { - it++; - } - ASSERT(it != vma_map.end()); - - mapped_addr = it->second.base; + mapped_addr = SearchFree(mapped_addr, size_aligned); } if (True(flags & MemoryMapFlags::Fixed)) { const auto& vma = FindVMA(virtual_addr)->second; const size_t remaining_size = vma.base + vma.size - virtual_addr; - ASSERT_MSG((vma.type == VMAType::Free || vma.type == VMAType::Reserved) && - remaining_size >= size); - - mapped_addr = virtual_addr; + ASSERT_MSG(!vma.IsMapped() && remaining_size >= size); } // Map the file. impl.MapFile(mapped_addr, size, offset, std::bit_cast(prot), fd); // Add virtual memory area - auto& new_vma = AddMapping(mapped_addr, size_aligned); + auto& new_vma = CarveVMA(mapped_addr, size_aligned)->second; new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce); new_vma.prot = prot; new_vma.name = "File"; @@ -238,10 +215,9 @@ int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, Mem void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { std::scoped_lock lk{mutex}; - // TODO: Partial unmaps are technically supported by the guest. - const auto it = vma_map.find(virtual_addr); - ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr, - "Attempting to unmap partially mapped range"); + const auto it = FindVMA(virtual_addr); + ASSERT_MSG(it->second.Contains(virtual_addr, size), + "Existing mapping does not contain requested unmap range"); const auto type = it->second.type; const bool has_backing = type == VMAType::Direct || type == VMAType::File; @@ -253,11 +229,13 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { } // Mark region as free and attempt to coalesce it with neighbours. 
- auto& vma = it->second; + const auto new_it = CarveVMA(virtual_addr, size); + auto& vma = new_it->second; vma.type = VMAType::Free; vma.prot = MemoryProt::NoAccess; vma.phys_base = 0; - MergeAdjacent(vma_map, it); + vma.disallow_merge = false; + MergeAdjacent(vma_map, new_it); // Unmap the memory region. impl.Unmap(virtual_addr, size, has_backing); @@ -288,10 +266,10 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, std::scoped_lock lk{mutex}; auto it = FindVMA(addr); - if (it->second.type == VMAType::Free && flags == 1) { + if (!it->second.IsMapped() && flags == 1) { it++; } - if (it->second.type == VMAType::Free) { + if (!it->second.IsMapped()) { LOG_WARNING(Kernel_Vmm, "VirtualQuery on free memory region"); return ORBIS_KERNEL_ERROR_EACCES; } @@ -360,14 +338,38 @@ std::pair MemoryManager::GetVulkanBuffer(VAddr addr) { return std::make_pair(*it->second.buffer, addr - it->first); } -VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) { +VAddr MemoryManager::SearchFree(VAddr virtual_addr, size_t size, u32 alignment) { + auto it = FindVMA(virtual_addr); + // If the VMA is free and contains the requested mapping we are done. + if (it->second.IsFree() && it->second.Contains(virtual_addr, size)) { + return virtual_addr; + } + // Search for the first free VMA that fits our mapping. + const auto is_suitable = [&] { + if (!it->second.IsFree()) { + return false; + } + const auto& vma = it->second; + virtual_addr = Common::AlignUp(vma.base, alignment); + // Sometimes the alignment itself might be larger than the VMA. 
+ if (virtual_addr > vma.base + vma.size) { + return false; + } + const size_t remaining_size = vma.base + vma.size - virtual_addr; + return remaining_size >= size; + }; + while (!is_suitable()) { + it++; + } + return virtual_addr; +} + +MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, size_t size) { auto vma_handle = FindVMA(virtual_addr); ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map"); const VirtualMemoryArea& vma = vma_handle->second; - ASSERT_MSG((vma.type == VMAType::Free || vma.type == VMAType::Reserved) && - vma.base <= virtual_addr, - "Adding a mapping to already mapped region"); + ASSERT_MSG(vma.base <= virtual_addr, "Adding a mapping to already mapped region"); const VAddr start_in_vma = virtual_addr - vma.base; const VAddr end_in_vma = start_in_vma + size; @@ -382,10 +384,10 @@ VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) { vma_handle = Split(vma_handle, start_in_vma); } - return vma_handle->second; + return vma_handle; } -DirectMemoryArea& MemoryManager::AddDmemAllocation(PAddr addr, size_t size) { +DirectMemoryArea& MemoryManager::CarveDmemArea(PAddr addr, size_t size) { auto dmem_handle = FindDmemArea(addr); ASSERT_MSG(dmem_handle != dmem_map.end(), "Physical address not in dmem_map"); diff --git a/src/core/memory.h b/src/core/memory.h index 93aef2d8c..ff4af5cd2 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -89,7 +89,15 @@ struct VirtualMemoryArea { uintptr_t fd = 0; bool Contains(VAddr addr, size_t size) const { - return addr >= base && (addr + size) < (base + this->size); + return addr >= base && (addr + size) <= (base + this->size); + } + + bool IsFree() const noexcept { + return type == VMAType::Free; + } + + bool IsMapped() const noexcept { + return type != VMAType::Free && type != VMAType::Reserved; } bool CanMergeWith(const VirtualMemoryArea& next) const { @@ -198,9 +206,11 @@ private: return iter; } - VirtualMemoryArea& AddMapping(VAddr virtual_addr, 
size_t size); + VAddr SearchFree(VAddr virtual_addr, size_t size, u32 alignment = 0); - DirectMemoryArea& AddDmemAllocation(PAddr addr, size_t size); + VMAHandle CarveVMA(VAddr virtual_addr, size_t size); + + DirectMemoryArea& CarveDmemArea(PAddr addr, size_t size); VMAHandle Split(VMAHandle vma_handle, size_t offset_in_vma); diff --git a/src/emulator.cpp b/src/emulator.cpp index 04a965b32..5b162e056 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -1,10 +1,18 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include +#include +#include +#include +#include +#include +#include +#include +#include #include "common/config.h" #include "common/debug.h" #include "common/logging/backend.h" +#include "common/logging/log.h" #include "common/ntapi.h" #include "common/path_util.h" #include "common/polyfill_thread.h" @@ -24,6 +32,8 @@ #include "emulator.h" #include "video_core/renderdoc.h" +#include + Frontend::WindowSDL* g_window = nullptr; namespace Core { @@ -50,8 +60,6 @@ Emulator::Emulator() { memory = Core::Memory::Instance(); controller = Common::Singleton::Instance(); linker = Common::Singleton::Instance(); - window = std::make_unique(WindowWidth, WindowHeight, controller); - g_window = window.get(); // Load renderdoc module. 
VideoCore::LoadRenderDoc(); @@ -69,6 +77,8 @@ void Emulator::Run(const std::filesystem::path& file) { // Loading param.sfo file if exists std::string id; + std::string title; + std::string app_version; std::filesystem::path sce_sys_folder = file.parent_path() / "sce_sys"; if (std::filesystem::is_directory(sce_sys_folder)) { for (const auto& entry : std::filesystem::directory_iterator(sce_sys_folder)) { @@ -76,11 +86,14 @@ void Emulator::Run(const std::filesystem::path& file) { auto* param_sfo = Common::Singleton::Instance(); param_sfo->open(sce_sys_folder.string() + "/param.sfo", {}); id = std::string(param_sfo->GetString("CONTENT_ID"), 7, 9); - std::string title(param_sfo->GetString("TITLE")); + title = param_sfo->GetString("TITLE"); LOG_INFO(Loader, "Game id: {} Title: {}", id, title); u32 fw_version = param_sfo->GetInteger("SYSTEM_VER"); - std::string app_version = param_sfo->GetString("APP_VER"); + app_version = param_sfo->GetString("APP_VER"); LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version); + } else if (entry.path().filename() == "playgo-chunk.dat") { + auto* playgo = Common::Singleton::Instance(); + playgo->Open(sce_sys_folder.string() + "/playgo-chunk.dat"); } else if (entry.path().filename() == "pic0.png" || entry.path().filename() == "pic1.png") { auto* splash = Common::Singleton::Instance(); @@ -93,6 +106,12 @@ void Emulator::Run(const std::filesystem::path& file) { } } } + std::string game_title = fmt::format("{} - {} <{}>", id, title, app_version); + + window = + std::make_unique(WindowWidth, WindowHeight, controller, game_title); + + g_window = window.get(); const auto& mount_data_dir = Common::FS::GetUserPath(Common::FS::PathType::GameDataDir) / id; if (!std::filesystem::exists(mount_data_dir)) { @@ -105,6 +124,13 @@ void Emulator::Run(const std::filesystem::path& file) { } mnt->Mount(mount_temp_dir, "/temp0"); // called in app_content ==> stat/mkdir + const auto& mount_download_dir = + 
Common::FS::GetUserPath(Common::FS::PathType::DownloadDir) / id; + if (!std::filesystem::exists(mount_download_dir)) { + std::filesystem::create_directory(mount_download_dir); + } + mnt->Mount(mount_download_dir, "/download0"); + const auto& mount_captures_dir = Common::FS::GetUserPath(Common::FS::PathType::CapturesDir); if (!std::filesystem::exists(mount_captures_dir)) { std::filesystem::create_directory(mount_captures_dir); @@ -151,10 +177,12 @@ void Emulator::Run(const std::filesystem::path& file) { } void Emulator::LoadSystemModules(const std::filesystem::path& file) { - constexpr std::array ModulesToLoad{ + constexpr std::array ModulesToLoad{ {{"libSceNgs2.sprx", nullptr}, {"libSceFiber.sprx", nullptr}, {"libSceUlt.sprx", nullptr}, + {"libSceJson.sprx", nullptr}, + {"libSceJson2.sprx", nullptr}, {"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal}, {"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap}, {"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc}, diff --git a/src/sdl_window.cpp b/src/sdl_window.cpp index 9cf3e6cb7..2da246107 100644 --- a/src/sdl_window.cpp +++ b/src/sdl_window.cpp @@ -19,14 +19,15 @@ namespace Frontend { -WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_) +WindowSDL::WindowSDL(s32 width_, s32 height_, Input::GameController* controller_, + std::string_view game_title) : width{width_}, height{height_}, controller{controller_} { if (SDL_Init(SDL_INIT_VIDEO) < 0) { UNREACHABLE_MSG("Failed to initialize SDL video subsystem: {}", SDL_GetError()); } SDL_InitSubSystem(SDL_INIT_AUDIO); - const std::string title = "shadPS4 v" + std::string(Common::VERSION); + const std::string title = fmt::format("shadPS4 v{} | {}", Common::VERSION, game_title); SDL_PropertiesID props = SDL_CreateProperties(); SDL_SetStringProperty(props, SDL_PROP_WINDOW_CREATE_TITLE_STRING, title.c_str()); SDL_SetNumberProperty(props, SDL_PROP_WINDOW_CREATE_X_NUMBER, SDL_WINDOWPOS_CENTERED); diff --git 
a/src/sdl_window.h b/src/sdl_window.h index 6e14fbd0e..89b2a8771 100644 --- a/src/sdl_window.h +++ b/src/sdl_window.h @@ -3,6 +3,7 @@ #pragma once +#include #include "common/types.h" struct SDL_Window; @@ -40,7 +41,8 @@ struct WindowSystemInfo { class WindowSDL { public: - explicit WindowSDL(s32 width, s32 height, Input::GameController* controller); + explicit WindowSDL(s32 width, s32 height, Input::GameController* controller, + std::string_view game_title); ~WindowSDL(); s32 getWidth() const { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index e2b411e47..80dd66b16 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -258,6 +258,7 @@ Id EmitISub64(EmitContext& ctx, Id a, Id b); Id EmitSMulExt(EmitContext& ctx, Id a, Id b); Id EmitUMulExt(EmitContext& ctx, Id a, Id b); Id EmitIMul32(EmitContext& ctx, Id a, Id b); +Id EmitIMul64(EmitContext& ctx, Id a, Id b); Id EmitSDiv32(EmitContext& ctx, Id a, Id b); Id EmitUDiv32(EmitContext& ctx, Id a, Id b); Id EmitINeg32(EmitContext& ctx, Id value); @@ -271,6 +272,7 @@ Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); +Id EmitBitwiseOr64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); @@ -286,8 +288,10 @@ Id EmitSMax32(EmitContext& ctx, Id a, Id b); Id EmitUMax32(EmitContext& ctx, Id a, Id b); Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); Id 
EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); -Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs); Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index d5a0f2767..019ceb01b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -84,6 +84,10 @@ Id EmitIMul32(EmitContext& ctx, Id a, Id b) { return ctx.OpIMul(ctx.U32[1], a, b); } +Id EmitIMul64(EmitContext& ctx, Id a, Id b) { + return ctx.OpIMul(ctx.U64, a, b); +} + Id EmitSDiv32(EmitContext& ctx, Id a, Id b) { return ctx.OpSDiv(ctx.U32[1], a, b); } @@ -142,6 +146,13 @@ Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { return result; } +Id EmitBitwiseOr64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { + const Id result{ctx.OpBitwiseOr(ctx.U64, a, b)}; + SetZeroFlag(ctx, inst, result); + SetSignFlag(ctx, inst, result); + return result; +} + Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { const Id result{ctx.OpBitwiseXor(ctx.U32[1], a, b)}; SetZeroFlag(ctx, inst, result); @@ -231,11 +242,19 @@ Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { return result; } -Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThan(ctx.U1[1], lhs, rhs); } -Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs) { + return 
ctx.OpSLessThan(ctx.U1[1], lhs, rhs); +} + +Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThan(ctx.U1[1], lhs, rhs); +} + +Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpULessThan(ctx.U1[1], lhs, rhs); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 16c10f53c..9ce87add2 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -388,6 +388,10 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) { image.GetNumberFmt() == AmdGpu::NumberFormat::Unorm) { return spv::ImageFormat::Rgba8; } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format8_8_8_8 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) { + return spv::ImageFormat::Rgba8ui; + } UNREACHABLE(); } diff --git a/src/shader_recompiler/frontend/opcodes.h b/src/shader_recompiler/frontend/opcodes.h index d38140d8f..cdc1e4746 100644 --- a/src/shader_recompiler/frontend/opcodes.h +++ b/src/shader_recompiler/frontend/opcodes.h @@ -2392,10 +2392,10 @@ enum class OperandField : u32 { ConstFloatPos_4_0, ConstFloatNeg_4_0, VccZ = 251, - ExecZ, - Scc, - LdsDirect, - LiteralConst, + ExecZ = 252, + Scc = 253, + LdsDirect = 254, + LiteralConst = 255, VectorGPR, Undefined = 0xFFFFFFFF, diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 9e67e82e5..c4c6e5052 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -76,21 +76,21 @@ void Translator::EmitPrologue() { } } +template <> IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { - // Input modifiers work on float values. 
- force_flt |= operand.input_modifier.abs | operand.input_modifier.neg; - IR::U32F32 value{}; + + const bool is_float = operand.type == ScalarType::Float32 || force_flt; switch (operand.field) { case OperandField::ScalarGPR: - if (operand.type == ScalarType::Float32 || force_flt) { + if (is_float) { value = ir.GetScalarReg(IR::ScalarReg(operand.code)); } else { value = ir.GetScalarReg(IR::ScalarReg(operand.code)); } break; case OperandField::VectorGPR: - if (operand.type == ScalarType::Float32 || force_flt) { + if (is_float) { value = ir.GetVectorReg(IR::VectorReg(operand.code)); } else { value = ir.GetVectorReg(IR::VectorReg(operand.code)); @@ -164,15 +164,160 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { UNREACHABLE(); } - if (operand.input_modifier.abs) { - value = ir.FPAbs(value); - } - if (operand.input_modifier.neg) { - value = ir.FPNeg(value); + if (is_float) { + if (operand.input_modifier.abs) { + value = ir.FPAbs(value); + } + if (operand.input_modifier.neg) { + value = ir.FPNeg(value); + } } return value; } +template <> +IR::U32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { + return GetSrc(operand, force_flt); +} + +template <> +IR::F32 Translator::GetSrc(const InstOperand& operand, bool) { + return GetSrc(operand, true); +} + +template <> +IR::U64F64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) { + IR::Value value_hi{}; + IR::Value value_lo{}; + + bool immediate = false; + const bool is_float = operand.type == ScalarType::Float64 || force_flt; + switch (operand.field) { + case OperandField::ScalarGPR: + if (is_float) { + value_lo = ir.GetScalarReg(IR::ScalarReg(operand.code)); + value_hi = ir.GetScalarReg(IR::ScalarReg(operand.code + 1)); + } else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) { + value_lo = ir.GetScalarReg(IR::ScalarReg(operand.code)); + value_hi = ir.GetScalarReg(IR::ScalarReg(operand.code + 1)); + } else { + UNREACHABLE(); + } + 
break; + case OperandField::VectorGPR: + if (is_float) { + value_lo = ir.GetVectorReg(IR::VectorReg(operand.code)); + value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1)); + } else if (operand.type == ScalarType::Uint64 || operand.type == ScalarType::Sint64) { + value_lo = ir.GetVectorReg(IR::VectorReg(operand.code)); + value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1)); + } else { + UNREACHABLE(); + } + break; + case OperandField::ConstZero: + immediate = true; + if (force_flt) { + value_lo = ir.Imm64(0.0); + } else { + value_lo = ir.Imm64(u64(0U)); + } + break; + case OperandField::SignedConstIntPos: + ASSERT(!force_flt); + immediate = true; + value_lo = ir.Imm64(s64(operand.code) - SignedConstIntPosMin + 1); + break; + case OperandField::SignedConstIntNeg: + ASSERT(!force_flt); + immediate = true; + value_lo = ir.Imm64(-s64(operand.code) + SignedConstIntNegMin - 1); + break; + case OperandField::LiteralConst: + immediate = true; + if (force_flt) { + UNREACHABLE(); // There is a literal double? 
+ } else { + value_lo = ir.Imm64(u64(operand.code)); + } + break; + case OperandField::ConstFloatPos_1_0: + immediate = true; + if (force_flt) { + value_lo = ir.Imm64(1.0); + } else { + value_lo = ir.Imm64(std::bit_cast(f64(1.0))); + } + break; + case OperandField::ConstFloatPos_0_5: + immediate = true; + value_lo = ir.Imm64(0.5); + break; + case OperandField::ConstFloatPos_2_0: + immediate = true; + value_lo = ir.Imm64(2.0); + break; + case OperandField::ConstFloatPos_4_0: + immediate = true; + value_lo = ir.Imm64(4.0); + break; + case OperandField::ConstFloatNeg_0_5: + immediate = true; + value_lo = ir.Imm64(-0.5); + break; + case OperandField::ConstFloatNeg_1_0: + immediate = true; + value_lo = ir.Imm64(-1.0); + break; + case OperandField::ConstFloatNeg_2_0: + immediate = true; + value_lo = ir.Imm64(-2.0); + break; + case OperandField::ConstFloatNeg_4_0: + immediate = true; + value_lo = ir.Imm64(-4.0); + break; + case OperandField::VccLo: { + value_lo = ir.GetVccLo(); + value_hi = ir.GetVccHi(); + } break; + case OperandField::VccHi: + UNREACHABLE(); + default: + UNREACHABLE(); + } + + IR::Value value; + + if (immediate) { + value = value_lo; + } else if (is_float) { + throw NotImplementedException("required OpPackDouble2x32 implementation"); + } else { + IR::Value packed = ir.CompositeConstruct(value_lo, value_hi); + value = ir.PackUint2x32(packed); + } + + if (is_float) { + if (operand.input_modifier.abs) { + value = ir.FPAbs(IR::F32F64(value)); + } + if (operand.input_modifier.neg) { + value = ir.FPNeg(IR::F32F64(value)); + } + } + return IR::U64F64(value); +} + +template <> +IR::U64 Translator::GetSrc64(const InstOperand& operand, bool force_flt) { + return GetSrc64(operand, force_flt); +} +template <> +IR::F64 Translator::GetSrc64(const InstOperand& operand, bool) { + return GetSrc64(operand, true); +} + void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) { IR::U32F32 result = value; if (operand.output_modifier.multiplier != 0.f) { 
@@ -197,6 +342,43 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) { } } +void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_raw) { + IR::U64F64 value_untyped = value_raw; + + const bool is_float = value_raw.Type() == IR::Type::F64 || value_raw.Type() == IR::Type::F32; + if (is_float) { + if (operand.output_modifier.multiplier != 0.f) { + value_untyped = + ir.FPMul(value_untyped, ir.Imm64(f64(operand.output_modifier.multiplier))); + } + if (operand.output_modifier.clamp) { + value_untyped = ir.FPSaturate(value_raw); + } + } + const IR::U64 value = + is_float ? ir.BitCast(IR::F64{value_untyped}) : IR::U64{value_untyped}; + + const IR::Value unpacked{ir.UnpackUint2x32(value)}; + const IR::U32 lo{ir.CompositeExtract(unpacked, 0U)}; + const IR::U32 hi{ir.CompositeExtract(unpacked, 1U)}; + switch (operand.field) { + case OperandField::ScalarGPR: + ir.SetScalarReg(IR::ScalarReg(operand.code + 1), hi); + return ir.SetScalarReg(IR::ScalarReg(operand.code), lo); + case OperandField::VectorGPR: + ir.SetVectorReg(IR::VectorReg(operand.code + 1), hi); + return ir.SetVectorReg(IR::VectorReg(operand.code), lo); + case OperandField::VccLo: + UNREACHABLE(); + case OperandField::VccHi: + UNREACHABLE(); + case OperandField::M0: + break; + default: + UNREACHABLE(); + } +} + void Translator::EmitFetch(const GcnInst& inst) { // Read the pointer to the fetch shader assembly. 
const u32 sgpr_base = inst.src[0].code; @@ -320,6 +502,9 @@ void Translate(IR::Block* block, u32 block_base, std::span inst_l case Opcode::V_ADD_I32: translator.V_ADD_I32(inst); break; + case Opcode::V_ADDC_U32: + translator.V_ADDC_U32(inst); + break; case Opcode::V_CVT_F32_I32: translator.V_CVT_F32_I32(inst); break; @@ -470,6 +655,9 @@ void Translate(IR::Block* block, u32 block_base, std::span inst_l case Opcode::IMAGE_LOAD: translator.IMAGE_LOAD(false, inst); break; + case Opcode::V_MAD_U64_U32: + translator.V_MAD_U64_U32(inst); + break; case Opcode::V_CMP_GE_I32: translator.V_CMP_U32(ConditionOp::GE, true, false, inst); break; @@ -612,6 +800,9 @@ void Translate(IR::Block* block, u32 block_base, std::span inst_l case Opcode::BUFFER_STORE_DWORD: translator.BUFFER_STORE_FORMAT(1, false, inst); break; + case Opcode::BUFFER_STORE_DWORDX2: + translator.BUFFER_STORE_FORMAT(2, false, inst); + break; case Opcode::BUFFER_STORE_DWORDX3: translator.BUFFER_STORE_FORMAT(3, false, inst); break; diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 2aa6f7124..3203ad730 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -100,6 +100,7 @@ public: void V_AND_B32(const GcnInst& inst); void V_LSHLREV_B32(const GcnInst& inst); void V_ADD_I32(const GcnInst& inst); + void V_ADDC_U32(const GcnInst& inst); void V_CVT_F32_I32(const GcnInst& inst); void V_CVT_F32_U32(const GcnInst& inst); void V_MAD_F32(const GcnInst& inst); @@ -129,6 +130,7 @@ public: void V_CVT_U32_F32(const GcnInst& inst); void V_SUBREV_F32(const GcnInst& inst); void V_SUBREV_I32(const GcnInst& inst); + void V_MAD_U64_U32(const GcnInst& inst); void V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst); void V_LSHRREV_B32(const GcnInst& inst); void V_MUL_HI_U32(bool is_signed, const GcnInst& inst); @@ -186,8 +188,12 @@ public: void EXP(const GcnInst& 
inst); private: - IR::U32F32 GetSrc(const InstOperand& operand, bool flt_zero = false); + template + [[nodiscard]] T GetSrc(const InstOperand& operand, bool flt_zero = false); + template + [[nodiscard]] T GetSrc64(const InstOperand& operand, bool flt_zero = false); void SetDst(const InstOperand& operand, const IR::U32F32& value); + void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw); private: IR::IREmitter ir; diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index ca648f882..1b2024f89 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -67,7 +67,8 @@ void Translator::V_OR_B32(bool is_xor, const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))}; const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg(dst_reg, is_xor ? ir.BitwiseXor(src0, src1) : ir.BitwiseOr(src0, src1)); + ir.SetVectorReg(dst_reg, + is_xor ? 
ir.BitwiseXor(src0, src1) : IR::U32(ir.BitwiseOr(src0, src1)));
 }
 
 void Translator::V_AND_B32(const GcnInst& inst) {
@@ -92,6 +93,34 @@ void Translator::V_ADD_I32(const GcnInst& inst) {
     // TODO: Carry
 }
 
+// V_ADDC_U32: dst = src0 + src1 + carry_in; VCC receives the unsigned carry-out.
+void Translator::V_ADDC_U32(const GcnInst& inst) {
+    const auto src0 = GetSrc<IR::U32>(inst.src[0]);
+    const auto src1 = GetSrc<IR::U32>(inst.src[1]);
+
+    // The carry-in is one bit per lane in both encodings, so widen it to 0 or 1.
+    IR::U32 scarry;
+    if (inst.src_count == 3) { // VOP3
+        IR::U1 thread_bit{ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code))};
+        scarry = IR::U32{ir.Select(thread_bit, ir.Imm32(1), ir.Imm32(0))};
+    } else { // VOP2
+        scarry = IR::U32{ir.Select(ir.GetVcc(), ir.Imm32(1), ir.Imm32(0))};
+    }
+
+    const IR::U32 partial = ir.IAdd(src0, src1);
+    const IR::U32 result = ir.IAdd(partial, scarry);
+
+    const IR::VectorReg dst_reg{inst.dst[0].code};
+    ir.SetVectorReg(dst_reg, result);
+
+    // Check each addition separately: comparing only the final sum against the
+    // inputs misses 0xFFFFFFFF + 0xFFFFFFFF + 1, which wraps back to 0xFFFFFFFF.
+    const IR::U1 carry_sum = ir.ILessThan(partial, src0, false);
+    const IR::U1 carry_inc = ir.ILessThan(result, partial, false);
+    const IR::U1 did_overflow = ir.LogicalOr(carry_sum, carry_inc);
+    ir.SetVcc(did_overflow);
+}
+
 void Translator::V_CVT_F32_I32(const GcnInst& inst) {
     const IR::U32 src0{GetSrc(inst.src[0])};
     const IR::VectorReg dst_reg{inst.dst[0].code};
@@ -294,6 +323,24 @@ void Translator::V_SUBREV_I32(const GcnInst& inst) {
     // TODO: Carry-out
 }
 
+// V_MAD_U64_U32: dst[63:0] = u64(src0) * u64(src1) + src2; VCC receives the carry-out.
+void Translator::V_MAD_U64_U32(const GcnInst& inst) {
+    const auto src0 = GetSrc<IR::U32>(inst.src[0]);
+    const auto src1 = GetSrc<IR::U32>(inst.src[1]);
+    const auto src2 = GetSrc64<IR::U64>(inst.src[2]);
+
+    // Widen before multiplying so the upper 32 bits of the product are kept.
+    const IR::U64 mul_result = ir.IMul(ir.UConvert(64, src0), ir.UConvert(64, src1));
+    const IR::U64 sum_result = ir.IAdd(mul_result, src2);
+
+    SetDst64(inst.dst[0], sum_result);
+
+    const IR::U1 less_src0 = ir.ILessThan(sum_result, mul_result, false);
+    const IR::U1 less_src1 = ir.ILessThan(sum_result, src2, false);
+    const IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1);
+    ir.SetVcc(did_overflow);
+}
+
 void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) {
     const IR::U32 src0{GetSrc(inst.src[0])};
     const IR::U32 src1{GetSrc(inst.src[1])};
diff --git
a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index cd4fdaa29..6ea3123dd 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -964,8 +964,18 @@ IR::Value IREmitter::IMulExt(const U32& a, const U32& b, bool is_signed) { return Inst(is_signed ? Opcode::SMulExt : Opcode::UMulExt, a, b); } -U32 IREmitter::IMul(const U32& a, const U32& b) { - return Inst(Opcode::IMul32, a, b); +U32U64 IREmitter::IMul(const U32U64& a, const U32U64& b) { + if (a.Type() != b.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U32: + return Inst(Opcode::IMul32, a, b); + case Type::U64: + return Inst(Opcode::IMul64, a, b); + default: + ThrowInvalidType(a.Type()); + } } U32 IREmitter::IDiv(const U32& a, const U32& b, bool is_signed) { @@ -1024,8 +1034,18 @@ U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) { return Inst(Opcode::BitwiseAnd32, a, b); } -U32 IREmitter::BitwiseOr(const U32& a, const U32& b) { - return Inst(Opcode::BitwiseOr32, a, b); +U32U64 IREmitter::BitwiseOr(const U32U64& a, const U32U64& b) { + if (a.Type() != b.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); + } + switch (a.Type()) { + case Type::U32: + return Inst(Opcode::BitwiseOr32, a, b); + case Type::U64: + return Inst(Opcode::BitwiseOr64, a, b); + default: + ThrowInvalidType(a.Type()); + } } U32 IREmitter::BitwiseXor(const U32& a, const U32& b) { @@ -1095,8 +1115,18 @@ U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) { return Inst(Opcode::UClamp32, value, min, max); } -U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { - return Inst(is_signed ? 
Opcode::SLessThan : Opcode::ULessThan, lhs, rhs);
+U1 IREmitter::ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed) {
+    if (lhs.Type() != rhs.Type()) {
+        UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
+    }
+    switch (lhs.Type()) {
+    case Type::U32:
+        return Inst<U1>(is_signed ? Opcode::SLessThan32 : Opcode::ULessThan32, lhs, rhs);
+    case Type::U64:
+        return Inst<U1>(is_signed ? Opcode::SLessThan64 : Opcode::ULessThan64, lhs, rhs);
+    default:
+        ThrowInvalidType(lhs.Type());
+    }
 }
 
 U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
@@ -1155,8 +1185,9 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const F32F64& value) {
             ThrowInvalidType(value.Type());
         }
     default:
-        UNREACHABLE_MSG("Invalid destination bitsize {}", bitsize);
+        break;
     }
+    throw NotImplementedException("Invalid destination bitsize {}", bitsize);
 }
 
 U32U64 IREmitter::ConvertFToU(size_t bitsize, const F32F64& value) {
@@ -1183,13 +1214,19 @@ F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Val
         switch (src_bitsize) {
         case 32:
             return Inst<F32>(Opcode::ConvertF32S32, value);
+        default:
+            break;
         }
         break;
     case 64:
         switch (src_bitsize) {
         case 32:
             return Inst<F64>(Opcode::ConvertF64S32, value);
+        default:
+            break;
         }
         break;
+    default:
+        break;
     }
     UNREACHABLE_MSG("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
@@ -1203,13 +1240,19 @@ F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Val
             return Inst<F32>(Opcode::ConvertF32U16, value);
         case 32:
             return Inst<F32>(Opcode::ConvertF32U32, value);
+        default:
+            break;
         }
         break;
     case 64:
         switch (src_bitsize) {
         case 32:
             return Inst<F64>(Opcode::ConvertF64U32, value);
+        default:
+            break;
         }
         break;
+    default:
+        break;
     }
     UNREACHABLE_MSG("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
@@ -1227,7 +1270,11 @@ U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U16U32U64& value) {
         switch (value.Type()) {
         case Type::U32:
             return Inst<U16>(Opcode::ConvertU16U32, value);
+        default:
+            break;
         }
+    default:
+        break;
     }
     throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
 }
@@ -1238,13 +1285,19 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
         switch (value.Type()) {
         case Type::F32:
             return Inst<F16>(Opcode::ConvertF16F32, value);
+        default:
+            break;
         }
         break; // keep: falling into case 32 would turn an F16 input into ConvertF32F16
     case 32:
         switch (value.Type()) {
         case Type::F16:
             return Inst<F32>(Opcode::ConvertF32F16, value);
+        default:
+            break;
         }
         break;
+    default:
+        break;
     }
     throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index e7512430a..7ee4e8240 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -159,7 +159,7 @@ public:
     [[nodiscard]] Value IAddCary(const U32& a, const U32& b);
     [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
     [[nodiscard]] Value IMulExt(const U32& a, const U32& b, bool is_signed = false);
-    [[nodiscard]] U32 IMul(const U32& a, const U32& b);
+    [[nodiscard]] U32U64 IMul(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32 IDiv(const U32& a, const U32& b, bool is_signed = false);
     [[nodiscard]] U32U64 INeg(const U32U64& value);
     [[nodiscard]] U32 IAbs(const U32& value);
@@ -167,7 +167,7 @@ public:
     [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
     [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
     [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
-    [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
+    [[nodiscard]] U32U64 BitwiseOr(const U32U64& a, const U32U64& b);
     [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
     [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
                                      const U32& count);
@@ -188,7 +188,7 @@ public:
     [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max);
     [[nodiscard]] U32 UClamp(const U32& value, const U32& min,
const U32& max); - [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed); [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 9aefc8b39..628b8d4fa 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -227,6 +227,7 @@ OPCODE(IAddCary32, U32x2, U32, OPCODE(ISub32, U32, U32, U32, ) OPCODE(ISub64, U64, U64, U64, ) OPCODE(IMul32, U32, U32, U32, ) +OPCODE(IMul64, U64, U64, U64, ) OPCODE(SMulExt, U32x2, U32, U32, ) OPCODE(UMulExt, U32x2, U32, U32, ) OPCODE(SDiv32, U32, U32, U32, ) @@ -242,6 +243,7 @@ OPCODE(ShiftRightArithmetic32, U32, U32, OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) OPCODE(BitwiseAnd32, U32, U32, U32, ) OPCODE(BitwiseOr32, U32, U32, U32, ) +OPCODE(BitwiseOr64, U64, U64, U64, ) OPCODE(BitwiseXor32, U32, U32, U32, ) OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) @@ -258,8 +260,10 @@ OPCODE(SMax32, U32, U32, OPCODE(UMax32, U32, U32, U32, ) OPCODE(SClamp32, U32, U32, U32, U32, ) OPCODE(UClamp32, U32, U32, U32, U32, ) -OPCODE(SLessThan, U1, U32, U32, ) -OPCODE(ULessThan, U1, U32, U32, ) +OPCODE(SLessThan32, U1, U32, U32, ) +OPCODE(SLessThan64, U1, U64, U64, ) +OPCODE(ULessThan32, U1, U32, U32, ) +OPCODE(ULessThan64, U1, U64, U64, ) OPCODE(IEqual, U1, U32, U32, ) OPCODE(SLessThanEqual, U1, U32, U32, ) OPCODE(ULessThanEqual, U1, U32, U32, ) diff --git a/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp index 7cd896fbd..13c0246ea 100644 --- a/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp +++ 
b/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp @@ -21,6 +21,8 @@ template return value.F32(); } else if constexpr (std::is_same_v) { return value.U64(); + } else if constexpr (std::is_same_v) { + return static_cast(value.U64()); } } @@ -281,12 +283,18 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return FoldLogicalOr(inst); case IR::Opcode::LogicalNot: return FoldLogicalNot(inst); - case IR::Opcode::SLessThan: + case IR::Opcode::SLessThan32: FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); return; - case IR::Opcode::ULessThan: + case IR::Opcode::SLessThan64: + FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a < b; }); + return; + case IR::Opcode::ULessThan32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); return; + case IR::Opcode::ULessThan64: + FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a < b; }); + return; case IR::Opcode::SLessThanEqual: FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; }); return; diff --git a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp index 6a43ad6be..805914924 100644 --- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp @@ -348,13 +348,15 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { case IR::Opcode::GetThreadBitScalarReg: case IR::Opcode::GetScalarRegister: { const IR::ScalarReg reg{inst.Arg(0).ScalarReg()}; - inst.ReplaceUsesWith( - pass.ReadVariable(reg, block, opcode == IR::Opcode::GetThreadBitScalarReg)); + const bool thread_bit = opcode == IR::Opcode::GetThreadBitScalarReg; + const IR::Value value = pass.ReadVariable(reg, block, thread_bit); + inst.ReplaceUsesWith(value); break; } case IR::Opcode::GetVectorRegister: { const IR::VectorReg reg{inst.Arg(0).VectorReg()}; - inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); + const IR::Value value = pass.ReadVariable(reg, block); + 
inst.ReplaceUsesWith(value); break; } case IR::Opcode::GetGotoVariable: diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h index a43c17f5b..db939eaa5 100644 --- a/src/shader_recompiler/ir/value.h +++ b/src/shader_recompiler/ir/value.h @@ -220,6 +220,7 @@ using F16 = TypedValue; using F32 = TypedValue; using F64 = TypedValue; using U32F32 = TypedValue; +using U64F64 = TypedValue; using U32U64 = TypedValue; using U16U32U64 = TypedValue; using F32F64 = TypedValue;