From 2380f2f9c9340fcf3b35cf9ff9ff24e3726e4d89 Mon Sep 17 00:00:00 2001 From: Vinicius Rangel Date: Thu, 5 Dec 2024 13:00:17 -0300 Subject: [PATCH 1/8] Virtual device abstraction (#1577) * IOFile: removes seek limit checks when file is writable * add virtual devices scaffold * add stdin/out/err as virtual devices * fixed some merging issues * clang-fix --------- Co-authored-by: georgemoralis --- CMakeLists.txt | 7 + src/common/io_file.cpp | 22 +-- src/common/io_file.h | 2 + src/common/logging/filter.cpp | 1 + src/common/logging/types.h | 1 + src/common/va_ctx.h | 111 +++++++++++++ src/core/devices/base_device.cpp | 12 ++ src/core/devices/base_device.h | 72 +++++++++ src/core/devices/ioccom.h | 67 ++++++++ src/core/devices/logger.cpp | 64 ++++++++ src/core/devices/logger.h | 37 +++++ src/core/devices/nop_device.h | 55 +++++++ src/core/file_sys/fs.cpp | 31 +++- src/core/file_sys/fs.h | 12 +- src/core/libraries/kernel/file_system.cpp | 186 ++++++++++++++++++---- src/core/libraries/kernel/kernel.cpp | 79 +++++---- src/core/libraries/kernel/process.cpp | 2 +- src/emulator.cpp | 3 + 18 files changed, 687 insertions(+), 77 deletions(-) create mode 100644 src/common/va_ctx.h create mode 100644 src/core/devices/base_device.cpp create mode 100644 src/core/devices/base_device.h create mode 100644 src/core/devices/ioccom.h create mode 100644 src/core/devices/logger.cpp create mode 100644 src/core/devices/logger.h create mode 100644 src/core/devices/nop_device.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f503832c..56760be37 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -502,6 +502,7 @@ set(COMMON src/common/logging/backend.cpp src/common/types.h src/common/uint128.h src/common/unique_function.h + src/common/va_ctx.h src/common/version.h src/common/ntapi.h src/common/ntapi.cpp @@ -526,6 +527,12 @@ set(CORE src/core/aerolib/stubs.cpp src/core/crypto/crypto.cpp src/core/crypto/crypto.h src/core/crypto/keys.h + src/core/devices/base_device.cpp + src/core/devices/base_device.h + src/core/devices/ioccom.h + src/core/devices/logger.cpp + src/core/devices/logger.h + src/core/devices/nop_device.h src/core/file_format/pfs.h src/core/file_format/pkg.cpp src/core/file_format/pkg.h diff --git a/src/common/io_file.cpp b/src/common/io_file.cpp index dd3a40cae..067010a26 100644 --- a/src/common/io_file.cpp +++ b/src/common/io_file.cpp @@ -377,16 +377,18 @@ bool IOFile::Seek(s64 offset, SeekOrigin origin) const { return false; } - u64 size = GetSize(); - if (origin == SeekOrigin::CurrentPosition && Tell() + offset > size) { - LOG_ERROR(Common_Filesystem, "Seeking past the end of the file"); - return false; - } else if (origin == SeekOrigin::SetOrigin && (u64)offset > size) { - LOG_ERROR(Common_Filesystem, "Seeking past the end of the file"); - return false; - } else if (origin == SeekOrigin::End && offset > 0) { - LOG_ERROR(Common_Filesystem, "Seeking past the end of the file"); - return false; + if (False(file_access_mode & (FileAccessMode::Write | FileAccessMode::Append))) { + u64 size = GetSize(); + if (origin == SeekOrigin::CurrentPosition && Tell() + offset > size) { + LOG_ERROR(Common_Filesystem, "Seeking past the end of the file"); + return false; + } else if (origin == SeekOrigin::SetOrigin && (u64)offset > size) { + LOG_ERROR(Common_Filesystem, "Seeking past the end of the file"); + return false; + } else if (origin == SeekOrigin::End && offset > 0) { + LOG_ERROR(Common_Filesystem, "Seeking past the end of the file"); + return false; + } } errno = 0; diff --git a/src/common/io_file.h b/src/common/io_file.h index 8fed4981f..feb2110ac 100644 --- a/src/common/io_file.h +++ b/src/common/io_file.h @@ -10,6 +10,7 @@ #include "common/concepts.h" #include "common/types.h" +#include "enum.h" namespace Common::FS { @@ -42,6 +43,7 @@ enum class FileAccessMode { */ ReadAppend = Read | Append, }; +DECLARE_ENUM_FLAG_OPERATORS(FileAccessMode); enum class FileType { BinaryFile, diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 632b2b329..75c61a188 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -69,6 +69,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) { SUB(Common, Memory) \ CLS(Core) \ SUB(Core, Linker) \ + SUB(Core, Devices) \ CLS(Config) \ CLS(Debug) \ CLS(Kernel) \ diff --git a/src/common/logging/types.h b/src/common/logging/types.h index e7e91882a..a0e7d021f 100644 --- a/src/common/logging/types.h +++ b/src/common/logging/types.h @@ -35,6 +35,7 @@ enum class Class : u8 { Common_Memory, ///< Memory mapping and management functions Core, ///< LLE emulation core Core_Linker, ///< The module linker + Core_Devices, ///< Devices emulation Config, ///< Emulator configuration (including commandline) Debug, ///< Debugging tools Kernel, ///< The HLE implementation of the PS4 kernel. diff --git a/src/common/va_ctx.h b/src/common/va_ctx.h new file mode 100644 index 000000000..e0b8c0bab --- /dev/null +++ b/src/common/va_ctx.h @@ -0,0 +1,111 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later +#pragma once + +#include +#include "common/types.h" + +#define VA_ARGS \ + uint64_t rdi, uint64_t rsi, uint64_t rdx, uint64_t rcx, uint64_t r8, uint64_t r9, \ + uint64_t overflow_arg_area, __m128 xmm0, __m128 xmm1, __m128 xmm2, __m128 xmm3, \ + __m128 xmm4, __m128 xmm5, __m128 xmm6, __m128 xmm7, ... + +#define VA_CTX(ctx) \ + alignas(16)::Common::VaCtx ctx{}; \ + (ctx).reg_save_area.gp[0] = rdi; \ + (ctx).reg_save_area.gp[1] = rsi; \ + (ctx).reg_save_area.gp[2] = rdx; \ + (ctx).reg_save_area.gp[3] = rcx; \ + (ctx).reg_save_area.gp[4] = r8; \ + (ctx).reg_save_area.gp[5] = r9; \ + (ctx).reg_save_area.fp[0] = xmm0; \ + (ctx).reg_save_area.fp[1] = xmm1; \ + (ctx).reg_save_area.fp[2] = xmm2; \ + (ctx).reg_save_area.fp[3] = xmm3; \ + (ctx).reg_save_area.fp[4] = xmm4; \ + (ctx).reg_save_area.fp[5] = xmm5; \ + (ctx).reg_save_area.fp[6] = xmm6; \ + (ctx).reg_save_area.fp[7] = xmm7; \ + (ctx).va_list.reg_save_area = &(ctx).reg_save_area; \ + (ctx).va_list.gp_offset = offsetof(::Common::VaRegSave, gp); \ + (ctx).va_list.fp_offset = offsetof(::Common::VaRegSave, fp); \ + (ctx).va_list.overflow_arg_area = &overflow_arg_area; + +namespace Common { + +// https://stackoverflow.com/questions/4958384/what-is-the-format-of-the-x86-64-va-list-structure + +struct VaList { + u32 gp_offset; + u32 fp_offset; + void* overflow_arg_area; + void* reg_save_area; +}; + +struct VaRegSave { + u64 gp[6]; + __m128 fp[8]; +}; + +struct VaCtx { + VaRegSave reg_save_area; + VaList va_list; +}; + +template +T vaArgRegSaveAreaGp(VaList* l) { + auto* addr = reinterpret_cast(static_cast(l->reg_save_area) + l->gp_offset); + l->gp_offset += Size; + return *addr; +} +template +T vaArgOverflowArgArea(VaList* l) { + auto ptr = ((reinterpret_cast(l->overflow_arg_area) + (Align - 1)) & ~(Align - 1)); + auto* addr = reinterpret_cast(ptr); + l->overflow_arg_area = reinterpret_cast(ptr + Size); + return *addr; +} + +template +T vaArgRegSaveAreaFp(VaList* l) { + auto* addr = reinterpret_cast(static_cast(l->reg_save_area) + l->fp_offset); + l->fp_offset += Size; + return *addr; +} + +inline int vaArgInteger(VaList* l) { + if (l->gp_offset <= 40) { + return vaArgRegSaveAreaGp(l); + } + return vaArgOverflowArgArea(l); +} + +inline long long vaArgLongLong(VaList* l) { + if (l->gp_offset <= 40) { + return vaArgRegSaveAreaGp(l); + } + return vaArgOverflowArgArea(l); +} +inline long vaArgLong(VaList* l) { + if (l->gp_offset <= 40) { + return vaArgRegSaveAreaGp(l); + } + return vaArgOverflowArgArea(l); +} + +inline double vaArgDouble(VaList* l) { + if (l->fp_offset <= 160) { + return vaArgRegSaveAreaFp(l); + } + return vaArgOverflowArgArea(l); +} + +template +T* vaArgPtr(VaList* l) { + if (l->gp_offset <= 40) { + return vaArgRegSaveAreaGp(l); + } + return vaArgOverflowArgArea(l); +} + +} // namespace Common diff --git a/src/core/devices/base_device.cpp b/src/core/devices/base_device.cpp new file mode 100644 index 000000000..4f91c81c7 --- /dev/null +++ b/src/core/devices/base_device.cpp @@ -0,0 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "base_device.h" + +namespace Core::Devices { + +BaseDevice::BaseDevice() = default; + +BaseDevice::~BaseDevice() = default; + +} // namespace Core::Devices \ No newline at end of file diff --git a/src/core/devices/base_device.h b/src/core/devices/base_device.h new file mode 100644 index 000000000..351af82b4 --- /dev/null +++ b/src/core/devices/base_device.h @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/types.h" +#include "common/va_ctx.h" + +namespace Libraries::Kernel { +struct OrbisKernelStat; +struct SceKernelIovec; +} // namespace Libraries::Kernel + +namespace Core::Devices { + +class BaseDevice { +public: + explicit BaseDevice(); + + virtual ~BaseDevice() = 0; + + virtual int ioctl(u64 cmd, Common::VaCtx* args) { + return ORBIS_KERNEL_ERROR_ENOTTY; + } + + virtual s64 write(const void* buf, size_t nbytes) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + virtual size_t readv(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + virtual size_t writev(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + virtual s64 preadv(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt, u64 offset) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + virtual s64 lseek(s64 offset, int whence) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + virtual s64 read(void* buf, size_t nbytes) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + virtual int fstat(Libraries::Kernel::OrbisKernelStat* sb) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + virtual s32 fsync() { + return ORBIS_KERNEL_ERROR_EBADF; + } + + virtual int ftruncate(s64 length) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + virtual int getdents(void* buf, u32 nbytes, s64* basep) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + virtual s64 pwrite(const void* buf, size_t nbytes, u64 offset) { + return ORBIS_KERNEL_ERROR_EBADF; + } +}; + +} // namespace Core::Devices diff --git a/src/core/devices/ioccom.h b/src/core/devices/ioccom.h new file mode 100644 index 000000000..671ee33d4 --- /dev/null +++ b/src/core/devices/ioccom.h @@ -0,0 +1,67 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +/*- + * Copyright (c) 1982, 1986, 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ioccom.h 8.2 (Berkeley) 3/28/94 + * $FreeBSD$ + */ + +#define IOCPARM_SHIFT 13 /* number of bits for ioctl size */ +#define IOCPARM_MASK ((1 << IOCPARM_SHIFT) - 1) /* parameter length mask */ +#define IOCPARM_LEN(x) (((x) >> 16) & IOCPARM_MASK) +#define IOCBASECMD(x) ((x) & ~(IOCPARM_MASK << 16)) +#define IOCGROUP(x) (((x) >> 8) & 0xff) + +#define IOCPARM_MAX (1 << IOCPARM_SHIFT) /* max size of ioctl */ +#define IOC_VOID 0x20000000 /* no parameters */ +#define IOC_OUT 0x40000000 /* copy out parameters */ +#define IOC_IN 0x80000000 /* copy in parameters */ +#define IOC_INOUT (IOC_IN | IOC_OUT) +#define IOC_DIRMASK (IOC_VOID | IOC_OUT | IOC_IN) + +#define _IOC(inout, group, num, len) \ + ((unsigned long)((inout) | (((len) & IOCPARM_MASK) << 16) | ((group) << 8) | (num))) +#define _IO(g, n) _IOC(IOC_VOID, (g), (n), 0) +#define _IOWINT(g, n) _IOC(IOC_VOID, (g), (n), sizeof(int)) +#define _IOR(g, n, t) _IOC(IOC_OUT, (g), (n), sizeof(t)) +#define _IOW(g, n, t) _IOC(IOC_IN, (g), (n), sizeof(t)) +/* this should be _IORW, but stdio got there first */ +#define _IOWR(g, n, t) _IOC(IOC_INOUT, (g), (n), sizeof(t)) + +/* +# Simple parse of ioctl cmd +def parse(v): + print('inout', (v >> 24 & 0xFF)) + print('len', hex(v >> 16 & 0xFF)) + print('group', chr(v >> 8 & 0xFF)) + print('num', hex(v & 0xFF)) +*/ diff --git a/src/core/devices/logger.cpp b/src/core/devices/logger.cpp new file mode 100644 index 000000000..bf5a28382 --- /dev/null +++ b/src/core/devices/logger.cpp @@ -0,0 +1,64 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/logging/log.h" +#include "core/libraries/kernel/file_system.h" +#include "logger.h" + +namespace Core::Devices { + +Logger::Logger(std::string prefix, bool is_err) : prefix(std::move(prefix)), is_err(is_err) {} + +Logger::~Logger() = default; + +s64 Logger::write(const void* buf, size_t nbytes) { + log(static_cast(buf), nbytes); + return nbytes; +} +size_t Logger::writev(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt) { + for (int i = 0; i < iovcnt; i++) { + log(static_cast(iov[i].iov_base), iov[i].iov_len); + } + return iovcnt; +} + +s64 Logger::pwrite(const void* buf, size_t nbytes, u64 offset) { + log(static_cast(buf), nbytes); + return nbytes; +} + +s32 Logger::fsync() { + log_flush(); + return 0; +} + +void Logger::log(const char* buf, size_t nbytes) { + std::scoped_lock lock{mtx}; + const char* end = buf + nbytes; + for (const char* it = buf; it < end; ++it) { + char c = *it; + if (c == '\r') { + continue; + } + if (c == '\n') { + log_flush(); + continue; + } + buffer.push_back(c); + } +} + +void Logger::log_flush() { + std::scoped_lock lock{mtx}; + if (buffer.empty()) { + return; + } + if (is_err) { + LOG_ERROR(Tty, "[{}] {}", prefix, std::string_view{buffer}); + } else { + LOG_INFO(Tty, "[{}] {}", prefix, std::string_view{buffer}); + } + buffer.clear(); +} + +} // namespace Core::Devices \ No newline at end of file diff --git a/src/core/devices/logger.h b/src/core/devices/logger.h new file mode 100644 index 000000000..bfb07f337 --- /dev/null +++ b/src/core/devices/logger.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "base_device.h" + +#include +#include +#include + +namespace Core::Devices { + +class Logger final : BaseDevice { + std::string prefix; + bool is_err; + + std::recursive_mutex mtx; + std::vector buffer; + +public: + explicit Logger(std::string prefix, bool is_err); + + ~Logger() override; + + s64 write(const void* buf, size_t nbytes) override; + size_t writev(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt) override; + s64 pwrite(const void* buf, size_t nbytes, u64 offset) override; + + s32 fsync() override; + +private: + void log(const char* buf, size_t nbytes); + void log_flush(); +}; + +} // namespace Core::Devices diff --git a/src/core/devices/nop_device.h b/src/core/devices/nop_device.h new file mode 100644 index 000000000..a75b92f1b --- /dev/null +++ b/src/core/devices/nop_device.h @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once +#include "base_device.h" + +namespace Core::Devices { + +class NopDevice final : BaseDevice { + u32 handle; + +public: + explicit NopDevice(u32 handle) : handle(handle) {} + + ~NopDevice() override = default; + + int ioctl(u64 cmd, Common::VaCtx* args) override { + return 0; + } + s64 write(const void* buf, size_t nbytes) override { + return 0; + } + size_t readv(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt) override { + return 0; + } + size_t writev(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt) override { + return 0; + } + s64 preadv(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt, u64 offset) override { + return 0; + } + s64 lseek(s64 offset, int whence) override { + return 0; + } + s64 read(void* buf, size_t nbytes) override { + return 0; + } + int fstat(Libraries::Kernel::OrbisKernelStat* sb) override { + return 0; + } + s32 fsync() override { + return 0; + } + int ftruncate(s64 length) override { + return 0; + } + int getdents(void* buf, u32 nbytes, s64* basep) override { + return 0; + } + s64 pwrite(const void* buf, size_t nbytes, u64 offset) override { + return 0; + } +}; + +} // namespace Core::Devices diff --git a/src/core/file_sys/fs.cpp b/src/core/file_sys/fs.cpp index 769940cf0..0fdbb2783 100644 --- a/src/core/file_sys/fs.cpp +++ b/src/core/file_sys/fs.cpp @@ -4,12 +4,12 @@ #include #include "common/config.h" #include "common/string_util.h" +#include "core/devices/logger.h" +#include "core/devices/nop_device.h" #include "core/file_sys/fs.h" namespace Core::FileSys { -constexpr int RESERVED_HANDLES = 3; // First 3 handles are stdin,stdout,stderr - void MntPoints::Mount(const std::filesystem::path& host_folder, const std::string& guest_folder, bool read_only) { std::scoped_lock lock{m_mutex}; @@ -135,7 +135,6 @@ int HandleTable::CreateHandle() { std::scoped_lock lock{m_mutex}; auto* file = new File{}; - file->is_directory = false; file->is_opened = false; int existingFilesNum = m_files.size(); @@ -143,23 +142,23 @@ int HandleTable::CreateHandle() { for (int index = 0; index < existingFilesNum; index++) { if (m_files.at(index) == nullptr) { m_files[index] = file; - return index + RESERVED_HANDLES; + return index; } } m_files.push_back(file); - return m_files.size() + RESERVED_HANDLES - 1; + return m_files.size() - 1; } void HandleTable::DeleteHandle(int d) { std::scoped_lock lock{m_mutex}; - delete m_files.at(d - RESERVED_HANDLES); - m_files[d - RESERVED_HANDLES] = nullptr; + delete m_files.at(d); + m_files[d] = nullptr; } File* HandleTable::GetFile(int d) { std::scoped_lock lock{m_mutex}; - return m_files.at(d - RESERVED_HANDLES); + return m_files.at(d); } File* HandleTable::GetFile(const std::filesystem::path& host_name) { @@ -171,4 +170,20 @@ File* HandleTable::GetFile(const std::filesystem::path& host_name) { return nullptr; } +void HandleTable::CreateStdHandles() { + auto setup = [this](const char* path, auto* device) { + int fd = CreateHandle(); + auto* file = GetFile(fd); + file->is_opened = true; + file->type = FileType::Device; + file->m_guest_name = path; + file->device = + std::shared_ptr{reinterpret_cast(device)}; + }; + // order matters + setup("/dev/stdin", new Devices::NopDevice(0)); // stdin + setup("/dev/stdout", new Devices::Logger("stdout", false)); // stdout + setup("/dev/stderr", new Devices::Logger("stderr", true)); // stderr +} + } // namespace Core::FileSys diff --git a/src/core/file_sys/fs.h b/src/core/file_sys/fs.h index eeaeaf781..b0153c162 100644 --- a/src/core/file_sys/fs.h +++ b/src/core/file_sys/fs.h @@ -9,6 +9,7 @@ #include #include #include "common/io_file.h" +#include "core/devices/base_device.h" namespace Core::FileSys { @@ -55,15 +56,22 @@ struct DirEntry { bool isFile; }; +enum class FileType { + Regular, // standard file + Directory, + Device, +}; + struct File { std::atomic_bool is_opened{}; - std::atomic_bool is_directory{}; + std::atomic type{FileType::Regular}; std::filesystem::path m_host_name; std::string m_guest_name; Common::FS::IOFile f; std::vector dirents; u32 dirents_index; std::mutex m_mutex; + std::shared_ptr device; // only valid for type == Device }; class HandleTable { @@ -76,6 +84,8 @@ public: File* GetFile(int d); File* GetFile(const std::filesystem::path& host_name); + void CreateStdHandles(); + private: std::vector m_files; std::mutex m_mutex; diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index 1b95e5270..447467cb7 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -11,6 +11,39 @@ #include "core/libraries/libs.h" #include "kernel.h" +#include +#include + +#include "core/devices/logger.h" +#include "core/devices/nop_device.h" + +namespace D = Core::Devices; +using FactoryDevice = std::function(u32, const char*, int, u16)>; + +#define GET_DEVICE_FD(fd) \ + [](u32, const char*, int, u16) { \ + return Common::Singleton::Instance()->GetFile(fd)->device; \ + } + +// prefix path, only dev devices +static std::map available_device = { + // clang-format off + {"/dev/stdin", GET_DEVICE_FD(0)}, + {"/dev/stdout", GET_DEVICE_FD(1)}, + {"/dev/stderr", GET_DEVICE_FD(2)}, + + {"/dev/fd/0", GET_DEVICE_FD(0)}, + {"/dev/fd/1", GET_DEVICE_FD(1)}, + {"/dev/fd/2", GET_DEVICE_FD(2)}, + + {"/dev/deci_stdin", GET_DEVICE_FD(0)}, + {"/dev/deci_stdout", GET_DEVICE_FD(1)}, + {"/dev/deci_stderr", GET_DEVICE_FD(2)}, + + {"/dev/null", GET_DEVICE_FD(0)}, // fd0 (stdin) is a nop device + // clang-format on +}; + namespace Libraries::Kernel { auto GetDirectoryEntries(const std::filesystem::path& path) { @@ -24,8 +57,8 @@ auto GetDirectoryEntries(const std::filesystem::path& path) { return files; } -int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) { - LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", path, flags, mode); +int PS4_SYSV_ABI sceKernelOpen(const char* raw_path, int flags, u16 mode) { + LOG_INFO(Kernel_Fs, "path = {} flags = {:#x} mode = {}", raw_path, flags, mode); auto* h = Common::Singleton::Instance(); auto* mnt = Common::Singleton::Instance(); @@ -44,22 +77,35 @@ int PS4_SYSV_ABI sceKernelOpen(const char* path, int flags, u16 mode) { bool direct = (flags & ORBIS_KERNEL_O_DIRECT) != 0; bool directory = (flags & ORBIS_KERNEL_O_DIRECTORY) != 0; - if (std::string_view{path} == "/dev/console") { + std::string_view path{raw_path}; + + if (path == "/dev/console") { return 2000; } - if (std::string_view{path} == "/dev/deci_tty6") { + if (path == "/dev/deci_tty6") { return 2001; } - if (std::string_view{path} == "/dev/stdout") { - return 2002; - } - if (std::string_view{path} == "/dev/urandom") { + if (path == "/dev/urandom") { return 2003; } + u32 handle = h->CreateHandle(); auto* file = h->GetFile(handle); + + if (path.starts_with("/dev/")) { + for (const auto& [prefix, factory] : available_device) { + if (path.starts_with(prefix)) { + file->is_opened = true; + file->type = Core::FileSys::FileType::Device; + file->m_guest_name = path; + file->device = factory(handle, path.data(), flags, mode); + return handle; + } + } + } + if (directory) { - file->is_directory = true; + file->type = Core::FileSys::FileType::Directory; file->m_guest_name = path; file->m_host_name = mnt->GetHostPath(file->m_guest_name); if (!std::filesystem::is_directory(file->m_host_name)) { // directory doesn't exist @@ -135,11 +181,12 @@ int PS4_SYSV_ABI sceKernelClose(int d) { if (file == nullptr) { return ORBIS_KERNEL_ERROR_EBADF; } - if (!file->is_directory) { + if (file->type == Core::FileSys::FileType::Regular) { file->f.Close(); } file->is_opened = false; LOG_INFO(Kernel_Fs, "Closing {}", file->m_guest_name); + // FIXME: Lock file mutex before deleting it? h->DeleteHandle(d); return ORBIS_OK; } @@ -155,14 +202,6 @@ int PS4_SYSV_ABI posix_close(int d) { } size_t PS4_SYSV_ABI sceKernelWrite(int d, const void* buf, size_t nbytes) { - if (d <= 2) { // stdin,stdout,stderr - char* str = strdup((const char*)buf); - if (str[nbytes - 1] == '\n') - str[nbytes - 1] = 0; - LOG_INFO(Tty, "{}", str); - free(str); - return nbytes; - } auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); if (file == nullptr) { @@ -170,6 +209,9 @@ size_t PS4_SYSV_ABI sceKernelWrite(int d, const void* buf, size_t nbytes) { } std::scoped_lock lk{file->m_mutex}; + if (file->type == Core::FileSys::FileType::Device) { + return file->device->write(buf, nbytes); + } return file->f.WriteRaw(buf, nbytes); } @@ -207,17 +249,63 @@ int PS4_SYSV_ABI sceKernelUnlink(const char* path) { size_t PS4_SYSV_ABI _readv(int d, const SceKernelIovec* iov, int iovcnt) { auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); - size_t total_read = 0; + if (file == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + std::scoped_lock lk{file->m_mutex}; + if (file->type == Core::FileSys::FileType::Device) { + int r = file->device->readv(iov, iovcnt); + if (r < 0) { + ErrSceToPosix(r); + return -1; + } + return r; + } + size_t total_read = 0; for (int i = 0; i < iovcnt; i++) { total_read += file->f.ReadRaw(iov[i].iov_base, iov[i].iov_len); } return total_read; } +size_t PS4_SYSV_ABI _writev(int fd, const SceKernelIovec* iov, int iovcn) { + if (fd == 1) { + size_t total_written = 0; + for (int i = 0; i < iovcn; i++) { + total_written += ::fwrite(iov[i].iov_base, 1, iov[i].iov_len, stdout); + } + return total_written; + } + auto* h = Common::Singleton::Instance(); + auto* file = h->GetFile(fd); + if (file == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + std::scoped_lock lk{file->m_mutex}; + + if (file->type == Core::FileSys::FileType::Device) { + return file->device->writev(iov, iovcn); + } + size_t total_written = 0; + for (int i = 0; i < iovcn; i++) { + total_written += file->f.WriteRaw(iov[i].iov_base, iov[i].iov_len); + } + return total_written; +} + s64 PS4_SYSV_ABI sceKernelLseek(int d, s64 offset, int whence) { auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(d); + if (file == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + std::scoped_lock lk{file->m_mutex}; + if (file->type == Core::FileSys::FileType::Device) { + return file->device->lseek(offset, whence); + } Common::FS::SeekOrigin origin{}; if (whence == 0) { @@ -228,7 +316,6 @@ s64 PS4_SYSV_ABI sceKernelLseek(int d, s64 offset, int whence) { origin = Common::FS::SeekOrigin::End; } - std::scoped_lock lk{file->m_mutex}; if (!file->f.Seek(offset, origin)) { LOG_CRITICAL(Kernel_Fs, "sceKernelLseek: failed to seek"); return ORBIS_KERNEL_ERROR_EINVAL; @@ -261,6 +348,9 @@ s64 PS4_SYSV_ABI sceKernelRead(int d, void* buf, size_t nbytes) { } std::scoped_lock lk{file->m_mutex}; + if (file->type == Core::FileSys::FileType::Device) { + return file->device->read(buf, nbytes); + } return file->f.ReadRaw(buf, nbytes); } @@ -409,7 +499,13 @@ int PS4_SYSV_ABI posix_stat(const char* path, OrbisKernelStat* sb) { int PS4_SYSV_ABI sceKernelCheckReachability(const char* path) { auto* mnt = Common::Singleton::Instance(); - const auto path_name = mnt->GetHostPath(path); + std::string_view guest_path{path}; + for (const auto& prefix : available_device | std::views::keys) { + if (guest_path.starts_with(prefix)) { + return ORBIS_OK; + } + } + const auto path_name = mnt->GetHostPath(guest_path); if (!std::filesystem::exists(path_name)) { return ORBIS_KERNEL_ERROR_ENOENT; } @@ -431,6 +527,10 @@ s64 PS4_SYSV_ABI sceKernelPreadv(int d, SceKernelIovec* iov, int iovcnt, s64 off } std::scoped_lock lk{file->m_mutex}; + if (file->type == Core::FileSys::FileType::Device) { + return file->device->preadv(iov, iovcnt, offset); + } + const s64 pos = file->f.Tell(); SCOPE_EXIT { file->f.Seek(pos); @@ -466,18 +566,25 @@ int PS4_SYSV_ABI sceKernelFStat(int fd, OrbisKernelStat* sb) { } std::memset(sb, 0, sizeof(OrbisKernelStat)); - if (file->is_directory) { - sb->st_mode = 0000777u | 0040000u; - sb->st_size = 0; - sb->st_blksize = 512; - sb->st_blocks = 0; - // TODO incomplete - } else { + switch (file->type) { + case Core::FileSys::FileType::Device: + return file->device->fstat(sb); + case Core::FileSys::FileType::Regular: sb->st_mode = 0000777u | 0100000u; sb->st_size = file->f.GetSize(); sb->st_blksize = 512; sb->st_blocks = (sb->st_size + 511) / 512; // TODO incomplete + break; + case Core::FileSys::FileType::Directory: + sb->st_mode = 0000777u | 0040000u; + sb->st_size = 0; + sb->st_blksize = 512; + sb->st_blocks = 0; + // TODO incomplete + break; + default: + UNREACHABLE(); } return ORBIS_OK; } @@ -495,6 +602,13 @@ int PS4_SYSV_ABI posix_fstat(int fd, OrbisKernelStat* sb) { s32 PS4_SYSV_ABI sceKernelFsync(int fd) { auto* h = Common::Singleton::Instance(); auto* file = h->GetFile(fd); + if (file == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + + if (file->type == Core::FileSys::FileType::Device) { + return file->device->fsync(); + } file->f.Flush(); return ORBIS_OK; } @@ -517,6 +631,10 @@ int PS4_SYSV_ABI sceKernelFtruncate(int fd, s64 length) { return ORBIS_KERNEL_ERROR_EBADF; } + if (file->type == Core::FileSys::FileType::Device) { + return file->device->ftruncate(length); + } + if (file->m_host_name.empty()) { return ORBIS_KERNEL_ERROR_EACCES; } @@ -538,10 +656,15 @@ static int GetDents(int fd, char* buf, int nbytes, s64* basep) { if (file == nullptr) { return ORBIS_KERNEL_ERROR_EBADF; } + if (file->type != Core::FileSys::FileType::Device) { + return file->device->getdents(buf, nbytes, basep); + } + if (file->dirents_index == file->dirents.size()) { return ORBIS_OK; } - if (!file->is_directory || nbytes < 512 || file->dirents_index > file->dirents.size()) { + if (file->type != Core::FileSys::FileType::Directory || nbytes < 512 || + file->dirents_index > file->dirents.size()) { return ORBIS_KERNEL_ERROR_EINVAL; } const auto& entry = file->dirents.at(file->dirents_index++); @@ -586,6 +709,10 @@ s64 PS4_SYSV_ABI sceKernelPwrite(int d, void* buf, size_t nbytes, s64 offset) { } std::scoped_lock lk{file->m_mutex}; + + if (file->type == Core::FileSys::FileType::Device) { + return file->device->pwrite(buf, nbytes, offset); + } const s64 pos = file->f.Tell(); SCOPE_EXIT { file->f.Seek(pos); @@ -637,6 +764,7 @@ void RegisterFileSystem(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("4wSze92BhLI", "libkernel", 1, "libkernel", 1, 1, sceKernelWrite); LIB_FUNCTION("+WRlkKjZvag", "libkernel", 1, "libkernel", 1, 1, _readv); + LIB_FUNCTION("YSHRBRLn2pI", "libkernel", 1, "libkernel", 1, 1, _writev); LIB_FUNCTION("Oy6IpwgtYOk", "libkernel", 1, "libkernel", 1, 1, posix_lseek); LIB_FUNCTION("Oy6IpwgtYOk", "libScePosix", 1, "libkernel", 1, 1, posix_lseek); LIB_FUNCTION("oib76F-12fk", "libkernel", 1, "libkernel", 1, 1, sceKernelLseek); diff --git a/src/core/libraries/kernel/kernel.cpp b/src/core/libraries/kernel/kernel.cpp index 4028116ef..0b4e89fc7 100644 --- a/src/core/libraries/kernel/kernel.cpp +++ b/src/core/libraries/kernel/kernel.cpp @@ -9,6 +9,9 @@ #include "common/logging/log.h" #include "common/polyfill_thread.h" #include "common/thread.h" +#include "common/va_ctx.h" +#include "core/file_sys/fs.h" +#include "core/libraries/error_codes.h" #include "core/libraries/kernel/equeue.h" #include "core/libraries/kernel/file_system.h" #include "core/libraries/kernel/kernel.h" @@ -24,6 +27,7 @@ #ifdef _WIN64 #include #endif +#include namespace Libraries::Kernel { @@ -65,19 +69,6 @@ static PS4_SYSV_ABI void stack_chk_fail() { UNREACHABLE(); } -struct iovec { - void* iov_base; /* Base address. */ - size_t iov_len; /* Length. */ -}; - -size_t PS4_SYSV_ABI _writev(int fd, const struct iovec* iov, int iovcn) { - size_t total_written = 0; - for (int i = 0; i < iovcn; i++) { - total_written += ::fwrite(iov[i].iov_base, 1, iov[i].iov_len, stdout); - } - return total_written; -} - static thread_local int g_posix_errno = 0; int* PS4_SYSV_ABI __Error() { @@ -142,24 +133,33 @@ void PS4_SYSV_ABI sceLibcHeapGetTraceInfo(HeapInfoInfo* info) { } s64 PS4_SYSV_ABI ps4__write(int d, const char* buf, std::size_t nbytes) { - if (d <= 2) { // stdin,stdout,stderr - std::string_view str{buf}; - if (str[nbytes - 1] == '\n') { - str = str.substr(0, nbytes - 1); - } - LOG_INFO(Tty, "{}", str); - return nbytes; + auto* h = Common::Singleton::Instance(); + auto* file = h->GetFile(d); + if (file == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; } - LOG_ERROR(Kernel, "(STUBBED) called d = {} nbytes = {} ", d, nbytes); - UNREACHABLE(); - return ORBIS_OK; + std::scoped_lock lk{file->m_mutex}; + if (file->type == Core::FileSys::FileType::Device) { + return file->device->write(buf, nbytes); + } + return file->f.WriteRaw(buf, nbytes); } s64 PS4_SYSV_ABI ps4__read(int d, void* buf, u64 nbytes) { - ASSERT_MSG(d == 0, "d is not 0!"); - - return static_cast( - strlen(std::fgets(static_cast(buf), static_cast(nbytes), stdin))); + if (d == 0) { + return static_cast( + strlen(std::fgets(static_cast(buf), static_cast(nbytes), stdin))); + } + auto* h = Common::Singleton::Instance(); + auto* file = h->GetFile(d); + if (file == nullptr) { + return ORBIS_KERNEL_ERROR_EBADF; + } + std::scoped_lock lk{file->m_mutex}; + if (file->type == Core::FileSys::FileType::Device) { + return file->device->read(buf, nbytes); + } + return file->f.ReadRaw(buf, nbytes); } struct OrbisKernelUuid { @@ -189,6 +189,29 @@ int PS4_SYSV_ABI sceKernelUuidCreate(OrbisKernelUuid* orbisUuid) { return 0; } +int PS4_SYSV_ABI kernel_ioctl(int fd, u64 cmd, VA_ARGS) { + auto* h = Common::Singleton::Instance(); + auto* file = h->GetFile(fd); + if (file == nullptr) { + LOG_INFO(Lib_Kernel, "ioctl: fd = {:X} cmd = {:X} file == nullptr", fd, cmd); + g_posix_errno = POSIX_EBADF; + return -1; + } + if (file->type != Core::FileSys::FileType::Device) { + LOG_WARNING(Lib_Kernel, "ioctl: fd = {:X} cmd = {:X} file->type != Device", fd, cmd); + g_posix_errno = ENOTTY; + return -1; + } + VA_CTX(ctx); + int result = file->device->ioctl(cmd, &ctx); + LOG_TRACE(Lib_Kernel, "ioctl: fd = {:X} cmd = {:X} result = {}", fd, cmd, result); + if (result < 0) { + ErrSceToPosix(result); + return -1; + } + return result; +} + const char* PS4_SYSV_ABI sceKernelGetFsSandboxRandomWord() { const char* path = "sys"; return path; @@ -219,13 +242,13 @@ void RegisterKernel(Core::Loader::SymbolsResolver* sym) { Libraries::Kernel::RegisterException(sym); LIB_OBJ("f7uOxY9mM1U", "libkernel", 1, "libkernel", 1, 1, &g_stack_chk_guard); + LIB_FUNCTION("PfccT7qURYE", "libkernel", 1, "libkernel", 1, 1, kernel_ioctl); LIB_FUNCTION("JGfTMBOdUJo", "libkernel", 1, "libkernel", 1, 1, sceKernelGetFsSandboxRandomWord); LIB_FUNCTION("XVL8So3QJUk", "libkernel", 1, "libkernel", 1, 1, posix_connect); LIB_FUNCTION("6xVpy0Fdq+I", "libkernel", 1, "libkernel", 1, 1, _sigprocmask); LIB_FUNCTION("Xjoosiw+XPI", "libkernel", 1, "libkernel", 1, 1, sceKernelUuidCreate); LIB_FUNCTION("Ou3iL1abvng", "libkernel", 1, "libkernel", 1, 1, stack_chk_fail); LIB_FUNCTION("9BcDykPmo1I", "libkernel", 1, "libkernel", 1, 1, __Error); - LIB_FUNCTION("YSHRBRLn2pI", "libkernel", 1, "libkernel", 1, 1, _writev); LIB_FUNCTION("DRuBt2pvICk", "libkernel", 1, "libkernel", 1, 1, ps4__read); LIB_FUNCTION("k+AXqu2-eBc", "libkernel", 1, "libkernel", 1, 1, posix_getpagesize); LIB_FUNCTION("k+AXqu2-eBc", "libScePosix", 1, "libkernel", 1, 1, posix_getpagesize); diff --git a/src/core/libraries/kernel/process.cpp b/src/core/libraries/kernel/process.cpp index 15e4ff820..a657ddf98 100644 --- a/src/core/libraries/kernel/process.cpp +++ b/src/core/libraries/kernel/process.cpp @@ -20,7 +20,7 @@ int PS4_SYSV_ABI sceKernelIsNeoMode() { int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver) { int version = Common::ElfInfo::Instance().RawFirmwareVer(); *ver = version; - return (version > 0) ? ORBIS_OK : ORBIS_KERNEL_ERROR_EINVAL; + return (version >= 0) ? ORBIS_OK : ORBIS_KERNEL_ERROR_EINVAL; } int PS4_SYSV_ABI sceKernelGetCpumode() { diff --git a/src/emulator.cpp b/src/emulator.cpp index 1d2542d2b..60d6e18d7 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -75,6 +75,9 @@ Emulator::Emulator() { LOG_INFO(Config, "Vulkan rdocMarkersEnable: {}", Config::vkMarkersEnabled()); LOG_INFO(Config, "Vulkan crashDiagnostics: {}", Config::vkCrashDiagnosticEnabled()); + // Create stdin/stdout/stderr + Common::Singleton::Instance()->CreateStdHandles(); + // Defer until after logging is initialized. memory = Core::Memory::Instance(); controller = Common::Singleton::Instance(); From 98f0cb65d757a6fb5877ec66573a48ed0bc3b994 Mon Sep 17 00:00:00 2001 From: "Daniel R." <47796739+polybiusproxy@users.noreply.github.com> Date: Thu, 5 Dec 2024 17:21:35 +0100 Subject: [PATCH 2/8] The way to Unity, pt.1 (#1659) --- src/common/ntapi.cpp | 6 + src/common/ntapi.h | 417 +++++++++++++++++- src/core/libraries/kernel/threads/pthread.cpp | 31 +- src/core/libraries/kernel/threads/pthread.h | 2 +- src/core/memory.cpp | 10 +- src/core/thread.cpp | 104 ++++- src/core/thread.h | 21 +- src/video_core/amdgpu/liverpool.cpp | 2 +- 8 files changed, 564 insertions(+), 29 deletions(-) diff --git a/src/common/ntapi.cpp b/src/common/ntapi.cpp index 0fe797e09..ffdedb17f 100644 --- a/src/common/ntapi.cpp +++ b/src/common/ntapi.cpp @@ -5,8 +5,11 @@ #include "ntapi.h" +NtClose_t NtClose = nullptr; NtDelayExecution_t NtDelayExecution = nullptr; NtSetInformationFile_t NtSetInformationFile = nullptr; +NtCreateThread_t NtCreateThread = nullptr; +NtTerminateThread_t NtTerminateThread = nullptr; namespace Common::NtApi { @@ -14,9 +17,12 @@ void Initialize() { HMODULE nt_handle = GetModuleHandleA("ntdll.dll"); // http://stackoverflow.com/a/31411628/4725495 + NtClose = (NtClose_t)GetProcAddress(nt_handle, "NtClose"); NtDelayExecution = (NtDelayExecution_t)GetProcAddress(nt_handle, "NtDelayExecution"); NtSetInformationFile = (NtSetInformationFile_t)GetProcAddress(nt_handle, "NtSetInformationFile"); + NtCreateThread = (NtCreateThread_t)GetProcAddress(nt_handle, "NtCreateThread"); + NtTerminateThread = (NtTerminateThread_t)GetProcAddress(nt_handle, "NtTerminateThread"); } } // namespace Common::NtApi diff --git a/src/common/ntapi.h b/src/common/ntapi.h index 17d353403..743174061 100644 --- a/src/common/ntapi.h +++ b/src/common/ntapi.h @@ -108,14 +108,427 @@ typedef struct _FILE_DISPOSITION_INFORMATION { BOOLEAN DeleteFile; } FILE_DISPOSITION_INFORMATION, *PFILE_DISPOSITION_INFORMATION; -typedef u32(__stdcall* NtDelayExecution_t)(BOOL Alertable, PLARGE_INTEGER DelayInterval); +typedef struct _UNICODE_STRING { + USHORT Length; + USHORT MaximumLength; + PWCH Buffer; +} UNICODE_STRING, *PUNICODE_STRING; -typedef u32(__stdcall* NtSetInformationFile_t)(HANDLE FileHandle, PIO_STATUS_BLOCK IoStatusBlock, +typedef const UNICODE_STRING* PCUNICODE_STRING; + +typedef struct _OBJECT_ATTRIBUTES { + ULONG Length; + HANDLE RootDirectory; + PCUNICODE_STRING ObjectName; + ULONG Attributes; + PVOID SecurityDescriptor; // PSECURITY_DESCRIPTOR; + PVOID SecurityQualityOfService; // PSECURITY_QUALITY_OF_SERVICE +} OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES; + +typedef const OBJECT_ATTRIBUTES* PCOBJECT_ATTRIBUTES; + +typedef struct _CLIENT_ID { + HANDLE UniqueProcess; + HANDLE UniqueThread; +} CLIENT_ID, *PCLIENT_ID; + +typedef struct _INITIAL_TEB { + struct { + PVOID OldStackBase; + PVOID OldStackLimit; + } OldInitialTeb; + PVOID StackBase; + PVOID StackLimit; + PVOID StackAllocationBase; +} INITIAL_TEB, *PINITIAL_TEB; + +typedef struct _PEB_LDR_DATA { + ULONG Length; + BOOLEAN Initialized; + PVOID SsHandle; + LIST_ENTRY InLoadOrderModuleList; + LIST_ENTRY InMemoryOrderModuleList; + LIST_ENTRY InInitializationOrderModuleList; + PVOID EntryInProgress; + BOOLEAN ShutdownInProgress; + HANDLE ShutdownThreadId; +} PEB_LDR_DATA, *PPEB_LDR_DATA; + +typedef struct _CURDIR { + UNICODE_STRING DosPath; + PVOID Handle; +} CURDIR, *PCURDIR; + +typedef struct RTL_DRIVE_LETTER_CURDIR { + USHORT Flags; + USHORT Length; + ULONG TimeStamp; + UNICODE_STRING DosPath; +} RTL_DRIVE_LETTER_CURDIR, *PRTL_DRIVE_LETTER_CURDIR; + +typedef struct _RTL_USER_PROCESS_PARAMETERS { + ULONG AllocationSize; + ULONG Size; + ULONG Flags; + ULONG DebugFlags; + HANDLE ConsoleHandle; + ULONG ConsoleFlags; + HANDLE hStdInput; + HANDLE hStdOutput; + HANDLE hStdError; + CURDIR CurrentDirectory; + UNICODE_STRING DllPath; + UNICODE_STRING ImagePathName; + UNICODE_STRING CommandLine; + PWSTR Environment; + ULONG dwX; + ULONG dwY; + ULONG dwXSize; + ULONG dwYSize; + ULONG dwXCountChars; + ULONG dwYCountChars; + ULONG dwFillAttribute; + ULONG dwFlags; + ULONG wShowWindow; + UNICODE_STRING WindowTitle; + UNICODE_STRING Desktop; + UNICODE_STRING ShellInfo; + UNICODE_STRING RuntimeInfo; + RTL_DRIVE_LETTER_CURDIR DLCurrentDirectory[0x20]; + ULONG_PTR EnvironmentSize; + ULONG_PTR EnvironmentVersion; + PVOID PackageDependencyData; + ULONG ProcessGroupId; + ULONG LoaderThreads; +} RTL_USER_PROCESS_PARAMETERS, *PRTL_USER_PROCESS_PARAMETERS; + +typedef struct tagRTL_BITMAP { + ULONG SizeOfBitMap; + PULONG Buffer; +} RTL_BITMAP, *PRTL_BITMAP; + +typedef struct { + UINT next; + UINT id; + ULONGLONG addr; + ULONGLONG size; + UINT args[4]; +} CROSS_PROCESS_WORK_ENTRY; + +typedef union { + struct { + UINT first; + UINT counter; + }; + volatile LONGLONG hdr; +} CROSS_PROCESS_WORK_HDR; + +typedef struct { + CROSS_PROCESS_WORK_HDR free_list; + CROSS_PROCESS_WORK_HDR work_list; + ULONGLONG unknown[4]; + CROSS_PROCESS_WORK_ENTRY entries[1]; +} CROSS_PROCESS_WORK_LIST; + +typedef struct _CHPEV2_PROCESS_INFO { + ULONG Wow64ExecuteFlags; /* 000 */ + USHORT NativeMachineType; /* 004 */ + USHORT EmulatedMachineType; /* 006 */ + HANDLE SectionHandle; /* 008 */ + CROSS_PROCESS_WORK_LIST* CrossProcessWorkList; /* 010 */ + void* unknown; /* 018 */ +} CHPEV2_PROCESS_INFO, *PCHPEV2_PROCESS_INFO; + +typedef u64(__stdcall* KERNEL_CALLBACK_PROC)(void*, ULONG); + +typedef struct _PEB { /* win32/win64 */ + BOOLEAN InheritedAddressSpace; /* 000/000 */ + BOOLEAN ReadImageFileExecOptions; /* 001/001 */ + BOOLEAN BeingDebugged; /* 002/002 */ + UCHAR ImageUsedLargePages : 1; /* 003/003 */ + UCHAR IsProtectedProcess : 1; + UCHAR IsImageDynamicallyRelocated : 1; + UCHAR SkipPatchingUser32Forwarders : 1; + UCHAR IsPackagedProcess : 1; + UCHAR IsAppContainer : 1; + UCHAR IsProtectedProcessLight : 1; + UCHAR IsLongPathAwareProcess : 1; + HANDLE Mutant; /* 004/008 */ + HMODULE ImageBaseAddress; /* 008/010 */ + PPEB_LDR_DATA LdrData; /* 00c/018 */ + RTL_USER_PROCESS_PARAMETERS* ProcessParameters; /* 010/020 */ + PVOID SubSystemData; /* 014/028 */ + HANDLE ProcessHeap; /* 018/030 */ + PRTL_CRITICAL_SECTION FastPebLock; /* 01c/038 */ + PVOID AtlThunkSListPtr; /* 020/040 */ + PVOID IFEOKey; /* 024/048 */ + ULONG ProcessInJob : 1; /* 028/050 */ + ULONG ProcessInitializing : 1; + ULONG ProcessUsingVEH : 1; + ULONG ProcessUsingVCH : 1; + ULONG ProcessUsingFTH : 1; + ULONG ProcessPreviouslyThrottled : 1; + ULONG ProcessCurrentlyThrottled : 1; + ULONG ProcessImagesHotPatched : 1; + ULONG ReservedBits0 : 24; + KERNEL_CALLBACK_PROC* KernelCallbackTable; /* 02c/058 */ + ULONG Reserved; /* 030/060 */ + ULONG AtlThunkSListPtr32; /* 034/064 */ + PVOID ApiSetMap; /* 038/068 */ + ULONG TlsExpansionCounter; /* 03c/070 */ + PRTL_BITMAP TlsBitmap; /* 040/078 */ + ULONG TlsBitmapBits[2]; /* 044/080 */ + PVOID ReadOnlySharedMemoryBase; /* 04c/088 */ + PVOID SharedData; /* 050/090 */ + PVOID* ReadOnlyStaticServerData; /* 054/098 */ + PVOID AnsiCodePageData; /* 058/0a0 */ + PVOID OemCodePageData; /* 05c/0a8 */ + PVOID UnicodeCaseTableData; /* 060/0b0 */ + ULONG NumberOfProcessors; /* 064/0b8 */ + ULONG NtGlobalFlag; /* 068/0bc */ + LARGE_INTEGER CriticalSectionTimeout; /* 070/0c0 */ + SIZE_T HeapSegmentReserve; /* 078/0c8 */ + SIZE_T HeapSegmentCommit; /* 07c/0d0 */ + SIZE_T HeapDeCommitTotalFreeThreshold; /* 080/0d8 */ + SIZE_T HeapDeCommitFreeBlockThreshold; /* 084/0e0 */ + ULONG NumberOfHeaps; /* 088/0e8 */ + ULONG MaximumNumberOfHeaps; /* 08c/0ec */ + PVOID* ProcessHeaps; /* 090/0f0 */ + PVOID GdiSharedHandleTable; /* 094/0f8 */ + PVOID ProcessStarterHelper; /* 098/100 */ + PVOID GdiDCAttributeList; /* 09c/108 */ + PVOID LoaderLock; /* 0a0/110 */ + ULONG OSMajorVersion; /* 0a4/118 */ + ULONG OSMinorVersion; /* 0a8/11c */ + ULONG OSBuildNumber; /* 0ac/120 */ + ULONG OSPlatformId; /* 0b0/124 */ + ULONG ImageSubSystem; /* 0b4/128 */ + ULONG ImageSubSystemMajorVersion; /* 0b8/12c */ + ULONG ImageSubSystemMinorVersion; /* 0bc/130 */ + KAFFINITY ActiveProcessAffinityMask; /* 0c0/138 */ +#ifdef _WIN64 + ULONG GdiHandleBuffer[60]; /* /140 */ +#else + ULONG GdiHandleBuffer[34]; /* 0c4/ */ +#endif + PVOID PostProcessInitRoutine; /* 14c/230 */ + PRTL_BITMAP TlsExpansionBitmap; /* 150/238 */ + ULONG TlsExpansionBitmapBits[32]; /* 154/240 */ + ULONG SessionId; /* 1d4/2c0 */ + ULARGE_INTEGER AppCompatFlags; /* 1d8/2c8 */ + ULARGE_INTEGER AppCompatFlagsUser; /* 1e0/2d0 */ + PVOID ShimData; /* 1e8/2d8 */ + PVOID AppCompatInfo; /* 1ec/2e0 */ + UNICODE_STRING CSDVersion; /* 1f0/2e8 */ + PVOID ActivationContextData; /* 1f8/2f8 */ + PVOID ProcessAssemblyStorageMap; /* 1fc/300 */ + PVOID SystemDefaultActivationData; /* 200/308 */ + PVOID SystemAssemblyStorageMap; /* 204/310 */ + SIZE_T MinimumStackCommit; /* 208/318 */ + PVOID* FlsCallback; /* 20c/320 */ + LIST_ENTRY FlsListHead; /* 210/328 */ + union { + PRTL_BITMAP FlsBitmap; /* 218/338 */ +#ifdef _WIN64 + CHPEV2_PROCESS_INFO* ChpeV2ProcessInfo; /* /338 */ +#endif + }; + ULONG FlsBitmapBits[4]; /* 21c/340 */ + ULONG FlsHighIndex; /* 22c/350 */ + PVOID WerRegistrationData; /* 230/358 */ + PVOID WerShipAssertPtr; /* 234/360 */ + PVOID EcCodeBitMap; /* 238/368 */ + PVOID pImageHeaderHash; /* 23c/370 */ + ULONG HeapTracingEnabled : 1; /* 240/378 */ + ULONG CritSecTracingEnabled : 1; + ULONG LibLoaderTracingEnabled : 1; + ULONG SpareTracingBits : 29; + ULONGLONG CsrServerReadOnlySharedMemoryBase; /* 248/380 */ + ULONG TppWorkerpListLock; /* 250/388 */ + LIST_ENTRY TppWorkerpList; /* 254/390 */ + PVOID WaitOnAddressHashTable[0x80]; /* 25c/3a0 */ + PVOID TelemetryCoverageHeader; /* 45c/7a0 */ + ULONG CloudFileFlags; /* 460/7a8 */ + ULONG CloudFileDiagFlags; /* 464/7ac */ + CHAR PlaceholderCompatibilityMode; /* 468/7b0 */ + CHAR PlaceholderCompatibilityModeReserved[7]; /* 469/7b1 */ + PVOID LeapSecondData; /* 470/7b8 */ + ULONG LeapSecondFlags; /* 474/7c0 */ + ULONG NtGlobalFlag2; /* 478/7c4 */ +} PEB, *PPEB; + +typedef struct _RTL_ACTIVATION_CONTEXT_STACK_FRAME { + struct _RTL_ACTIVATION_CONTEXT_STACK_FRAME* Previous; + struct _ACTIVATION_CONTEXT* ActivationContext; + ULONG Flags; +} RTL_ACTIVATION_CONTEXT_STACK_FRAME, *PRTL_ACTIVATION_CONTEXT_STACK_FRAME; + +typedef struct _ACTIVATION_CONTEXT_STACK { + RTL_ACTIVATION_CONTEXT_STACK_FRAME* ActiveFrame; + LIST_ENTRY FrameListCache; + ULONG Flags; + ULONG NextCookieSequenceNumber; + ULONG_PTR StackId; +} ACTIVATION_CONTEXT_STACK, *PACTIVATION_CONTEXT_STACK; + +typedef struct _GDI_TEB_BATCH { + ULONG Offset; + HANDLE HDC; + ULONG Buffer[0x136]; +} GDI_TEB_BATCH; + +typedef struct _TEB_ACTIVE_FRAME_CONTEXT { + ULONG Flags; + const char* FrameName; +} TEB_ACTIVE_FRAME_CONTEXT, *PTEB_ACTIVE_FRAME_CONTEXT; + +typedef struct _TEB_ACTIVE_FRAME { + ULONG Flags; + struct _TEB_ACTIVE_FRAME* Previous; + TEB_ACTIVE_FRAME_CONTEXT* Context; +} TEB_ACTIVE_FRAME, *PTEB_ACTIVE_FRAME; + +typedef struct _TEB { /* win32/win64 */ + NT_TIB Tib; /* 000/0000 */ + PVOID EnvironmentPointer; /* 01c/0038 */ + CLIENT_ID ClientId; /* 020/0040 */ + PVOID ActiveRpcHandle; /* 028/0050 */ + PVOID ThreadLocalStoragePointer; /* 02c/0058 */ + PPEB Peb; /* 030/0060 */ + ULONG LastErrorValue; /* 034/0068 */ + ULONG CountOfOwnedCriticalSections; /* 038/006c */ + PVOID CsrClientThread; /* 03c/0070 */ + PVOID Win32ThreadInfo; /* 040/0078 */ + ULONG User32Reserved[26]; /* 044/0080 */ + ULONG UserReserved[5]; /* 0ac/00e8 */ + PVOID WOW32Reserved; /* 0c0/0100 */ + ULONG CurrentLocale; /* 0c4/0108 */ + ULONG FpSoftwareStatusRegister; /* 0c8/010c */ + PVOID ReservedForDebuggerInstrumentation[16]; /* 0cc/0110 */ +#ifdef _WIN64 + PVOID SystemReserved1[30]; /* /0190 */ +#else + PVOID SystemReserved1[26]; /* 10c/ used for krnl386 private data in Wine */ +#endif + char PlaceholderCompatibilityMode; /* 174/0280 */ + BOOLEAN PlaceholderHydrationAlwaysExplicit; /* 175/0281 */ + char PlaceholderReserved[10]; /* 176/0282 */ + DWORD ProxiedProcessId; /* 180/028c */ + ACTIVATION_CONTEXT_STACK ActivationContextStack; /* 184/0290 */ + UCHAR WorkingOnBehalfOfTicket[8]; /* 19c/02b8 */ + LONG ExceptionCode; /* 1a4/02c0 */ + ACTIVATION_CONTEXT_STACK* ActivationContextStackPointer; /* 1a8/02c8 */ + ULONG_PTR InstrumentationCallbackSp; /* 1ac/02d0 */ + ULONG_PTR InstrumentationCallbackPreviousPc; /* 1b0/02d8 */ + ULONG_PTR InstrumentationCallbackPreviousSp; /* 1b4/02e0 */ +#ifdef _WIN64 + ULONG TxFsContext; /* /02e8 */ + BOOLEAN InstrumentationCallbackDisabled; /* /02ec */ + BOOLEAN UnalignedLoadStoreExceptions; /* /02ed */ +#else + BOOLEAN InstrumentationCallbackDisabled; /* 1b8/ */ + BYTE SpareBytes1[23]; /* 1b9/ */ + ULONG TxFsContext; /* 1d0/ */ +#endif + GDI_TEB_BATCH GdiTebBatch; /* 1d4/02f0 used for ntdll private data in Wine */ + CLIENT_ID RealClientId; /* 6b4/07d8 */ + HANDLE GdiCachedProcessHandle; /* 6bc/07e8 */ + ULONG GdiClientPID; /* 6c0/07f0 */ + ULONG GdiClientTID; /* 6c4/07f4 */ + PVOID GdiThreadLocaleInfo; /* 6c8/07f8 */ + ULONG_PTR Win32ClientInfo[62]; /* 6cc/0800 used for user32 private data in Wine */ + PVOID glDispatchTable[233]; /* 7c4/09f0 */ + PVOID glReserved1[29]; /* b68/1138 */ + PVOID glReserved2; /* bdc/1220 */ + PVOID glSectionInfo; /* be0/1228 */ + PVOID glSection; /* be4/1230 */ + PVOID glTable; /* be8/1238 */ + PVOID glCurrentRC; /* bec/1240 */ + PVOID glContext; /* bf0/1248 */ + ULONG LastStatusValue; /* bf4/1250 */ + UNICODE_STRING StaticUnicodeString; /* bf8/1258 */ + WCHAR StaticUnicodeBuffer[261]; /* c00/1268 */ + PVOID DeallocationStack; /* e0c/1478 */ + PVOID TlsSlots[64]; /* e10/1480 */ + LIST_ENTRY TlsLinks; /* f10/1680 */ + PVOID Vdm; /* f18/1690 */ + PVOID ReservedForNtRpc; /* f1c/1698 */ + PVOID DbgSsReserved[2]; /* f20/16a0 */ + ULONG HardErrorMode; /* f28/16b0 */ +#ifdef _WIN64 + PVOID Instrumentation[11]; /* /16b8 */ +#else + PVOID Instrumentation[9]; /* f2c/ */ +#endif + GUID ActivityId; /* f50/1710 */ + PVOID SubProcessTag; /* f60/1720 */ + PVOID PerflibData; /* f64/1728 */ + PVOID EtwTraceData; /* f68/1730 */ + PVOID WinSockData; /* f6c/1738 */ + ULONG GdiBatchCount; /* f70/1740 */ + ULONG IdealProcessorValue; /* f74/1744 */ + ULONG GuaranteedStackBytes; /* f78/1748 */ + PVOID ReservedForPerf; /* f7c/1750 */ + PVOID ReservedForOle; /* f80/1758 */ + ULONG WaitingOnLoaderLock; /* f84/1760 */ + PVOID SavedPriorityState; /* f88/1768 */ + ULONG_PTR ReservedForCodeCoverage; /* f8c/1770 */ + PVOID ThreadPoolData; /* f90/1778 */ + PVOID* TlsExpansionSlots; /* f94/1780 */ +#ifdef _WIN64 + union { + PVOID DeallocationBStore; /* /1788 */ + PVOID* ChpeV2CpuAreaInfo; /* /1788 */ + } DUMMYUNIONNAME; + PVOID BStoreLimit; /* /1790 */ +#endif + ULONG MuiGeneration; /* f98/1798 */ + ULONG IsImpersonating; /* f9c/179c */ + PVOID NlsCache; /* fa0/17a0 */ + PVOID ShimData; /* fa4/17a8 */ + ULONG HeapVirtualAffinity; /* fa8/17b0 */ + PVOID CurrentTransactionHandle; /* fac/17b8 */ + TEB_ACTIVE_FRAME* ActiveFrame; /* fb0/17c0 */ + PVOID* FlsSlots; /* fb4/17c8 */ + PVOID PreferredLanguages; /* fb8/17d0 */ + PVOID UserPrefLanguages; /* fbc/17d8 */ + PVOID MergedPrefLanguages; /* fc0/17e0 */ + ULONG MuiImpersonation; /* fc4/17e8 */ + USHORT CrossTebFlags; /* fc8/17ec */ + USHORT SameTebFlags; /* fca/17ee */ + PVOID TxnScopeEnterCallback; /* fcc/17f0 */ + PVOID TxnScopeExitCallback; /* fd0/17f8 */ + PVOID TxnScopeContext; /* fd4/1800 */ + ULONG LockCount; /* fd8/1808 */ + LONG WowTebOffset; /* fdc/180c */ + PVOID ResourceRetValue; /* fe0/1810 */ + PVOID ReservedForWdf; /* fe4/1818 */ + ULONGLONG ReservedForCrt; /* fe8/1820 */ + GUID EffectiveContainerId; /* ff0/1828 */ +} TEB, *PTEB; +static_assert(offsetof(TEB, DeallocationStack) == + 0x1478); /* The only member we care about at the moment */ + +typedef u64(__stdcall* NtClose_t)(HANDLE Handle); + +typedef u64(__stdcall* NtDelayExecution_t)(BOOL Alertable, PLARGE_INTEGER DelayInterval); + +typedef u64(__stdcall* NtSetInformationFile_t)(HANDLE FileHandle, PIO_STATUS_BLOCK IoStatusBlock, PVOID FileInformation, ULONG Length, FILE_INFORMATION_CLASS FileInformationClass); +typedef u64(__stdcall* NtCreateThread_t)(PHANDLE ThreadHandle, ACCESS_MASK DesiredAccess, + PCOBJECT_ATTRIBUTES ObjectAttributes, HANDLE ProcessHandle, + PCLIENT_ID ClientId, PCONTEXT ThreadContext, + PINITIAL_TEB InitialTeb, BOOLEAN CreateSuspended); + +typedef u64(__stdcall* NtTerminateThread_t)(HANDLE ThreadHandle, u64 ExitStatus); + +extern NtClose_t NtClose; extern NtDelayExecution_t NtDelayExecution; extern NtSetInformationFile_t NtSetInformationFile; +extern NtCreateThread_t NtCreateThread; +extern NtTerminateThread_t NtTerminateThread; namespace Common::NtApi { void Initialize(); diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 793ddd1fe..a562c51b2 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -206,6 +206,7 @@ static void RunThread(void* arg) { DebugState.AddCurrentThreadToGuestList(); /* Run the current thread's start routine with argument: */ + curthread->native_thr.Initialize(); void* ret = Core::ExecuteGuest(curthread->start_routine, curthread->arg); /* Remove thread from tracking */ @@ -280,7 +281,7 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt (*thread) = new_thread; /* Create thread */ - new_thread->native_thr = Core::Thread(); + new_thread->native_thr = Core::NativeThread(); int ret = new_thread->native_thr.Create(RunThread, new_thread, &new_thread->attr); ASSERT_MSG(ret == 0, "Failed to create thread with error {}", ret); if (ret) { @@ -412,6 +413,33 @@ int PS4_SYSV_ABI posix_pthread_getschedparam(PthreadT pthread, SchedPolicy* poli return 0; } +int PS4_SYSV_ABI posix_pthread_setschedparam(PthreadT pthread, SchedPolicy policy, + const SchedParam* param) { + if (pthread == nullptr || param == nullptr) { + return POSIX_EINVAL; + } + + auto* thread_state = ThrState::Instance(); + if (pthread == g_curthread) { + g_curthread->lock.lock(); + } else if (int ret = thread_state->FindThread(pthread, /*include dead*/ 0); ret != 0) { + return ret; + } + + if (pthread->attr.sched_policy == policy && + (policy == SchedPolicy::Other || pthread->attr.prio == param->sched_priority)) { + pthread->attr.prio = param->sched_priority; + pthread->lock.unlock(); + return 0; + } + + // TODO: _thr_setscheduler + pthread->attr.sched_policy = policy; + pthread->attr.prio = param->sched_priority; + pthread->lock.unlock(); + return 0; +} + int PS4_SYSV_ABI scePthreadGetprio(PthreadT thread, int* priority) { SchedParam param; SchedPolicy policy; @@ -495,6 +523,7 @@ void RegisterThread(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("lZzFeSxPl08", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setcancelstate); LIB_FUNCTION("a2P9wYGeZvc", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setprio); LIB_FUNCTION("FIs3-UQT9sg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_getschedparam); + LIB_FUNCTION("Xs9hdiD7sAA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_setschedparam); LIB_FUNCTION("6XG4B33N09g", "libScePosix", 1, "libkernel", 1, 1, sched_yield); // Posix-Kernel diff --git a/src/core/libraries/kernel/threads/pthread.h b/src/core/libraries/kernel/threads/pthread.h index b41ca2abd..9d71c75e8 100644 --- a/src/core/libraries/kernel/threads/pthread.h +++ b/src/core/libraries/kernel/threads/pthread.h @@ -259,7 +259,7 @@ struct Pthread { int refcount; PthreadEntryFunc start_routine; void* arg; - Core::Thread native_thr; + Core::NativeThread native_thr; PthreadAttr attr; bool cancel_enable; bool cancel_pending; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 15fde2a57..3e1cd441f 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -328,7 +328,7 @@ int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, Mem } // Map the file. - impl.MapFile(mapped_addr, size, offset, std::bit_cast(prot), fd); + impl.MapFile(mapped_addr, size_aligned, offset, std::bit_cast(prot), fd); // Add virtual memory area auto& new_vma = CarveVMA(mapped_addr, size_aligned)->second; @@ -512,9 +512,8 @@ int MemoryManager::VirtualQuery(VAddr addr, int flags, info->is_flexible.Assign(vma.type == VMAType::Flexible); info->is_direct.Assign(vma.type == VMAType::Direct); info->is_stack.Assign(vma.type == VMAType::Stack); - info->is_pooled.Assign(vma.type == VMAType::Pooled); - info->is_committed.Assign(vma.type != VMAType::Free && vma.type != VMAType::Reserved && - vma.type != VMAType::PoolReserved); + info->is_pooled.Assign(vma.type == VMAType::PoolReserved); + info->is_committed.Assign(vma.type == VMAType::Pooled); vma.name.copy(info->name.data(), std::min(info->name.size(), vma.name.size())); if (vma.type == VMAType::Direct) { const auto dmem_it = FindDmemArea(vma.phys_base); @@ -585,6 +584,7 @@ void MemoryManager::NameVirtualRange(VAddr virtual_addr, size_t size, std::strin "Range provided is not fully contained in vma"); it->second.name = name; } + VAddr MemoryManager::SearchFree(VAddr virtual_addr, size_t size, u32 alignment) { // If the requested address is below the mapped range, start search from the lowest address auto min_search_address = impl.SystemManagedVirtualBase(); @@ -691,7 +691,7 @@ MemoryManager::DMemHandle MemoryManager::Split(DMemHandle dmem_handle, size_t of new_area.size -= offset_in_area; return dmem_map.emplace_hint(std::next(dmem_handle), new_area.base, new_area); -}; +} int MemoryManager::GetDirectMemoryType(PAddr addr, int* directMemoryTypeOut, void** directMemoryStartOut, void** directMemoryEndOut) { diff --git a/src/core/thread.cpp b/src/core/thread.cpp index a93f16c8d..f87e3c8dc 100644 --- a/src/core/thread.cpp +++ b/src/core/thread.cpp @@ -4,45 +4,125 @@ #include "libraries/kernel/threads/pthread.h" #include "thread.h" +#include "core/libraries/kernel/threads/pthread.h" + #ifdef _WIN64 #include +#include "common/ntapi.h" #else #include #endif namespace Core { -Thread::Thread() : native_handle{0} {} - -Thread::~Thread() {} - -int Thread::Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr) { #ifdef _WIN64 - native_handle = CreateThread(nullptr, 0, (LPTHREAD_START_ROUTINE)func, arg, 0, nullptr); - return native_handle ? 0 : -1; -#else +#define KGDT64_R3_DATA (0x28) +#define KGDT64_R3_CODE (0x30) +#define KGDT64_R3_CMTEB (0x50) +#define RPL_MASK (0x03) + +#define INITIAL_FPUCW (0x037f) +#define INITIAL_MXCSR_MASK (0xffbf) +#define EFLAGS_INTERRUPT_MASK (0x200) + +void InitializeTeb(INITIAL_TEB* teb, const ::Libraries::Kernel::PthreadAttr* attr) { + teb->StackBase = (void*)((u64)attr->stackaddr_attr + attr->stacksize_attr); + teb->StackLimit = nullptr; + teb->StackAllocationBase = attr->stackaddr_attr; +} + +void InitializeContext(CONTEXT* ctx, ThreadFunc func, void* arg, + const ::Libraries::Kernel::PthreadAttr* attr) { + /* Note: The stack has to be reversed */ + ctx->Rsp = (u64)attr->stackaddr_attr + attr->stacksize_attr; + ctx->Rbp = (u64)attr->stackaddr_attr + attr->stacksize_attr; + ctx->Rcx = (u64)arg; + ctx->Rip = (u64)func; + + ctx->SegGs = KGDT64_R3_DATA | RPL_MASK; + ctx->SegEs = KGDT64_R3_DATA | RPL_MASK; + ctx->SegDs = KGDT64_R3_DATA | RPL_MASK; + ctx->SegCs = KGDT64_R3_CODE | RPL_MASK; + ctx->SegSs = KGDT64_R3_DATA | RPL_MASK; + ctx->SegFs = KGDT64_R3_CMTEB | RPL_MASK; + + ctx->EFlags = 0x3000 | EFLAGS_INTERRUPT_MASK; + ctx->MxCsr = INITIAL_MXCSR; + + ctx->FltSave.ControlWord = INITIAL_FPUCW; + ctx->FltSave.MxCsr = INITIAL_MXCSR; + ctx->FltSave.MxCsr_Mask = INITIAL_MXCSR_MASK; + + ctx->ContextFlags = + CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_SEGMENTS | CONTEXT_FLOATING_POINT; +} +#endif + +NativeThread::NativeThread() : native_handle{0} {} + +NativeThread::~NativeThread() {} + +int NativeThread::Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr) { +#ifndef _WIN64 pthread_t* pthr = reinterpret_cast(&native_handle); pthread_attr_t pattr; pthread_attr_init(&pattr); pthread_attr_setstack(&pattr, attr->stackaddr_attr, attr->stacksize_attr); return pthread_create(pthr, &pattr, (PthreadFunc)func, arg); +#else + CLIENT_ID clientId{}; + INITIAL_TEB teb{}; + CONTEXT ctx{}; + + clientId.UniqueProcess = GetCurrentProcess(); + clientId.UniqueThread = GetCurrentThread(); + + InitializeTeb(&teb, attr); + InitializeContext(&ctx, func, arg, attr); + + return NtCreateThread(&native_handle, THREAD_ALL_ACCESS, nullptr, GetCurrentProcess(), + &clientId, &ctx, &teb, false); #endif } -void Thread::Exit() { +void NativeThread::Exit() { if (!native_handle) { return; } + tid = 0; + #ifdef _WIN64 - CloseHandle(native_handle); + NtClose(native_handle); native_handle = nullptr; - // We call this assuming the thread has finished execution. - ExitThread(0); + /* The Windows kernel will free the stack + given at thread creation via INITIAL_TEB + (StackAllocationBase) upon thread termination. + + In earlier Windows versions (NT4 to Windows Server 2003), + you could get around this via disabling FreeStackOnTermination + on the TEB. This has been removed since then. + + To avoid this, we must forcefully set the TEB + deallocation stack pointer to NULL so ZwFreeVirtualMemory fails + in the kernel and our stack is not freed. + */ + auto* teb = reinterpret_cast(NtCurrentTeb()); + teb->DeallocationStack = nullptr; + + NtTerminateThread(nullptr, 0); #else pthread_exit(nullptr); #endif } +void NativeThread::Initialize() { +#if _WIN64 + tid = GetCurrentThreadId(); +#else + tid = (u64)pthread_self(); +#endif +} + } // namespace Core \ No newline at end of file diff --git a/src/core/thread.h b/src/core/thread.h index cfb8b8309..3bac0e699 100644 --- a/src/core/thread.h +++ b/src/core/thread.h @@ -11,27 +11,34 @@ struct PthreadAttr; namespace Core { -class Thread { -public: - using ThreadFunc = void (*)(void*); - using PthreadFunc = void* (*)(void*); +using ThreadFunc = void (*)(void*); +using PthreadFunc = void* (*)(void*); - Thread(); - ~Thread(); +class NativeThread { +public: + NativeThread(); + ~NativeThread(); int Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr); void Exit(); + void Initialize(); + uintptr_t GetHandle() { return reinterpret_cast(native_handle); } + u64 GetTid() { + return tid; + } + private: -#if _WIN64 +#ifdef _WIN64 void* native_handle; #else uintptr_t native_handle; #endif + u64 tid; }; } // namespace Core \ No newline at end of file diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index f7b710edd..b81806d10 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -46,7 +46,7 @@ Liverpool::~Liverpool() { } void Liverpool::Process(std::stop_token stoken) { - Common::SetCurrentThreadName("shadPS4:GPU_CommandProcessor"); + Common::SetCurrentThreadName("shadPS4:GpuCommandProcessor"); while (!stoken.stop_requested()) { { From 15ae7a094d3a4430ac2e7358c3a85af1409de404 Mon Sep 17 00:00:00 2001 From: "Daniel R." <47796739+polybiusproxy@users.noreply.github.com> Date: Thu, 5 Dec 2024 18:45:55 +0100 Subject: [PATCH 3/8] hotfix: fix inverted operator on GetDents --- src/core/libraries/kernel/file_system.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index 447467cb7..2a65255fb 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -656,7 +656,7 @@ static int GetDents(int fd, char* buf, int nbytes, s64* basep) { if (file == nullptr) { return ORBIS_KERNEL_ERROR_EBADF; } - if (file->type != Core::FileSys::FileType::Device) { + if (file->type == Core::FileSys::FileType::Device) { return file->device->getdents(buf, nbytes, basep); } From 37f4bad2b7efd74643f2b7d579011aa942e251f2 Mon Sep 17 00:00:00 2001 From: psucien <168137814+psucien@users.noreply.github.com> Date: Thu, 5 Dec 2024 22:09:22 +0100 Subject: [PATCH 4/8] video_core: fix for targets clears and copies (#1670) --- .../renderer_vulkan/vk_rasterizer.cpp | 49 ++++++++++++++----- .../texture_cache/texture_cache.cpp | 15 +++--- src/video_core/texture_cache/texture_cache.h | 23 +++++++-- 3 files changed, 61 insertions(+), 26 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 084b7c345..620e5f103 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -102,9 +102,6 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { continue; } - const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress()); - texture_cache.TouchMeta(col_buf.CmaskAddress(), false); - const auto& hint = liverpool->last_cb_extent[col_buf_id]; auto& [image_id, desc] = cb_descs.emplace_back(std::piecewise_construct, std::tuple{}, std::tuple{col_buf, hint}); @@ -113,6 +110,10 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { auto& image = texture_cache.GetImage(image_id); image.binding.is_target = 1u; + const auto slice = image_view.info.range.base.layer; + const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress(), slice); + texture_cache.TouchMeta(col_buf.CmaskAddress(), slice, false); + const auto mip = image_view.info.range.base.level; state.width = std::min(state.width, std::max(image.info.size.width >> mip, 1u)); state.height = std::min(state.height, std::max(image.info.size.height >> mip, 1u)); @@ -134,8 +135,6 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { (regs.depth_control.stencil_enable && regs.depth_buffer.stencil_info.format != StencilFormat::Invalid))) { const auto htile_address = regs.depth_htile_data_base.GetAddress(); - const bool is_clear = regs.depth_render_control.depth_clear_enable || - texture_cache.IsMetaCleared(htile_address); const auto& hint = liverpool->last_db_extent; auto& [image_id, desc] = db_desc.emplace(std::piecewise_construct, std::tuple{}, @@ -146,6 +145,11 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { auto& image = texture_cache.GetImage(image_id); image.binding.is_target = 1u; + const auto slice = image_view.info.range.base.layer; + const bool is_clear = regs.depth_render_control.depth_clear_enable || + texture_cache.IsMetaCleared(htile_address, slice); + ASSERT(desc.view_info.range.extent.layers == 1); + state.width = std::min(state.width, image.info.size.width); state.height = std::min(state.height, image.info.size.height); state.depth_image = image.image; @@ -157,7 +161,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear, .stencil = regs.stencil_clear}}, }; - texture_cache.TouchMeta(htile_address, false); + texture_cache.TouchMeta(htile_address, slice, false); state.has_depth = regs.depth_buffer.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid; state.has_stencil = regs.depth_buffer.stencil_info.format != @@ -359,9 +363,11 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { // will need its full emulation anyways. For cases of metadata read a warning will be // logged. const auto IsMetaUpdate = [&](const auto& desc) { - const VAddr address = desc.GetSharp(info).base_address; + const auto sharp = desc.GetSharp(info); + const VAddr address = sharp.base_address; if (desc.is_written) { - if (texture_cache.TouchMeta(address, true)) { + // Assume all slices were updates + if (texture_cache.ClearMeta(address)) { LOG_TRACE(Render_Vulkan, "Metadata update skipped"); return true; } @@ -373,17 +379,36 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { return false; }; + // Assume if a shader reads and writes metas at the same time, it is a copy shader. + bool meta_read = false; for (const auto& desc : info.buffers) { if (desc.is_gds_buffer) { continue; } - if (IsMetaUpdate(desc)) { - return false; + if (!desc.is_written) { + const VAddr address = desc.GetSharp(info).base_address; + meta_read = texture_cache.IsMeta(address); } } + for (const auto& desc : info.texture_buffers) { - if (IsMetaUpdate(desc)) { - return false; + if (!desc.is_written) { + const VAddr address = desc.GetSharp(info).base_address; + meta_read = texture_cache.IsMeta(address); + } + } + + if (!meta_read) { + for (const auto& desc : info.buffers) { + if (IsMetaUpdate(desc)) { + return false; + } + } + + for (const auto& desc : info.texture_buffers) { + if (IsMetaUpdate(desc)) { + return false; + } } } } diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 4373fdc52..1670648b3 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -398,17 +398,15 @@ ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) { // Register meta data for this color buffer if (!(image.flags & ImageFlagBits::MetaRegistered)) { if (desc.info.meta_info.cmask_addr) { - surface_metas.emplace( - desc.info.meta_info.cmask_addr, - MetaDataInfo{.type = MetaDataInfo::Type::CMask, .is_cleared = true}); + surface_metas.emplace(desc.info.meta_info.cmask_addr, + MetaDataInfo{.type = MetaDataInfo::Type::CMask}); image.info.meta_info.cmask_addr = desc.info.meta_info.cmask_addr; image.flags |= ImageFlagBits::MetaRegistered; } if (desc.info.meta_info.fmask_addr) { - surface_metas.emplace( - desc.info.meta_info.fmask_addr, - MetaDataInfo{.type = MetaDataInfo::Type::FMask, .is_cleared = true}); + surface_metas.emplace(desc.info.meta_info.fmask_addr, + MetaDataInfo{.type = MetaDataInfo::Type::FMask}); image.info.meta_info.fmask_addr = desc.info.meta_info.fmask_addr; image.flags |= ImageFlagBits::MetaRegistered; } @@ -428,9 +426,8 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) { // Register meta data for this depth buffer if (!(image.flags & ImageFlagBits::MetaRegistered)) { if (desc.info.meta_info.htile_addr) { - surface_metas.emplace( - desc.info.meta_info.htile_addr, - MetaDataInfo{.type = MetaDataInfo::Type::HTile, .is_cleared = true}); + surface_metas.emplace(desc.info.meta_info.htile_addr, + MetaDataInfo{.type = MetaDataInfo::Type::HTile}); image.info.meta_info.htile_addr = desc.info.meta_info.htile_addr; image.flags |= ImageFlagBits::MetaRegistered; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index fab4c832f..676ede777 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -156,18 +156,31 @@ public: return surface_metas.contains(address); } - bool IsMetaCleared(VAddr address) const { + bool IsMetaCleared(VAddr address, u32 slice) const { const auto& it = surface_metas.find(address); if (it != surface_metas.end()) { - return it.value().is_cleared; + return it.value().clear_mask & (1u << slice); } return false; } - bool TouchMeta(VAddr address, bool is_clear) { + bool ClearMeta(VAddr address) { auto it = surface_metas.find(address); if (it != surface_metas.end()) { - it.value().is_cleared = is_clear; + it.value().clear_mask = u32(-1); + return true; + } + return false; + } + + bool TouchMeta(VAddr address, u32 slice, bool is_clear) { + auto it = surface_metas.find(address); + if (it != surface_metas.end()) { + if (is_clear) { + it.value().clear_mask |= 1u << slice; + } else { + it.value().clear_mask &= ~(1u << slice); + } return true; } return false; @@ -280,7 +293,7 @@ private: HTile, }; Type type; - bool is_cleared; + u32 clear_mask{u32(-1)}; }; tsl::robin_map surface_metas; }; From 7fbe15de284c923187a1e06f0267ccb1cd84aea0 Mon Sep 17 00:00:00 2001 From: Richard Habitzreuter Date: Thu, 5 Dec 2024 18:09:43 -0300 Subject: [PATCH 5/8] Missing dependency on building-windows.md (#1658) * Missing dependency on building-windows.md * Update building-windows.md --- documents/building-windows.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/documents/building-windows.md b/documents/building-windows.md index 48fd09c41..0da630f0b 100644 --- a/documents/building-windows.md +++ b/documents/building-windows.md @@ -79,7 +79,7 @@ Normal x86-based computers, follow: 1. Open "MSYS2 MINGW64" from your new applications 2. Run `pacman -Syu`, let it complete; -3. Run `pacman -S --needed git mingw-w64-x86_64-binutils mingw-w64-x86_64-clang mingw-w64-x86_64-cmake mingw-w64-x86_64-ninja mingw-w64-x86_64-ffmpeg` +3. Run `pacman -S --needed git mingw-w64-x86_64-binutils mingw-w64-x86_64-clang mingw-w64-x86_64-cmake mingw-w64-x86_64-rapidjson mingw-w64-x86_64-ninja mingw-w64-x86_64-ffmpeg` 1. Optional (Qt only): run `pacman -S --needed mingw-w64-x86_64-qt6-base mingw-w64-x86_64-qt6-tools mingw-w64-x86_64-qt6-multimedia` 4. Run `git clone --depth 1 --recursive https://github.com/shadps4-emu/shadPS4` 5. Run `cd shadPS4` @@ -93,7 +93,7 @@ ARM64-based computers, follow: 1. Open "MSYS2 CLANGARM64" from your new applications 2. Run `pacman -Syu`, let it complete; -3. Run `pacman -S --needed git mingw-w64-clang-aarch64-binutils mingw-w64-clang-aarch64-clang mingw-w64-clang-aarch64-cmake mingw-w64-clang-aarch64-ninja mingw-w64-clang-aarch64-ffmpeg` +3. Run `pacman -S --needed git mingw-w64-clang-aarch64-binutils mingw-w64-clang-aarch64-clang mingw-w64-clang-aarch64-rapidjson mingw-w64-clang-aarch64-cmake mingw-w64-clang-aarch64-ninja mingw-w64-clang-aarch64-ffmpeg` 1. Optional (Qt only): run `pacman -S --needed mingw-w64-clang-aarch64-qt6-base mingw-w64-clang-aarch64-qt6-tools mingw-w64-clang-aarch64-qt6-multimedia` 4. Run `git clone --depth 1 --recursive https://github.com/shadps4-emu/shadPS4` 5. Run `cd shadPS4` From 642dedea8c9099b0b84ff4cd653d0c79fc9bb4d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Thu, 5 Dec 2024 22:09:59 +0100 Subject: [PATCH 6/8] Handle INDIRECT_BUFFER_CONST in ProcessCeUpdate (#1613) --- src/video_core/amdgpu/liverpool.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index b81806d10..38fca6bc0 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -161,6 +161,19 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { } break; } + case PM4ItOpcode::IndirectBufferConst: { + const auto* indirect_buffer = reinterpret_cast(header); + auto task = ProcessCeUpdate( + {indirect_buffer->Address(), indirect_buffer->ib_size}); + while (!task.handle.done()) { + task.handle.resume(); + + TracyFiberLeave; + co_yield {}; + TracyFiberEnter(ccb_task_name); + }; + break; + } default: const u32 count = header->type3.NumWords(); UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}", From 874508f8c2bd721e1f97a0a877dd4686bf43ef83 Mon Sep 17 00:00:00 2001 From: Alexandre Bouvier Date: Thu, 5 Dec 2024 21:10:27 +0000 Subject: [PATCH 7/8] cmake: unbundle stb (#1601) --- CMakeLists.txt | 5 ++++- REUSE.toml | 2 +- cmake/Findstb.cmake | 19 +++++++++++++++++++ externals/CMakeLists.txt | 7 +++++++ externals/{ => stb}/stb_image.h | 0 src/common/stb.cpp | 7 +++++++ src/common/stb.h | 6 ++++++ src/core/file_format/splash.cpp | 6 +----- src/imgui/renderer/texture_manager.cpp | 3 +-- 9 files changed, 46 insertions(+), 9 deletions(-) create mode 100644 cmake/Findstb.cmake rename externals/{ => stb}/stb_image.h (100%) create mode 100644 src/common/stb.cpp create mode 100644 src/common/stb.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 56760be37..378b8f78d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,6 +117,7 @@ find_package(magic_enum 0.9.6 CONFIG) find_package(PNG 1.6 MODULE) find_package(RenderDoc 1.6.0 MODULE) find_package(SDL3 3.1.2 CONFIG) +find_package(stb MODULE) find_package(toml11 4.2.0 CONFIG) find_package(tsl-robin-map 1.3.0 CONFIG) find_package(VulkanHeaders 1.3.289 CONFIG) @@ -495,6 +496,8 @@ set(COMMON src/common/logging/backend.cpp src/common/slot_vector.h src/common/spin_lock.cpp src/common/spin_lock.h + src/common/stb.cpp + src/common/stb.h src/common/string_util.cpp src/common/string_util.h src/common/thread.cpp @@ -867,7 +870,7 @@ endif() create_target_directory_groups(shadps4) target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG) -target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3 pugixml::pugixml) +target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::SPIRV glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers) target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h") diff --git a/REUSE.toml b/REUSE.toml index 2d94c9292..5bd21bead 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -63,7 +63,7 @@ SPDX-FileCopyrightText = "2019-2024 Baldur Karlsson" SPDX-License-Identifier = "MIT" [[annotations]] -path = "externals/stb_image.h" +path = "externals/stb/**" precedence = "aggregate" SPDX-FileCopyrightText = "2017 Sean Barrett" SPDX-License-Identifier = "MIT" diff --git a/cmake/Findstb.cmake b/cmake/Findstb.cmake new file mode 100644 index 000000000..667911e1d --- /dev/null +++ b/cmake/Findstb.cmake @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later + +find_path(stb_image_INCLUDE_DIR stb_image.h PATH_SUFFIXES stb) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(stb + REQUIRED_VARS stb_image_INCLUDE_DIR +) + +if (stb_FOUND AND NOT TARGET stb::headers) + add_library(stb::headers INTERFACE IMPORTED) + set_property(TARGET stb::headers PROPERTY + INTERFACE_INCLUDE_DIRECTORIES + "${stb_image_INCLUDE_DIR}" + ) +endif() + +mark_as_advanced(stb_image_INCLUDE_DIR) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index bc2d41bda..8ccae8070 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -222,3 +222,10 @@ endif() # GCN Headers add_subdirectory(gcn) + +# stb +if (NOT TARGET stb::headers) + add_library(stb INTERFACE) + target_include_directories(stb INTERFACE stb) + add_library(stb::headers ALIAS stb) +endif() diff --git a/externals/stb_image.h b/externals/stb/stb_image.h similarity index 100% rename from externals/stb_image.h rename to externals/stb/stb_image.h diff --git a/src/common/stb.cpp b/src/common/stb.cpp new file mode 100644 index 000000000..0cd916185 --- /dev/null +++ b/src/common/stb.cpp @@ -0,0 +1,7 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#define STB_IMAGE_IMPLEMENTATION +#define STBI_ONLY_PNG +#define STBI_NO_STDIO +#include "common/stb.h" diff --git a/src/common/stb.h b/src/common/stb.h new file mode 100644 index 000000000..6f4d34483 --- /dev/null +++ b/src/common/stb.h @@ -0,0 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include diff --git a/src/core/file_format/splash.cpp b/src/core/file_format/splash.cpp index 5e06c912d..b68702157 100644 --- a/src/core/file_format/splash.cpp +++ b/src/core/file_format/splash.cpp @@ -5,13 +5,9 @@ #include "common/assert.h" #include "common/io_file.h" +#include "common/stb.h" #include "splash.h" -#define STB_IMAGE_IMPLEMENTATION -#define STBI_ONLY_PNG -#define STBI_NO_STDIO -#include "externals/stb_image.h" - bool Splash::Open(const std::filesystem::path& filepath) { ASSERT_MSG(filepath.stem().string() != "png", "Unexpected file format passed"); diff --git a/src/imgui/renderer/texture_manager.cpp b/src/imgui/renderer/texture_manager.cpp index 7f9c69d49..dd233ee60 100644 --- a/src/imgui/renderer/texture_manager.cpp +++ b/src/imgui/renderer/texture_manager.cpp @@ -4,12 +4,11 @@ #include #include -#include - #include "common/assert.h" #include "common/config.h" #include "common/io_file.h" #include "common/polyfill_thread.h" +#include "common/stb.h" #include "imgui_impl_vulkan.h" #include "texture_manager.h" From 22a2741ea01dcd10ad207da9bb87211cda179099 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Thu, 5 Dec 2024 23:14:16 +0200 Subject: [PATCH 8/8] shader_recompilers: Improvements to SSA phi generation and lane instruction elimination (#1667) * shader_recompiler: Add use tracking for Insts * ssa_rewrite: Recursively remove phis * ssa_rewrite: Correct recursive trivial phi elimination * ir: Improve read lane folding pass * control_flow: Avoid adding unnecessary divergant blocks * clang format * externals: Update ext-boost --------- Co-authored-by: Frodo Baggins --- externals/ext-boost | 2 +- .../frontend/control_flow_graph.cpp | 43 ++++++---- src/shader_recompiler/ir/basic_block.cpp | 2 + src/shader_recompiler/ir/microinstruction.cpp | 50 +++++++----- .../ir/passes/constant_propagation_pass.cpp | 81 ++++++++++++++----- .../passes/lower_shared_mem_to_registers.cpp | 2 +- .../ir/passes/resource_tracking_pass.cpp | 2 +- .../ir/passes/ssa_rewrite_pass.cpp | 19 ++--- src/shader_recompiler/ir/value.h | 48 +++++++++-- src/video_core/amdgpu/liverpool.cpp | 2 +- 10 files changed, 175 insertions(+), 76 deletions(-) diff --git a/externals/ext-boost b/externals/ext-boost index f2474e1b5..ca6f230e6 160000 --- a/externals/ext-boost +++ b/externals/ext-boost @@ -1 +1 @@ -Subproject commit f2474e1b584fb7a3ed6f85ba875e6eacd742ec8a +Subproject commit ca6f230e67be7cc45fc919057f07b2aee64dadc1 diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index 354196d31..8c3122b28 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -47,6 +47,15 @@ static IR::Condition MakeCondition(const GcnInst& inst) { } } +static bool IgnoresExecMask(Opcode opcode) { + switch (opcode) { + case Opcode::V_WRITELANE_B32: + return true; + default: + return false; + } +} + static constexpr size_t LabelReserveSize = 32; CFG::CFG(Common::ObjectPool& block_pool_, std::span inst_list_) @@ -133,20 +142,26 @@ void CFG::EmitDivergenceLabels() { curr_begin = -1; continue; } - // Add a label to the instruction right after the open scope call. - // It is the start of a new basic block. - const auto& save_inst = inst_list[curr_begin]; - const Label label = index_to_pc[curr_begin] + save_inst.length; - AddLabel(label); - // Add a label to the close scope instruction. - // There are 3 cases where we need to close a scope. - // * Close scope instruction inside the block - // * Close scope instruction at the end of the block (cbranch or endpgm) - // * Normal instruction at the end of the block - // For the last case we must NOT add a label as that would cause - // the instruction to be separated into its own basic block. - if (is_close) { - AddLabel(index_to_pc[index]); + // If all instructions in the scope ignore exec masking, we shouldn't insert a + // scope. + const auto start = inst_list.begin() + curr_begin + 1; + if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask, + &GcnInst::opcode)) { + // Add a label to the instruction right after the open scope call. + // It is the start of a new basic block. + const auto& save_inst = inst_list[curr_begin]; + const Label label = index_to_pc[curr_begin] + save_inst.length; + AddLabel(label); + // Add a label to the close scope instruction. + // There are 3 cases where we need to close a scope. + // * Close scope instruction inside the block + // * Close scope instruction at the end of the block (cbranch or endpgm) + // * Normal instruction at the end of the block + // For the last case we must NOT add a label as that would cause + // the instruction to be separated into its own basic block. + if (is_close) { + AddLabel(index_to_pc[index]); + } } // Reset scope begin. curr_begin = -1; diff --git a/src/shader_recompiler/ir/basic_block.cpp b/src/shader_recompiler/ir/basic_block.cpp index 426acb2b8..b4d1a78c7 100644 --- a/src/shader_recompiler/ir/basic_block.cpp +++ b/src/shader_recompiler/ir/basic_block.cpp @@ -19,12 +19,14 @@ void Block::AppendNewInst(Opcode op, std::initializer_list args) { Block::iterator Block::PrependNewInst(iterator insertion_point, const Inst& base_inst) { Inst* const inst{inst_pool->Create(base_inst)}; + inst->SetParent(this); return instructions.insert(insertion_point, *inst); } Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list args, u32 flags) { Inst* const inst{inst_pool->Create(op, flags)}; + inst->SetParent(this); const auto result_it{instructions.insert(insertion_point, *inst)}; if (inst->NumArgs() != args.size()) { diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index abd31a728..9b4ad63d2 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include "shader_recompiler/exception.h" @@ -119,10 +120,10 @@ void Inst::SetArg(size_t index, Value value) { } const IR::Value arg{Arg(index)}; if (!arg.IsImmediate()) { - UndoUse(arg); + UndoUse(arg.Inst(), index); } if (!value.IsImmediate()) { - Use(value); + Use(value.Inst(), index); } if (op == Opcode::Phi) { phi_args[index].second = value; @@ -143,7 +144,7 @@ Block* Inst::PhiBlock(size_t index) const { void Inst::AddPhiOperand(Block* predecessor, const Value& value) { if (!value.IsImmediate()) { - Use(value); + Use(value.Inst(), phi_args.size()); } phi_args.emplace_back(predecessor, value); } @@ -155,17 +156,19 @@ void Inst::Invalidate() { void Inst::ClearArgs() { if (op == Opcode::Phi) { - for (auto& pair : phi_args) { + for (auto i = 0; i < phi_args.size(); i++) { + auto& pair = phi_args[i]; IR::Value& value{pair.second}; if (!value.IsImmediate()) { - UndoUse(value); + UndoUse(value.Inst(), i); } } phi_args.clear(); } else { - for (auto& value : args) { + for (auto i = 0; i < args.size(); i++) { + auto& value = args[i]; if (!value.IsImmediate()) { - UndoUse(value); + UndoUse(value.Inst(), i); } } // Reset arguments to null @@ -174,13 +177,21 @@ void Inst::ClearArgs() { } } -void Inst::ReplaceUsesWith(Value replacement) { - Invalidate(); - ReplaceOpcode(Opcode::Identity); - if (!replacement.IsImmediate()) { - Use(replacement); +void Inst::ReplaceUsesWith(Value replacement, bool preserve) { + // Copy since user->SetArg will mutate this->uses + // Could also do temp_uses = std::move(uses) but more readable + const auto temp_uses = uses; + for (const auto& [user, operand] : temp_uses) { + DEBUG_ASSERT(user->Arg(operand).Inst() == this); + user->SetArg(operand, replacement); + } + Invalidate(); + if (preserve) { + // Still useful to have Identity for indirection. + // SSA pass would be more complicated without it + ReplaceOpcode(Opcode::Identity); + SetArg(0, replacement); } - args[0] = replacement; } void Inst::ReplaceOpcode(IR::Opcode opcode) { @@ -195,14 +206,15 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) { op = opcode; } -void Inst::Use(const Value& value) { - Inst* const inst{value.Inst()}; - ++inst->use_count; +void Inst::Use(Inst* used, u32 operand) { + DEBUG_ASSERT(0 == std::count(used->uses.begin(), used->uses.end(), IR::Use(this, operand))); + used->uses.emplace_front(this, operand); } -void Inst::UndoUse(const Value& value) { - Inst* const inst{value.Inst()}; - --inst->use_count; +void Inst::UndoUse(Inst* used, u32 operand) { + IR::Use use(this, operand); + DEBUG_ASSERT(1 == std::count(used->uses.begin(), used->uses.end(), use)); + used->uses.remove(use); } } // namespace Shader::IR diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp index a03fe051c..9624ce6a5 100644 --- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp @@ -43,7 +43,7 @@ bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { if (is_lhs_immediate && is_rhs_immediate) { const auto result{imm_fn(Arg(lhs), Arg(rhs))}; - inst.ReplaceUsesWith(IR::Value{result}); + inst.ReplaceUsesWithAndRemove(IR::Value{result}); return false; } if (is_lhs_immediate && !is_rhs_immediate) { @@ -75,7 +75,7 @@ bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { return false; } using Indices = std::make_index_sequence::NUM_ARGS>; - inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); + inst.ReplaceUsesWithAndRemove(EvalImmediates(inst, func, Indices{})); return true; } @@ -83,12 +83,12 @@ template void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { const IR::Value value{inst.Arg(0)}; if (value.IsImmediate()) { - inst.ReplaceUsesWith(IR::Value{std::bit_cast(Arg(value))}); + inst.ReplaceUsesWithAndRemove(IR::Value{std::bit_cast(Arg(value))}); return; } IR::Inst* const arg_inst{value.InstRecursive()}; if (arg_inst->GetOpcode() == reverse) { - inst.ReplaceUsesWith(arg_inst->Arg(0)); + inst.ReplaceUsesWithAndRemove(arg_inst->Arg(0)); return; } } @@ -131,7 +131,7 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser if (!result) { return; } - inst.ReplaceUsesWith(*result); + inst.ReplaceUsesWithAndRemove(*result); } void FoldConvert(IR::Inst& inst, IR::Opcode opposite) { @@ -141,7 +141,7 @@ void FoldConvert(IR::Inst& inst, IR::Opcode opposite) { } IR::Inst* const producer{value.InstRecursive()}; if (producer->GetOpcode() == opposite) { - inst.ReplaceUsesWith(producer->Arg(0)); + inst.ReplaceUsesWithAndRemove(producer->Arg(0)); } } @@ -152,9 +152,9 @@ void FoldLogicalAnd(IR::Inst& inst) { const IR::Value rhs{inst.Arg(1)}; if (rhs.IsImmediate()) { if (rhs.U1()) { - inst.ReplaceUsesWith(inst.Arg(0)); + inst.ReplaceUsesWithAndRemove(inst.Arg(0)); } else { - inst.ReplaceUsesWith(IR::Value{false}); + inst.ReplaceUsesWithAndRemove(IR::Value{false}); } } } @@ -162,7 +162,7 @@ void FoldLogicalAnd(IR::Inst& inst) { void FoldSelect(IR::Inst& inst) { const IR::Value cond{inst.Arg(0)}; if (cond.IsImmediate()) { - inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2)); + inst.ReplaceUsesWithAndRemove(cond.U1() ? inst.Arg(1) : inst.Arg(2)); } } @@ -173,9 +173,9 @@ void FoldLogicalOr(IR::Inst& inst) { const IR::Value rhs{inst.Arg(1)}; if (rhs.IsImmediate()) { if (rhs.U1()) { - inst.ReplaceUsesWith(IR::Value{true}); + inst.ReplaceUsesWithAndRemove(IR::Value{true}); } else { - inst.ReplaceUsesWith(inst.Arg(0)); + inst.ReplaceUsesWithAndRemove(inst.Arg(0)); } } } @@ -183,12 +183,12 @@ void FoldLogicalOr(IR::Inst& inst) { void FoldLogicalNot(IR::Inst& inst) { const IR::U1 value{inst.Arg(0)}; if (value.IsImmediate()) { - inst.ReplaceUsesWith(IR::Value{!value.U1()}); + inst.ReplaceUsesWithAndRemove(IR::Value{!value.U1()}); return; } IR::Inst* const arg{value.InstRecursive()}; if (arg->GetOpcode() == IR::Opcode::LogicalNot) { - inst.ReplaceUsesWith(arg->Arg(0)); + inst.ReplaceUsesWithAndRemove(arg->Arg(0)); } } @@ -199,7 +199,7 @@ void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { } IR::Inst* const arg_inst{value.InstRecursive()}; if (arg_inst->GetOpcode() == reverse) { - inst.ReplaceUsesWith(arg_inst->Arg(0)); + inst.ReplaceUsesWithAndRemove(arg_inst->Arg(0)); return; } } @@ -211,7 +211,7 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) { } const IR::Value rhs{inst.Arg(1)}; if (rhs.IsImmediate() && Arg(rhs) == 0) { - inst.ReplaceUsesWith(inst.Arg(0)); + inst.ReplaceUsesWithAndRemove(inst.Arg(0)); return; } } @@ -226,21 +226,58 @@ void FoldCmpClass(IR::Block& block, IR::Inst& inst) { } else if ((class_mask & IR::FloatClassFunc::Finite) == IR::FloatClassFunc::Finite) { IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const IR::F32 value = IR::F32{inst.Arg(0)}; - inst.ReplaceUsesWith(ir.LogicalNot(ir.LogicalOr(ir.FPIsInf(value), ir.FPIsInf(value)))); + inst.ReplaceUsesWithAndRemove( + ir.LogicalNot(ir.LogicalOr(ir.FPIsInf(value), ir.FPIsInf(value)))); } else { UNREACHABLE(); } } -void FoldReadLane(IR::Inst& inst) { +void FoldReadLane(IR::Block& block, IR::Inst& inst) { const u32 lane = inst.Arg(1).U32(); IR::Inst* prod = inst.Arg(0).InstRecursive(); - while (prod->GetOpcode() == IR::Opcode::WriteLane) { - if (prod->Arg(2).U32() == lane) { - inst.ReplaceUsesWith(prod->Arg(1)); + + const auto search_chain = [lane](const IR::Inst* prod) -> IR::Value { + while (prod->GetOpcode() == IR::Opcode::WriteLane) { + if (prod->Arg(2).U32() == lane) { + return prod->Arg(1); + } + prod = prod->Arg(0).InstRecursive(); + } + return {}; + }; + + if (prod->GetOpcode() == IR::Opcode::WriteLane) { + if (const IR::Value value = search_chain(prod); !value.IsEmpty()) { + inst.ReplaceUsesWith(value); + } + return; + } + + if (prod->GetOpcode() == IR::Opcode::Phi) { + boost::container::small_vector phi_args; + for (size_t arg_index = 0; arg_index < prod->NumArgs(); ++arg_index) { + const IR::Inst* arg{prod->Arg(arg_index).InstRecursive()}; + if (arg->GetOpcode() != IR::Opcode::WriteLane) { + return; + } + const IR::Value value = search_chain(arg); + if (value.IsEmpty()) { + continue; + } + phi_args.emplace_back(value); + } + if (std::ranges::all_of(phi_args, [&](IR::Value value) { return value == phi_args[0]; })) { + inst.ReplaceUsesWith(phi_args[0]); return; } - prod = prod->Arg(0).InstRecursive(); + const auto insert_point = IR::Block::InstructionList::s_iterator_to(*prod); + IR::Inst* const new_phi{&*block.PrependNewInst(insert_point, IR::Opcode::Phi)}; + new_phi->SetFlags(IR::Type::U32); + for (size_t arg_index = 0; arg_index < phi_args.size(); arg_index++) { + new_phi->AddPhiOperand(prod->PhiBlock(arg_index), phi_args[arg_index]); + } + inst.ReplaceUsesWith(IR::Value{new_phi}); } } @@ -290,7 +327,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { case IR::Opcode::SelectF64: return FoldSelect(inst); case IR::Opcode::ReadLane: - return FoldReadLane(inst); + return FoldReadLane(block, inst); case IR::Opcode::FPNeg32: FoldWhenAllImmediates(inst, [](f32 a) { return -a; }); return; diff --git a/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp index 76bfcf911..c109f3595 100644 --- a/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp +++ b/src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp @@ -25,7 +25,7 @@ void LowerSharedMemToRegisters(IR::Program& program) { }); ASSERT(it != ds_writes.end()); // Replace data read with value written. - inst.ReplaceUsesWith((*it)->Arg(1)); + inst.ReplaceUsesWithAndRemove((*it)->Arg(1)); } } } diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index c1ff3d2f2..89c5c78a0 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -596,7 +596,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, } return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info); }(); - inst.ReplaceUsesWith(new_inst); + inst.ReplaceUsesWithAndRemove(new_inst); } void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { diff --git a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp index df73c1bc8..1d252bee1 100644 --- a/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp +++ b/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp @@ -164,7 +164,6 @@ IR::Opcode UndefOpcode(const FlagTag) noexcept { enum class Status { Start, SetValue, - PreparePhiArgument, PushPhiArgument, }; @@ -253,12 +252,10 @@ public: IR::Inst* const phi{stack.back().phi}; phi->AddPhiOperand(*stack.back().pred_it, stack.back().result); ++stack.back().pred_it; - } - [[fallthrough]]; - case Status::PreparePhiArgument: prepare_phi_operand(); break; } + } } while (stack.size() > 1); return stack.back().result; } @@ -266,9 +263,7 @@ public: void SealBlock(IR::Block* block) { const auto it{incomplete_phis.find(block)}; if (it != incomplete_phis.end()) { - for (auto& pair : it->second) { - auto& variant{pair.first}; - auto& phi{pair.second}; + for (auto& [variant, phi] : it->second) { std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); } } @@ -289,7 +284,7 @@ private: const size_t num_args{phi.NumArgs()}; for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { const IR::Value& op{phi.Arg(arg_index)}; - if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) { + if (op.Resolve() == same.Resolve() || op.Resolve() == IR::Value{&phi}) { // Unique value or self-reference continue; } @@ -314,9 +309,15 @@ private: ++reinsert_point; } // Reinsert the phi node and reroute all its uses to the "same" value + const auto users = phi.Uses(); list.insert(reinsert_point, phi); phi.ReplaceUsesWith(same); - // TODO: Try to recursively remove all phi users, which might have become trivial + // Try to recursively remove all phi users, which might have become trivial + for (const auto& [user, arg_index] : users) { + if (user->GetOpcode() == IR::Opcode::Phi) { + TryRemoveTrivialPhi(*user, user->GetParent(), undef_opcode); + } + } return same; } diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h index 7e46747b9..dbe8b5cc4 100644 --- a/src/shader_recompiler/ir/value.h +++ b/src/shader_recompiler/ir/value.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -107,6 +108,16 @@ public: explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} }; +struct Use { + Inst* user; + u32 operand; + + Use() = default; + Use(Inst* user_, u32 operand_) : user(user_), operand(operand_) {} + Use(const Use&) = default; + bool operator==(const Use&) const noexcept = default; +}; + class Inst : public boost::intrusive::list_base_hook<> { public: explicit Inst(IR::Opcode op_, u32 flags_) noexcept; @@ -118,14 +129,22 @@ public: Inst& operator=(Inst&&) = delete; Inst(Inst&&) = delete; + IR::Block* GetParent() const { + ASSERT(parent); + return parent; + } + void SetParent(IR::Block* block) { + parent = block; + } + /// Get the number of uses this instruction has. [[nodiscard]] int UseCount() const noexcept { - return use_count; + return uses.size(); } /// Determines whether this instruction has uses or not. [[nodiscard]] bool HasUses() const noexcept { - return use_count > 0; + return uses.size() > 0; } /// Get the opcode this microinstruction represents. @@ -167,7 +186,13 @@ public: void Invalidate(); void ClearArgs(); - void ReplaceUsesWith(Value replacement); + void ReplaceUsesWithAndRemove(Value replacement) { + ReplaceUsesWith(replacement, false); + } + + void ReplaceUsesWith(Value replacement) { + ReplaceUsesWith(replacement, true); + } void ReplaceOpcode(IR::Opcode opcode); @@ -197,25 +222,32 @@ public: return std::bit_cast(definition); } + const auto Uses() const { + return uses; + } + private: struct NonTriviallyDummy { NonTriviallyDummy() noexcept {} }; - void Use(const Value& value); - void UndoUse(const Value& value); + void Use(Inst* used, u32 operand); + void UndoUse(Inst* used, u32 operand); + void ReplaceUsesWith(Value replacement, bool preserve); IR::Opcode op{}; - int use_count{}; u32 flags{}; u32 definition{}; + IR::Block* parent{}; union { NonTriviallyDummy dummy{}; boost::container::small_vector, 2> phi_args; std::array args; }; + + boost::container::list uses; }; -static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); +static_assert(sizeof(Inst) <= 160, "Inst size unintentionally increased"); using U1 = TypedValue; using U8 = TypedValue; @@ -373,4 +405,4 @@ template <> struct hash { std::size_t operator()(const Shader::IR::Value& v) const; }; -} // namespace std \ No newline at end of file +} // namespace std diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 38fca6bc0..1bbd77f82 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -715,7 +715,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { false); } else if (dma_data->src_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - LOG_WARNING(Render_Vulkan, "GDS memory read"); + // LOG_WARNING(Render_Vulkan, "GDS memory read"); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Memory) { rasterizer->InlineData(dma_data->DstAddress(),