Mac OS ARM Native reaches ELF entry

This commit is contained in:
Niko 2024-09-04 00:36:12 -04:00
parent b87e6f3838
commit e6ed95b080
15 changed files with 136 additions and 19 deletions

View File

@ -657,8 +657,11 @@ if (APPLE)
target_link_libraries(shadps4 PRIVATE ${MOLTENVK})
endif()
# CMAKE_OSX_ARCHITECTURES is set explicitly when building for x86_64; when it is unset the
# build targets ARM by default and the fixed-address link layout below is skipped.
if (CMAKE_OSX_ARCHITECTURES)
# Reserve system-managed memory space: set the page-zero size to 0x4000, place the TCB_SPACE
# segment at 0x4000 and the GUEST_SYSTEM segment at 0x400000, and set the image base to
# 0x20000000000.
target_link_options(shadps4 PRIVATE -Wl,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x4000,-segaddr,TCB_SPACE,0x4000,-segaddr,GUEST_SYSTEM,0x400000,-image_base,0x20000000000)
endif()
# Replacement for std::chrono::time_zone
target_link_libraries(shadps4 PRIVATE date::date-tz)

View File

@ -43,7 +43,15 @@ else()
endif()
if (NOT TARGET FFmpeg::ffmpeg)
    # ARCHITECTURE is chosen before entering ffmpeg-core (presumably consumed by that
    # subproject - confirm). Everything except Apple keeps the x86_64 default; Apple uses the
    # requested OSX architecture and falls back to the host processor when none was requested.
    set(ARCHITECTURE "x86_64")
    if (APPLE)
        if (CMAKE_OSX_ARCHITECTURES)
            set(ARCHITECTURE "${CMAKE_OSX_ARCHITECTURES}")
        else()
            set(ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}")
        endif()
    endif()

    add_subdirectory(ffmpeg-core)
    add_library(FFmpeg::ffmpeg ALIAS ffmpeg)
endif()

View File

@ -4,7 +4,11 @@
#include "common/assert.h"
#include "common/logging/backend.h"
// Crash() executes the architecture's breakpoint instruction so that a failed assertion
// stops the process (or breaks into an attached debugger) at the point of failure.
// On AArch64 this must be a real trap as well; an empty statement would let execution
// continue past the failed assertion.
#if defined(__x86_64__)
#define Crash() __asm__ __volatile__("int $3")
#elif defined(__aarch64__)
#define Crash() __asm__ __volatile__("brk #0")
#endif
void assert_fail_impl() {
Common::Log::Stop();

View File

@ -38,7 +38,10 @@ static u64 GetTimeNs() {
#endif
}
u64 EstimateRDTSCFrequency() {
u64 EstimateRDTSCFrequency()
{
#ifdef __x86_64__
// Discard the first result measuring the rdtsc.
FencedRDTSC();
std::this_thread::sleep_for(std::chrono::milliseconds{1});
@ -55,6 +58,12 @@ u64 EstimateRDTSCFrequency() {
const u64 tsc_diff = tsc_end - tsc_start;
const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, end_time - start_time);
return RoundToNearest<100'000>(tsc_freq);
#else
return 0;
#endif
}
} // namespace Common

View File

@ -11,7 +11,10 @@
namespace Common {
#ifdef __x86_64__
#ifdef _MSC_VER
// MSVC
__forceinline static u64 FencedRDTSC() {
_mm_lfence();
_ReadWriteBarrier();
@ -21,6 +24,7 @@ __forceinline static u64 FencedRDTSC() {
return result;
}
#else
// Linux/Mac
static inline u64 FencedRDTSC() {
u64 eax;
u64 edx;
@ -32,6 +36,13 @@ static inline u64 FencedRDTSC() {
}
#endif
#else
// Non-x86_64 hosts (ARM): no timestamp counter is read here, so this stand-in
// always reports zero.
static inline u64 FencedRDTSC() {
    constexpr u64 kNoCounterValue = 0;
    return kNoCounterValue;
}
#endif
u64 EstimateRDTSCFrequency();
} // namespace Common

View File

@ -17,9 +17,11 @@
#endif
#if defined(__APPLE__) && defined(__x86_64__)
// x86_64 Apple builds only: emit a zero-filled GUEST_SYSTEM segment so that the system
// address space is reserved by the executable image itself.
asm(".zerofill GUEST_SYSTEM,GUEST_SYSTEM,__guest_system,0xFBFC00000");
#endif
namespace Core {
@ -299,6 +301,7 @@ struct AddressSpace::Impl {
constexpr int protection_flags = PROT_READ | PROT_WRITE;
constexpr int base_map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
#ifdef __APPLE__
#ifdef __x86_64__
// On ARM64 Macs, we run into limitations due to the commpage from 0xFC0000000 - 0xFFFFFFFFF
// and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF. We can allocate the system
// managed region, as well as system reserved if reduced in size slightly, but we cannot map
@ -313,6 +316,14 @@ struct AddressSpace::Impl {
// Cannot guarantee enough space for these areas at the desired addresses, so not MAP_FIXED.
user_base = reinterpret_cast<u8*>(mmap(reinterpret_cast<void*>(USER_MIN), user_size,
protection_flags, base_map_flags, -1, 0));
#else
const auto virtual_size = system_managed_size + system_reserved_size + user_size;
const auto virtual_base =
reinterpret_cast<u8*>(mmap(nullptr, virtual_size, protection_flags, base_map_flags | MAP_JIT, -1, 0));
system_managed_base = virtual_base;
system_reserved_base = virtual_base + (SYSTEM_RESERVED_MIN - SYSTEM_MANAGED_MIN);
user_base = virtual_base + (USER_MIN - SYSTEM_MANAGED_MIN);
#endif
#else
const auto virtual_size = system_managed_size + system_reserved_size + user_size;
const auto virtual_base =
@ -382,13 +393,16 @@ struct AddressSpace::Impl {
// Maps `size` bytes at `virtual_addr` and returns the address of the mapping.
//
// The range is first removed from `m_free_regions`. Then:
//  - phys_addr != -1: the range is mapped MAP_FIXED | MAP_SHARED from `fd` (or from
//    `backing_fd` when `fd` is -1) at file offset `phys_addr`.
//  - phys_addr == -1: no new mapping is created; only the protection of the existing pages
//    is changed to `prot`. This presumably relies on the range having been reserved when the
//    address space was set up - confirm against the constructor.
//
// Either failure trips an assertion that reports strerror(errno).
void* Map(VAddr virtual_addr, PAddr phys_addr, size_t size, PosixPageProtection prot,
          int fd = -1) {
    m_free_regions.subtract({virtual_addr, virtual_addr + size});

    if (phys_addr != -1) {
        const int handle = fd == -1 ? backing_fd : fd;
        void* ret = mmap(reinterpret_cast<void*>(virtual_addr), size, prot,
                         MAP_FIXED | MAP_SHARED, handle, phys_addr);
        ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
        return ret;
    }

    // Anonymous request: keep the existing pages and just switch their protection. Mapping
    // fresh anonymous memory over them here would replace the reserved pages.
    const int ret = mprotect(reinterpret_cast<void*>(virtual_addr), size, prot);
    ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno));
    return reinterpret_cast<void*>(virtual_addr);
}
void Unmap(VAddr virtual_addr, size_t size, bool) {
@ -407,9 +421,8 @@ struct AddressSpace::Impl {
m_free_regions.insert({start_address, end_address});
// Return the adjusted pointers.
void* ret = mmap(reinterpret_cast<void*>(start_address), end_address - start_address,
PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
int ret = mprotect(reinterpret_cast<void*>(start_address), end_address - start_address, PROT_NONE);
ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno));
}
void Protect(VAddr virtual_addr, size_t size, bool read, bool write, bool execute) {

View File

@ -1,6 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// The entire file is compiled only on x86_64 hosts (guard closes at the end of the file).
#ifdef __x86_64__
#include <memory>
#include <mutex>
#include <Zydis/Zydis.h>
@ -682,3 +685,5 @@ void PatchInstructions(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator&
}
} // namespace Core
#endif // __x86_64__

View File

@ -3,6 +3,7 @@
#pragma once
#ifdef __x86_64__
namespace Xbyak {
class CodeGenerator;
}
@ -19,3 +20,4 @@ void CleanupThreadPatchStack();
void PatchInstructions(u64 segment_addr, u64 segment_size, Xbyak::CodeGenerator& c);
} // namespace Core
#endif

View File

@ -988,14 +988,22 @@ static void cleanup_thread(void* arg) {
destructor(value);
}
}
#ifdef __x86_64__
Core::SetTcbBase(nullptr);
#endif
thread->is_almost_done = true;
}
static void* run_thread(void* arg) {
auto* thread = static_cast<ScePthread>(arg);
Common::SetCurrentThreadName(thread->name.c_str());
#ifdef __x86_64__
Core::InitializeThreadPatchStack();
#endif
auto* linker = Common::Singleton<Core::Linker>::Instance();
linker->InitTlsForThread(false);
void* ret = nullptr;

View File

@ -26,6 +26,7 @@ static PS4_SYSV_ABI void ProgramExitFunc() {
fmt::print("exit function called\n");
}
#ifdef __x86_64__
static void RunMainEntry(VAddr addr, EntryParams* params, ExitFunc exit_func) {
// reinterpret_cast<entry_func_t>(addr)(params, exit_func); // can't be used, stack has to have
// a specific layout
@ -47,6 +48,13 @@ static void RunMainEntry(VAddr addr, EntryParams* params, ExitFunc exit_func) {
: "r"(addr), "r"(params), "r"(exit_func)
: "rax", "rsi", "rdi");
}
#elif __aarch64__
// AArch64 placeholder for the guest entry trampoline: it does not jump to `addr` and ignores
// `params` and `exit_func`. It prints a marker showing that the entry point was reached and
// then parks the calling thread forever; it never returns.
static void RunMainEntry(VAddr addr, EntryParams* params, ExitFunc exit_func) {
    printf("Arm Entry\n");
    // Make the marker visible even when stdout is not line-buffered, since we never return.
    fflush(stdout);

    // Spin on a volatile flag: a side-effect-free `while (1) {}` is undefined behaviour under
    // the C++ forward-progress rules and may be removed by the optimiser.
    volatile bool parked = true;
    while (parked) {
    }
}
#endif
Linker::Linker() : memory{Memory::Instance()} {}
@ -85,7 +93,11 @@ void Linker::Execute() {
// Init primary thread.
Common::SetCurrentThreadName("GAME_MainThread");
#ifdef __x86_64__
InitializeThreadPatchStack();
#endif
Libraries::Kernel::pthreadInitSelfMainThread();
InitTlsForThread(true);
@ -107,7 +119,9 @@ void Linker::Execute() {
}
}
#ifdef __x86_64__
SetTcbBase(nullptr);
#endif
}
s32 Linker::LoadModule(const std::filesystem::path& elf_name, bool is_dynamic) {
@ -292,7 +306,9 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul
return false;
}
void* Linker::TlsGetAddr(u64 module_index, u64 offset) {
void* Linker::TlsGetAddr(u64 module_index, u64 offset)
{
#ifdef __x86_64__
std::scoped_lock lk{mutex};
DtvEntry* dtv_table = GetTcbBase()->tcb_dtv;
@ -326,6 +342,9 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) {
addr = dest;
}
return addr + offset;
#else
return 0;
#endif
}
void Linker::InitTlsForThread(bool is_primary) {
@ -395,8 +414,10 @@ void Linker::InitTlsForThread(bool is_primary) {
tcb->tcb_dtv[module->tls.modid + 1].pointer = dest;
}
#ifdef __x86_64__
// Set pointer to FS base
SetTcbBase(tcb);
#endif
}
void Linker::DebugDump() {

View File

@ -1,7 +1,10 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#ifdef __x86_64__
#include <xbyak/xbyak.h>
#endif
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
@ -93,9 +96,11 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
LoadOffset += CODE_BASE_INCR * (1 + aligned_base_size / CODE_BASE_INCR);
LOG_INFO(Core_Linker, "Loading module {} to {}", name, fmt::ptr(*out_addr));
#ifdef __x86_64__
// Initialize trampoline generator.
void* trampoline_addr = std::bit_cast<void*>(base_virtual_addr + aligned_base_size);
Xbyak::CodeGenerator c(TrampolineSize, trampoline_addr);
#endif
LOG_INFO(Core_Linker, "======== Load Module to Memory ========");
LOG_INFO(Core_Linker, "base_virtual_addr ......: {:#018x}", base_virtual_addr);
@ -105,7 +110,9 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
const auto add_segment = [this](const elf_program_header& phdr, bool do_map = true) {
const VAddr segment_addr = base_virtual_addr + phdr.p_vaddr;
if (do_map) {
pthread_jit_write_protect_np(false);
elf.LoadSegment(segment_addr, phdr.p_offset, phdr.p_filesz);
pthread_jit_write_protect_np(true);
}
auto& segment = info.segments[info.num_segments++];
segment.address = segment_addr;
@ -115,6 +122,9 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
for (u16 i = 0; i < elf_header.e_phnum; i++) {
const auto header_type = elf.ElfPheaderTypeStr(elf_pheader[i].p_type);
printf("%d/%d: %d\n", i, elf_header.e_phnum, elf_pheader[i].p_type);
switch (elf_pheader[i].p_type) {
case PT_LOAD:
case PT_SCE_RELRO: {
@ -134,8 +144,11 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
LOG_INFO(Core_Linker, "segment_mode ..........: {}", segment_mode);
add_segment(elf_pheader[i]);
if (elf_pheader[i].p_flags & PF_EXEC) {
if (elf_pheader[i].p_flags & PF_EXEC)
{
#ifdef __x86_64__
PatchInstructions(segment_addr, segment_file_size, c);
#endif
}
break;
}

View File

@ -1,6 +1,8 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#ifdef __x86_64__
#include <mutex>
#include "common/assert.h"
#include "common/types.h"
@ -147,3 +149,5 @@ Tcb* GetTcbBase() {
#endif
} // namespace Core
#endif

View File

@ -5,9 +5,11 @@
#include "common/types.h"
#ifdef __x86_64__
namespace Xbyak {
class CodeGenerator;
}
#endif
namespace Core {

View File

@ -127,6 +127,10 @@ bool memory_patch(u64 vaddr, u64 value) {
MemoryMode old_mode{};
// memory_protect(vaddr, 8, MemoryMode::ReadWrite, &old_mode);
#if defined(__APPLE__) && defined(__aarch64__)
pthread_jit_write_protect_np(false);
#endif
auto* ptr = reinterpret_cast<uint64_t*>(vaddr);
bool ret = (*ptr != value);
@ -140,6 +144,10 @@ bool memory_patch(u64 vaddr, u64 value) {
memory_flush(vaddr, 8);
}
#if defined(__APPLE__) && defined(__aarch64__)
pthread_jit_write_protect_np(true);
#endif
return ret;
}
static u64 AlignUp(u64 pos, u64 align) {

View File

@ -194,11 +194,17 @@ struct PageManager::Impl {
static void GuestFaultSignalHandler(int sig, siginfo_t* info, void* raw_context) {
ucontext_t* ctx = reinterpret_cast<ucontext_t*>(raw_context);
const VAddr address = reinterpret_cast<VAddr>(info->si_addr);
#ifdef __x86_64__
#ifdef __APPLE__
const u32 err = ctx->uc_mcontext->__es.__err;
#else
const greg_t err = ctx->uc_mcontext.gregs[REG_ERR];
#endif
#else
u32 err = 0;
#endif
if (err & 0x2) {
const VAddr addr_aligned = Common::AlignDown(address, PAGESIZE);
rasterizer->InvalidateMemory(addr_aligned, PAGESIZE);