diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index ceb915f6a..588236b14 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -76,18 +76,13 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-build
with:
append-timestamp: false
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
- - name: Setup VS Environment
- uses: ilammy/msvc-dev-cmd@v1.13.0
- with:
- arch: amd64
-
- name: Configure CMake
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
@@ -111,7 +106,7 @@ jobs:
- name: Setup Qt
uses: jurplel/install-qt-action@v4
with:
- version: 6.9.0
+ version: 6.9.1
host: windows
target: desktop
arch: win64_msvc2022_64
@@ -130,18 +125,13 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{ runner.os }}-qt-cache-cmake-build
with:
append-timestamp: false
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
- - name: Setup VS Environment
- uses: ilammy/msvc-dev-cmd@v1.13.0
- with:
- arch: amd64
-
- name: Configure CMake
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
@@ -186,7 +176,7 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{runner.os}}-sdl-cache-cmake-build
with:
@@ -228,7 +218,7 @@ jobs:
- name: Setup Qt
uses: jurplel/install-qt-action@v4
with:
- version: 6.9.0
+ version: 6.9.1
host: mac
target: desktop
arch: clang_64
@@ -247,7 +237,7 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{runner.os}}-qt-cache-cmake-build
with:
@@ -301,7 +291,7 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-build
with:
@@ -362,7 +352,7 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{ runner.os }}-qt-cache-cmake-build
with:
@@ -409,7 +399,7 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{ runner.os }}-sdl-gcc-cache-cmake-build
with:
@@ -445,7 +435,7 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{ runner.os }}-qt-gcc-cache-cmake-build
with:
@@ -494,7 +484,7 @@ jobs:
with:
token: ${{ secrets.SHADPS4_TOKEN_REPO }}
name: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}"
- tag: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}"
+ tag: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.fullhash }}"
draft: false
prerelease: true
body: "Full Changelog: [${{ env.last_release_tag }}...${{ needs.get-info.outputs.shorthash }}](https://github.com/shadps4-emu/shadPS4/compare/${{ env.last_release_tag }}...${{ needs.get-info.outputs.fullhash }})"
@@ -530,14 +520,14 @@ jobs:
# Check if release already exists and get ID
release_id=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \
- "https://api.github.com/repos/$REPO/releases/tags/Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}" | jq -r '.id')
+ "https://api.github.com/repos/$REPO/releases/tags/Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.fullhash }}" | jq -r '.id')
if [[ "$release_id" == "null" ]]; then
echo "Creating release in $REPO for $filename"
release_id=$(curl -s -X POST -H "Authorization: token $GITHUB_TOKEN" \
-H "Accept: application/vnd.github.v3+json" \
-d '{
- "tag_name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}",
+ "tag_name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.fullhash }}",
"name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}",
"draft": false,
"prerelease": true,
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 28c8ca4ed..7be24b303 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -966,6 +966,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
src/video_core/texture_cache/tile_manager.cpp
src/video_core/texture_cache/tile_manager.h
src/video_core/texture_cache/types.h
+ src/video_core/texture_cache/host_compatibility.cpp
src/video_core/texture_cache/host_compatibility.h
src/video_core/page_manager.cpp
src/video_core/page_manager.h
diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp
index 8937ef04b..8512858e9 100644
--- a/src/core/cpu_patches.cpp
+++ b/src/core/cpu_patches.cpp
@@ -88,7 +88,8 @@ static bool FilterTcbAccess(const ZydisDecodedOperand* operands) {
dst_op.reg.value <= ZYDIS_REGISTER_R15;
}
-static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
+static void GenerateTcbAccess(void* /* address */, const ZydisDecodedOperand* operands,
+ Xbyak::CodeGenerator& c) {
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
#if defined(_WIN32)
@@ -126,7 +127,8 @@ static bool FilterNoSSE4a(const ZydisDecodedOperand*) {
return !cpu.has(Cpu::tSSE4a);
}
-static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
+static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operands,
+ Xbyak::CodeGenerator& c) {
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
@@ -245,7 +247,8 @@ static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenera
}
}
-static void GenerateINSERTQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
+static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* operands,
+ Xbyak::CodeGenerator& c) {
bool immediateForm = operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[3].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
@@ -383,8 +386,44 @@ static void GenerateINSERTQ(const ZydisDecodedOperand* operands, Xbyak::CodeGene
}
}
+static void ReplaceMOVNT(void* address, u8 rep_prefix) {
+ // Find the opcode byte
+ // There can be any amount of prefixes but the instruction can't be more than 15 bytes
+ // And we know for sure this is a MOVNTSS/MOVNTSD
+ bool found = false;
+ bool rep_prefix_found = false;
+ int index = 0;
+ u8* ptr = reinterpret_cast<u8*>(address);
+ for (int i = 0; i < 15; i++) {
+ if (ptr[i] == rep_prefix) {
+ rep_prefix_found = true;
+ } else if (ptr[i] == 0x2B) {
+ index = i;
+ found = true;
+ break;
+ }
+ }
+
+ // Some sanity checks
+ ASSERT(found);
+ ASSERT(index >= 2);
+ ASSERT(ptr[index - 1] == 0x0F);
+ ASSERT(rep_prefix_found);
+
+ // This turns the MOVNTSS/MOVNTSD to a MOVSS/MOVSD m, xmm
+ ptr[index] = 0x11;
+}
+
+static void ReplaceMOVNTSS(void* address, const ZydisDecodedOperand*, Xbyak::CodeGenerator&) {
+ ReplaceMOVNT(address, 0xF3);
+}
+
+static void ReplaceMOVNTSD(void* address, const ZydisDecodedOperand*, Xbyak::CodeGenerator&) {
+ ReplaceMOVNT(address, 0xF2);
+}
+
using PatchFilter = bool (*)(const ZydisDecodedOperand*);
-using InstructionGenerator = void (*)(const ZydisDecodedOperand*, Xbyak::CodeGenerator&);
+using InstructionGenerator = void (*)(void*, const ZydisDecodedOperand*, Xbyak::CodeGenerator&);
struct PatchInfo {
/// Filter for more granular patch conditions past just the instruction mnemonic.
PatchFilter filter;
@@ -400,6 +439,8 @@ static const std::unordered_map<ZydisMnemonic, PatchInfo> Patches = {
// SSE4a
{ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}},
{ZYDIS_MNEMONIC_INSERTQ, {FilterNoSSE4a, GenerateINSERTQ, true}},
+ {ZYDIS_MNEMONIC_MOVNTSS, {FilterNoSSE4a, ReplaceMOVNTSS, false}},
+ {ZYDIS_MNEMONIC_MOVNTSD, {FilterNoSSE4a, ReplaceMOVNTSD, false}},
#if defined(_WIN32)
// Windows needs a trampoline.
@@ -477,7 +518,7 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
auto& trampoline_gen = module->trampoline_gen;
const auto trampoline_ptr = trampoline_gen.getCurr();
- patch_info.generator(operands, trampoline_gen);
+ patch_info.generator(code, operands, trampoline_gen);
// Return to the following instruction at the end of the trampoline.
trampoline_gen.jmp(code + instruction.length);
@@ -485,7 +526,7 @@ static std::pair<bool, u64> TryPatch(u8* code, PatchModule* module) {
// Replace instruction with near jump to the trampoline.
patch_gen.jmp(trampoline_ptr, Xbyak::CodeGenerator::LabelType::T_NEAR);
} else {
- patch_info.generator(operands, patch_gen);
+ patch_info.generator(code, operands, patch_gen);
}
const auto patch_size = patch_gen.getCurr() - code;
diff --git a/src/core/libraries/kernel/kernel.cpp b/src/core/libraries/kernel/kernel.cpp
index 180850217..930640d0e 100644
--- a/src/core/libraries/kernel/kernel.cpp
+++ b/src/core/libraries/kernel/kernel.cpp
@@ -273,6 +273,10 @@ void RegisterKernel(Core::Loader::SymbolsResolver* sym) {
Libraries::Net::sceNetInetNtop); // TODO fix it to sys_ ...
LIB_FUNCTION("4n51s0zEf0c", "libScePosix", 1, "libkernel", 1, 1,
Libraries::Net::sceNetInetPton); // TODO fix it to sys_ ...
+ LIB_FUNCTION("XVL8So3QJUk", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_connect);
+ LIB_FUNCTION("3e+4Iv7IJ8U", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_accept);
+ LIB_FUNCTION("aNeavPDNKzA", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_sendmsg);
+ LIB_FUNCTION("pxnCmagrtao", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_listen);
}
} // namespace Libraries::Kernel
diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp
index 18676cbdf..f02ddafdc 100644
--- a/src/core/libraries/kernel/memory.cpp
+++ b/src/core/libraries/kernel/memory.cpp
@@ -222,9 +222,10 @@ s32 PS4_SYSV_ABI sceKernelMapDirectMemory2(void** addr, u64 len, s32 type, s32 p
return ret;
}
-s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
- int flags, const char* name) {
-
+s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags,
+ const char* name) {
+ LOG_INFO(Kernel_Vmm, "in_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}, name = '{}'",
+ fmt::ptr(*addr_in_out), len, prot, flags, name);
if (len == 0 || !Common::Is16KBAligned(len)) {
LOG_ERROR(Kernel_Vmm, "len is 0 or not 16kb multiple");
return ORBIS_KERNEL_ERROR_EINVAL;
@@ -243,18 +244,14 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t
const VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out);
const auto mem_prot = static_cast<Core::MemoryProt>(prot);
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
- SCOPE_EXIT {
- LOG_INFO(Kernel_Vmm,
- "in_addr = {:#x}, out_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}",
- in_addr, fmt::ptr(*addr_in_out), len, prot, flags);
- };
auto* memory = Core::Memory::Instance();
- return memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
- Core::VMAType::Flexible, name);
+ const auto ret = memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
+ Core::VMAType::Flexible, name);
+ LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr_in_out));
+ return ret;
}
-s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
- int flags) {
+s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags) {
return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "anon");
}
@@ -663,6 +660,9 @@ int PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) {
"PRT aperture id = {}, address = {:#x}, size = {:#x} is set but not used", id,
address, size);
+ auto* memory = Core::Memory::Instance();
+ memory->SetPrtArea(id, address, size);
+
PrtApertures[id] = {address, size};
return ORBIS_OK;
}
diff --git a/src/core/libraries/kernel/memory.h b/src/core/libraries/kernel/memory.h
index 6cefe0d07..ea42e7546 100644
--- a/src/core/libraries/kernel/memory.h
+++ b/src/core/libraries/kernel/memory.h
@@ -141,10 +141,9 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE
s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info,
size_t infoSize);
s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u64 alignment);
-s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addrInOut, std::size_t len, int prot,
- int flags, const char* name);
-s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
- int flags);
+s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags,
+ const char* name);
+s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags);
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot);
s32 PS4_SYSV_ABI sceKernelMprotect(const void* addr, u64 size, s32 prot);
diff --git a/src/core/libraries/kernel/threads/mutex.cpp b/src/core/libraries/kernel/threads/mutex.cpp
index 956e5ef65..3dbade96a 100644
--- a/src/core/libraries/kernel/threads/mutex.cpp
+++ b/src/core/libraries/kernel/threads/mutex.cpp
@@ -426,6 +426,7 @@ void RegisterMutex(Core::Loader::SymbolsResolver* sym) {
// Posix
LIB_FUNCTION("ttHNfU+qDBU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_init);
LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
+ LIB_FUNCTION("Io9+nTKXZtA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_timedlock);
LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init);
diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp
index c5208b6dd..da715b3bf 100644
--- a/src/core/libraries/videoout/video_out.cpp
+++ b/src/core/libraries/videoout/video_out.cpp
@@ -282,7 +282,12 @@ s32 PS4_SYSV_ABI sceVideoOutGetVblankStatus(int handle, SceVideoOutVblankStatus*
s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutionStatus* status) {
LOG_INFO(Lib_VideoOut, "called");
- *status = driver->GetPort(handle)->resolution;
+ auto* port = driver->GetPort(handle);
+ if (!port || !port->is_open) {
+ return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
+ }
+
+ *status = port->resolution;
return ORBIS_OK;
}
diff --git a/src/core/libraries/zlib/zlib.cpp b/src/core/libraries/zlib/zlib.cpp
index 899cb5bf6..b304992ad 100644
--- a/src/core/libraries/zlib/zlib.cpp
+++ b/src/core/libraries/zlib/zlib.cpp
@@ -51,7 +51,7 @@ void ZlibTaskThread(const std::stop_token& stop) {
if (!task_queue_cv.wait(lock, stop, [&] { return !task_queue.empty(); })) {
break;
}
- task = task_queue.back();
+ task = task_queue.front();
task_queue.pop();
}
@@ -136,7 +136,7 @@ s32 PS4_SYSV_ABI sceZlibWaitForDone(u64* request_id, const u32* timeout) {
} else {
done_queue_cv.wait(lock, pred);
}
- *request_id = done_queue.back();
+ *request_id = done_queue.front();
done_queue.pop();
}
return ORBIS_OK;
diff --git a/src/core/linker.cpp b/src/core/linker.cpp
index c50b03a8f..1f45caf12 100644
--- a/src/core/linker.cpp
+++ b/src/core/linker.cpp
@@ -332,21 +332,22 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul
sr.type = sym_type;
const auto* record = m_hle_symbols.FindSymbol(sr);
- if (!record) {
- // Check if it an export function
- const auto* p = FindExportedModule(*module, *library);
- if (p && p->export_sym.GetSize() > 0) {
- record = p->export_sym.FindSymbol(sr);
- }
- }
if (record) {
*return_info = *record;
-
Core::Devtools::Widget::ModuleList::AddModule(sr.library);
-
return true;
}
+ // Check if it an export function
+ const auto* p = FindExportedModule(*module, *library);
+ if (p && p->export_sym.GetSize() > 0) {
+ record = p->export_sym.FindSymbol(sr);
+ if (record) {
+ *return_info = *record;
+ return true;
+ }
+ }
+
const auto aeronid = AeroLib::FindByNid(sr.name.c_str());
if (aeronid) {
return_info->name = aeronid->name;
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index ba3640877..e738f85a1 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -95,6 +95,46 @@ u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) {
return clamped_size;
}
+void MemoryManager::SetPrtArea(u32 id, VAddr address, u64 size) {
+ PrtArea& area = prt_areas[id];
+ if (area.mapped) {
+ rasterizer->UnmapMemory(area.start, area.end - area.start);
+ }
+
+ area.start = address;
+ area.end = address + size;
+ area.mapped = true;
+
+ // Pretend the entire PRT area is mapped to avoid GPU tracking errors.
+ // The caches will use CopySparseMemory to fetch data which avoids unmapped areas.
+ rasterizer->MapMemory(address, size);
+}
+
+void MemoryManager::CopySparseMemory(VAddr virtual_addr, u8* dest, u64 size) {
+ const bool is_sparse = std::ranges::any_of(
+ prt_areas, [&](const PrtArea& area) { return area.Overlaps(virtual_addr, size); });
+ if (!is_sparse) {
+ std::memcpy(dest, std::bit_cast<const u8*>(virtual_addr), size);
+ return;
+ }
+
+ auto vma = FindVMA(virtual_addr);
+ ASSERT_MSG(vma->second.Contains(virtual_addr, 0),
+ "Attempted to access invalid GPU address {:#x}", virtual_addr);
+ while (size) {
+ u64 copy_size = std::min(vma->second.size - (virtual_addr - vma->first), size);
+ if (vma->second.IsFree()) {
+ std::memset(dest, 0, copy_size);
+ } else {
+ std::memcpy(dest, std::bit_cast<const u8*>(virtual_addr), copy_size);
+ }
+ size -= copy_size;
+ virtual_addr += copy_size;
+ dest += copy_size;
+ ++vma;
+ }
+}
+
bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) {
const VAddr virtual_addr = std::bit_cast<VAddr>(address);
const auto& vma = FindVMA(virtual_addr)->second;
@@ -182,7 +222,6 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
auto& area = CarveDmemArea(mapping_start, size)->second;
area.memory_type = memory_type;
area.is_free = false;
- MergeAdjacent(dmem_map, dmem_area);
return mapping_start;
}
diff --git a/src/core/memory.h b/src/core/memory.h
index b3ebe3c27..68f9c26c4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -172,6 +172,10 @@ public:
u64 ClampRangeSize(VAddr virtual_addr, u64 size);
+ void SetPrtArea(u32 id, VAddr address, u64 size);
+
+ void CopySparseMemory(VAddr source, u8* dest, u64 size);
+
bool TryWriteBacking(void* address, const void* data, u32 num_bytes);
void SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1, bool use_extended_mem2);
@@ -275,6 +279,18 @@ private:
size_t pool_budget{};
Vulkan::Rasterizer* rasterizer{};
+ struct PrtArea {
+ VAddr start;
+ VAddr end;
+ bool mapped;
+
+ bool Overlaps(VAddr test_address, u64 test_size) const {
+ const VAddr overlap_end = test_address + test_size;
+ return start < overlap_end && test_address < end;
+ }
+ };
+ std::array<PrtArea, 3> prt_areas{};
+
friend class ::Core::Devtools::Widget::MemoryMapViewer;
};
diff --git a/src/core/signals.h b/src/core/signals.h
index 6ee525e10..0409b73ae 100644
--- a/src/core/signals.h
+++ b/src/core/signals.h
@@ -5,6 +5,7 @@
#include <functional>
#include "common/singleton.h"
+#include "common/types.h"
namespace Core {
diff --git a/src/qt_gui/check_update.cpp b/src/qt_gui/check_update.cpp
index 550fdddb5..b0858840a 100644
--- a/src/qt_gui/check_update.cpp
+++ b/src/qt_gui/check_update.cpp
@@ -137,7 +137,7 @@ tr("The Auto Updater allows up to 60 update checks per hour.\\nYou have reached
}
}
- latestRev = latestVersion.right(7);
+ latestRev = latestVersion.right(40);
latestDate = jsonObj["published_at"].toString();
QJsonArray assets = jsonObj["assets"].toArray();
@@ -167,7 +167,7 @@ tr("The Auto Updater allows up to 60 update checks per hour.\\nYou have reached
QDateTime dateTime = QDateTime::fromString(latestDate, Qt::ISODate);
latestDate = dateTime.isValid() ? dateTime.toString("yyyy-MM-dd HH:mm:ss") : "Unknown date";
- if (latestRev == currentRev.left(7)) {
+ if (latestRev == currentRev) {
if (showMessage) {
QMessageBox::information(this, tr("Auto Updater"),
tr("Your version is already up to date!"));
@@ -215,7 +215,7 @@ void CheckUpdate::setupUI(const QString& downloadUrl, const QString& latestDate,
"%3 | "
"(%4) | "
"
")
- .arg(currentRev.left(7), currentDate, latestRev, latestDate);
+ .arg(currentRev.left(7), currentDate, latestRev.left(7), latestDate);
QLabel* updateLabel = new QLabel(updateText, this);
layout->addWidget(updateLabel);
diff --git a/src/qt_gui/translations/pt_BR.ts b/src/qt_gui/translations/pt_BR.ts
index 34d31f240..9f254e272 100644
--- a/src/qt_gui/translations/pt_BR.ts
+++ b/src/qt_gui/translations/pt_BR.ts
@@ -2048,7 +2048,7 @@
* Unsupported Vulkan Version
- * Unsupported Vulkan Version
+ * Versão do Vulkan não suportada
diff --git a/src/qt_gui/translations/tr_TR.ts b/src/qt_gui/translations/tr_TR.ts
index e61985e90..c6d641470 100644
--- a/src/qt_gui/translations/tr_TR.ts
+++ b/src/qt_gui/translations/tr_TR.ts
@@ -138,7 +138,7 @@
File Exists
- Dosya mevcut
+ Dosya Mevcut
File already exists. Do you want to replace it?
@@ -1221,7 +1221,7 @@
Exit shadPS4
- shadPS4'ten Çık
+ shadPS4 Çıkış
Exit the application.
@@ -1381,7 +1381,7 @@
Game Boot
- Oyun Başlatma
+ Oyun Başlat
Only one file can be selected!
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index f2e6279f4..37d7eea35 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -303,6 +303,11 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses);
ctx.AddExtension("SPV_KHR_physical_storage_buffer");
}
+ if (info.uses_shared && profile.supports_workgroup_explicit_memory_layout) {
+ ctx.AddExtension("SPV_KHR_workgroup_memory_explicit_layout");
+ ctx.AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR);
+ ctx.AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR);
+ }
}
void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index d7c73ca8f..13fd8e180 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -1,6 +1,8 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
+#include "common/div_ceil.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
@@ -15,42 +17,40 @@ std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const Id shift_id{ctx.ConstU32(2U)};
- const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
- const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+ const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
+ const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
const auto [scope, semantics]{AtomicArgs(ctx)};
- return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
+ return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
+ return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
+ });
+}
+
+Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
+ Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
+ const Id shift_id{ctx.ConstU32(3U)};
+ const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
+ const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
+ return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value);
+ });
}
Id SharedAtomicU32_IncDec(EmitContext& ctx, Id offset,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
const Id shift_id{ctx.ConstU32(2U)};
- const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
- const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
+ const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
+ const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)};
const auto [scope, semantics]{AtomicArgs(ctx)};
- return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics);
-}
-
-Id BufferAtomicU32BoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
- if (Sirit::ValidId(buffer_size)) {
- // Bounds checking enabled, wrap in a conditional branch to make sure that
- // the atomic is not mistakenly executed when the index is out of bounds.
- const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer_size);
- const Id ib_label = ctx.OpLabel();
- const Id oob_label = ctx.OpLabel();
- const Id end_label = ctx.OpLabel();
- ctx.OpSelectionMerge(end_label, spv::SelectionControlMask::MaskNone);
- ctx.OpBranchConditional(in_bounds, ib_label, oob_label);
- ctx.AddLabel(ib_label);
- const Id ib_result = emit_func();
- ctx.OpBranch(end_label);
- ctx.AddLabel(oob_label);
- const Id oob_result = ctx.u32_zero_value;
- ctx.OpBranch(end_label);
- ctx.AddLabel(end_label);
- return ctx.OpPhi(ctx.U32[1], ib_result, ib_label, oob_result, oob_label);
- }
- // Bounds checking not enabled, just perform the atomic operation.
- return emit_func();
+ return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
+ return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics);
+ });
}
Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
@@ -63,11 +63,42 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)};
- return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] {
+ return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);
});
}
+Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
+ Id cmp_value,
+ Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) {
+ const auto& buffer = ctx.buffers[handle];
+ if (Sirit::ValidId(buffer.offset)) {
+ address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+ }
+ const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
+ const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
+ const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
+ return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value);
+ });
+}
+
+Id BufferAtomicU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
+ Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
+ const auto& buffer = ctx.buffers[handle];
+ if (Sirit::ValidId(buffer.offset)) {
+ address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+ }
+ const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
+ const auto [id, pointer_type] = buffer[EmitContext::PointerType::U64];
+ const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return AccessBoundsCheck<64>(ctx, index, buffer.size_qwords, [&] {
+ return (ctx.*atomic_func)(ctx.U64, ptr, scope, semantics, value);
+ });
+}
+
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& texture = ctx.images[handle & 0xFFFF];
@@ -89,6 +120,10 @@ Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd);
}
+Id EmitSharedAtomicIAdd64(EmitContext& ctx, Id offset, Id value) {
+ return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicIAdd);
+}
+
Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) {
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
}
@@ -133,6 +168,10 @@ Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
}
+Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+ return BufferAtomicU64(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
+}
+
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicSMin);
}
@@ -175,6 +214,12 @@ Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange);
}
+Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
+ Id cmp_value) {
+ return BufferAtomicU32CmpSwap(ctx, inst, handle, address, value, cmp_value,
+ &Sirit::Module::OpAtomicCompareExchange);
+}
+
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd);
}
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bounds.h b/src/shader_recompiler/backend/spirv/emit_spirv_bounds.h
new file mode 100644
index 000000000..41e70c8c3
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_bounds.h
@@ -0,0 +1,48 @@
+// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
+#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+
+namespace Shader::Backend::SPIRV {
+
+template <u32 bit_size>
+auto AccessBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
+ Id zero_value{};
+ Id result_type{};
+ if constexpr (bit_size == 64) {
+ zero_value = ctx.u64_zero_value;
+ result_type = ctx.U64;
+ } else if constexpr (bit_size == 32) {
+ zero_value = ctx.u32_zero_value;
+ result_type = ctx.U32[1];
+ } else if constexpr (bit_size == 16) {
+ zero_value = ctx.u16_zero_value;
+ result_type = ctx.U16;
+ } else {
+ static_assert(false, "type not supported");
+ }
+ if (Sirit::ValidId(buffer_size)) {
+ // Bounds checking enabled, wrap in a conditional branch to make sure that
+ // the atomic is not mistakenly executed when the index is out of bounds.
+ const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer_size);
+ const Id ib_label = ctx.OpLabel();
+ const Id end_label = ctx.OpLabel();
+ ctx.OpSelectionMerge(end_label, spv::SelectionControlMask::MaskNone);
+ ctx.OpBranchConditional(in_bounds, ib_label, end_label);
+ const auto last_label = ctx.last_label;
+ ctx.AddLabel(ib_label);
+ const auto ib_result = emit_func();
+ ctx.OpBranch(end_label);
+ ctx.AddLabel(end_label);
+ if (Sirit::ValidId(ib_result)) {
+ return ctx.OpPhi(result_type, ib_result, ib_label, zero_value, last_label);
+ } else {
+ return Id{0};
+ }
+ }
+ // Bounds checking not enabled, just perform the atomic operation.
+ return emit_func();
+}
+
+} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 09f9732bf..3441c5a23 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -86,6 +86,7 @@ void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicIAdd64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMax32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
@@ -96,6 +97,8 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
+ Id cmp_value);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
@@ -118,11 +121,14 @@ Id EmitUndefU8(EmitContext& ctx);
Id EmitUndefU16(EmitContext& ctx);
Id EmitUndefU32(EmitContext& ctx);
Id EmitUndefU64(EmitContext& ctx);
+Id EmitLoadSharedU16(EmitContext& ctx, Id offset);
Id EmitLoadSharedU32(EmitContext& ctx, Id offset);
Id EmitLoadSharedU64(EmitContext& ctx, Id offset);
+void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value);
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value);
+Id EmitSharedAtomicIAdd64(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value);
@@ -372,6 +378,7 @@ Id EmitBitCount64(EmitContext& ctx, Id value);
Id EmitBitwiseNot32(EmitContext& ctx, Id value);
Id EmitFindSMsb32(EmitContext& ctx, Id value);
Id EmitFindUMsb32(EmitContext& ctx, Id value);
+Id EmitFindUMsb64(EmitContext& ctx, Id value);
Id EmitFindILsb32(EmitContext& ctx, Id value);
Id EmitFindILsb64(EmitContext& ctx, Id value);
Id EmitSMin32(EmitContext& ctx, Id a, Id b);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index 10bfbb2ab..1a995354d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -229,6 +229,20 @@ Id EmitFindUMsb32(EmitContext& ctx, Id value) {
return ctx.OpFindUMsb(ctx.U32[1], value);
}
+Id EmitFindUMsb64(EmitContext& ctx, Id value) {
+ // Vulkan restricts some bitwise operations to 32-bit only, so decompose into
+ // two 32-bit values and select the correct result.
+ const Id unpacked{ctx.OpBitcast(ctx.U32[2], value)};
+ const Id hi{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 1U)};
+ const Id lo{ctx.OpCompositeExtract(ctx.U32[1], unpacked, 0U)};
+ const Id hi_msb{ctx.OpFindUMsb(ctx.U32[1], hi)};
+ const Id lo_msb{ctx.OpFindUMsb(ctx.U32[1], lo)};
+ const Id found_hi{ctx.OpINotEqual(ctx.U1[1], hi_msb, ctx.ConstU32(u32(-1)))};
+ const Id shifted_hi{ctx.OpIAdd(ctx.U32[1], hi_msb, ctx.ConstU32(32u))};
+ // value == 0 case is checked in IREmitter
+ return ctx.OpSelect(ctx.U32[1], found_hi, shifted_hi, lo_msb);
+}
+
Id EmitFindILsb32(EmitContext& ctx, Id value) {
return ctx.OpFindILsb(ctx.U32[1], value);
}
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
index 8b1610d61..c59406499 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -1,43 +1,86 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
+#include "common/div_ceil.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV {
+Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
+ const Id shift_id{ctx.ConstU32(1U)};
+ const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
+ const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
+
+ return AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
+ const Id pointer =
+ ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
+ return ctx.OpLoad(ctx.U16, pointer);
+ });
+}
+
Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
const Id shift_id{ctx.ConstU32(2U)};
- const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
- const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
- return ctx.OpLoad(ctx.U32[1], pointer);
+ const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
+ const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
+
+ return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
+ const Id pointer =
+ ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
+ return ctx.OpLoad(ctx.U32[1], pointer);
+ });
}
Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
- const Id shift_id{ctx.ConstU32(2U)};
- const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
- const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.ConstU32(1U))};
- const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
- const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
- return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
- ctx.OpLoad(ctx.U32[1], rhs_pointer));
+ const Id shift_id{ctx.ConstU32(3U)};
+ const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
+ const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
+
+ return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
+ return ctx.OpLoad(ctx.U64, pointer);
+ });
+}
+
+void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
+ const Id shift{ctx.ConstU32(1U)};
+ const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift)};
+ const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
+
+ AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
+ const Id pointer =
+ ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
+ ctx.OpStore(pointer, value);
+ return Id{0};
+ });
}
void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
const Id shift{ctx.ConstU32(2U)};
- const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
- const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
- ctx.OpStore(pointer, value);
+ const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift)};
+ const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
+
+ AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
+ const Id pointer =
+ ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
+ ctx.OpStore(pointer, value);
+ return Id{0};
+ });
}
void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
- const Id shift{ctx.ConstU32(2U)};
- const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
- const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.ConstU32(1U))};
- const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
- const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
- ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
- ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
+ const Id shift{ctx.ConstU32(3U)};
+ const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift)};
+ const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
+
+ AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
+ const Id pointer{
+ ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
+ ctx.OpStore(pointer, value);
+ return Id{0};
+ });
}
} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 68bfcc0d0..672856397 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -146,6 +146,7 @@ void EmitContext::DefineArithmeticTypes() {
false_value = ConstantFalse(U1[1]);
u8_one_value = Constant(U8, 1U);
u8_zero_value = Constant(U8, 0U);
+ u16_zero_value = Constant(U16, 0U);
u32_one_value = ConstU32(1U);
u32_zero_value = ConstU32(0U);
f32_zero_value = ConstF32(0.0f);
@@ -285,6 +286,8 @@ void EmitContext::DefineBufferProperties() {
Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding));
buffer.size_dwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(2U));
Name(buffer.size_dwords, fmt::format("buf{}_dword_size", binding));
+ buffer.size_qwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(3U));
+ Name(buffer.size_qwords, fmt::format("buf{}_qword_size", binding));
}
}
}
@@ -307,7 +310,9 @@ void EmitContext::DefineInterpolatedAttribs() {
const Id p2{OpCompositeExtract(F32[4], p_array, 2U)};
const Id p10{OpFSub(F32[4], p1, p0)};
const Id p20{OpFSub(F32[4], p2, p0)};
- const Id bary_coord{OpLoad(F32[3], gl_bary_coord_id)};
+ const Id bary_coord{OpLoad(F32[3], IsLinear(info.interp_qualifiers[i])
+ ? bary_coord_linear_id
+ : bary_coord_persp_id)};
const Id bary_coord_y{OpCompositeExtract(F32[1], bary_coord, 1)};
const Id bary_coord_z{OpCompositeExtract(F32[1], bary_coord, 2)};
const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)};
@@ -411,8 +416,14 @@ void EmitContext::DefineInputs() {
DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
}
if (profile.needs_manual_interpolation) {
- gl_bary_coord_id =
- DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
+ if (info.has_perspective_interp) {
+ bary_coord_persp_id =
+ DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
+ }
+ if (info.has_linear_interp) {
+ bary_coord_linear_id = DefineVariable(F32[3], spv::BuiltIn::BaryCoordNoPerspKHR,
+ spv::StorageClass::Input);
+ }
}
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
const auto& input = runtime_info.fs_info.inputs[i];
@@ -435,9 +446,12 @@ void EmitContext::DefineInputs() {
} else {
attr_id = DefineInput(type, semantic);
Name(attr_id, fmt::format("fs_in_attr{}", semantic));
- }
- if (input.is_flat) {
- Decorate(attr_id, spv::Decoration::Flat);
+
+ if (input.is_flat) {
+ Decorate(attr_id, spv::Decoration::Flat);
+ } else if (IsLinear(info.interp_qualifiers[i])) {
+ Decorate(attr_id, spv::Decoration::NoPerspective);
+ }
}
input_params[semantic] =
GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
@@ -634,7 +648,8 @@ void EmitContext::DefineOutputs() {
}
break;
}
- case LogicalStage::Fragment:
+ case LogicalStage::Fragment: {
+ u32 num_render_targets = 0;
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
if (!info.stores.GetAny(mrt)) {
@@ -643,11 +658,21 @@ void EmitContext::DefineOutputs() {
const u32 num_components = info.stores.NumComponents(mrt);
const AmdGpu::NumberFormat num_format{runtime_info.fs_info.color_buffers[i].num_format};
const Id type{GetAttributeType(*this, num_format)[num_components]};
- const Id id{DefineOutput(type, i)};
+ Id id;
+ if (runtime_info.fs_info.dual_source_blending) {
+ id = DefineOutput(type, 0);
+ Decorate(id, spv::Decoration::Index, i);
+ } else {
+ id = DefineOutput(type, i);
+ }
Name(id, fmt::format("frag_color{}", i));
frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
+ ++num_render_targets;
}
+ ASSERT_MSG(!runtime_info.fs_info.dual_source_blending || num_render_targets == 2,
+ "Dual source blending enabled, there must be exactly two MRT exports");
break;
+ }
case LogicalStage::Geometry: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
@@ -957,13 +982,27 @@ void EmitContext::DefineSharedMemory() {
}
ASSERT(info.stage == Stage::Compute);
const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
- const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)};
- const Id type{TypeArray(U32[1], ConstU32(num_elements))};
- shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
- shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
- shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
- Name(shared_memory_u32, "shared_mem");
- interfaces.push_back(shared_memory_u32);
+
+ const auto make_type = [&](Id element_type, u32 element_size) {
+ const u32 num_elements{Common::DivCeil(shared_memory_size, element_size)};
+ const Id array_type{TypeArray(element_type, ConstU32(num_elements))};
+ Decorate(array_type, spv::Decoration::ArrayStride, element_size);
+
+ const Id struct_type{TypeStruct(array_type)};
+ MemberDecorate(struct_type, 0u, spv::Decoration::Offset, 0u);
+ Decorate(struct_type, spv::Decoration::Block);
+
+ const Id pointer = TypePointer(spv::StorageClass::Workgroup, struct_type);
+ const Id element_pointer = TypePointer(spv::StorageClass::Workgroup, element_type);
+ const Id variable = AddGlobalVariable(pointer, spv::StorageClass::Workgroup);
+ Decorate(variable, spv::Decoration::Aliased);
+ interfaces.push_back(variable);
+
+ return std::make_tuple(variable, element_pointer, pointer);
+ };
+ std::tie(shared_memory_u16, shared_u16, shared_memory_u16_type) = make_type(U16, 2u);
+ std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make_type(U32[1], 4u);
+ std::tie(shared_memory_u64, shared_u64, shared_memory_u64_type) = make_type(U64, 8u);
}
Id EmitContext::DefineFloat32ToUfloatM5(u32 mantissa_bits, const std::string_view name) {
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index a2e0d2f47..93c4ed265 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -235,17 +235,16 @@ public:
Id false_value{};
Id u8_one_value{};
Id u8_zero_value{};
+ Id u16_zero_value{};
Id u32_one_value{};
Id u32_zero_value{};
Id f32_zero_value{};
Id u64_one_value{};
Id u64_zero_value{};
- Id shared_u8{};
Id shared_u16{};
Id shared_u32{};
- Id shared_u32x2{};
- Id shared_u32x4{};
+ Id shared_u64{};
Id input_u32{};
Id input_f32{};
@@ -285,16 +284,16 @@ public:
Id image_u32{};
Id image_f32{};
- Id shared_memory_u8{};
Id shared_memory_u16{};
Id shared_memory_u32{};
- Id shared_memory_u32x2{};
- Id shared_memory_u32x4{};
+ Id shared_memory_u64{};
+ Id shared_memory_u16_type{};
Id shared_memory_u32_type{};
+ Id shared_memory_u64_type{};
- Id interpolate_func{};
- Id gl_bary_coord_id{};
+ Id bary_coord_persp_id{};
+ Id bary_coord_linear_id{};
struct TextureDefinition {
const VectorIds* data_types;
@@ -320,6 +319,7 @@ public:
Id size;
Id size_shorts;
Id size_dwords;
+ Id size_qwords;
std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;
const BufferSpv& operator[](PointerType alias) const {
diff --git a/src/shader_recompiler/frontend/copy_shader.cpp b/src/shader_recompiler/frontend/copy_shader.cpp
index 8750e2b18..4b5869e1d 100644
--- a/src/shader_recompiler/frontend/copy_shader.cpp
+++ b/src/shader_recompiler/frontend/copy_shader.cpp
@@ -67,6 +67,9 @@ CopyShaderData ParseCopyShader(std::span<const u32> code) {
if (last_attr != IR::Attribute::Position0) {
data.num_attrs = static_cast<u32>(last_attr) - static_cast<u32>(IR::Attribute::Param0) + 1;
+ const auto it = data.attr_map.begin();
+ const u32 comp_stride = std::next(it)->first - it->first;
+ data.output_vertices = comp_stride / 64;
}
return data;
diff --git a/src/shader_recompiler/frontend/copy_shader.h b/src/shader_recompiler/frontend/copy_shader.h
index 55cc31ebd..24c7060ed 100644
--- a/src/shader_recompiler/frontend/copy_shader.h
+++ b/src/shader_recompiler/frontend/copy_shader.h
@@ -3,8 +3,8 @@
#pragma once
+#include <map>