diff --git a/CMakeLists.txt b/CMakeLists.txt
index e993061bd..53a2281ec 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -203,7 +203,7 @@ execute_process(
# Set Version
set(EMULATOR_VERSION_MAJOR "0")
-set(EMULATOR_VERSION_MINOR "8")
+set(EMULATOR_VERSION_MINOR "9")
set(EMULATOR_VERSION_PATCH "1")
set_source_files_properties(src/shadps4.rc PROPERTIES COMPILE_DEFINITIONS "EMULATOR_VERSION_MAJOR=${EMULATOR_VERSION_MAJOR};EMULATOR_VERSION_MINOR=${EMULATOR_VERSION_MINOR};EMULATOR_VERSION_PATCH=${EMULATOR_VERSION_PATCH}")
@@ -674,6 +674,8 @@ set(COMMON src/common/logging/backend.cpp
src/common/polyfill_thread.h
src/common/rdtsc.cpp
src/common/rdtsc.h
+ src/common/recursive_lock.cpp
+ src/common/recursive_lock.h
src/common/sha1.h
src/common/signal_context.h
src/common/signal_context.cpp
@@ -864,6 +866,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/passes/shared_memory_barrier_pass.cpp
src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp
src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
+ src/shader_recompiler/ir/abstract_syntax_list.cpp
src/shader_recompiler/ir/abstract_syntax_list.h
src/shader_recompiler/ir/attribute.cpp
src/shader_recompiler/ir/attribute.h
diff --git a/dist/net.shadps4.shadPS4.metainfo.xml b/dist/net.shadps4.shadPS4.metainfo.xml
index 9f7b4f9c5..493dc0df6 100644
--- a/dist/net.shadps4.shadPS4.metainfo.xml
+++ b/dist/net.shadps4.shadPS4.metainfo.xml
@@ -37,7 +37,10 @@
Game
-
+
+ https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.9.0
+
+
https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.8.0
diff --git a/externals/sirit b/externals/sirit
index 09a1416ab..6b450704f 160000
--- a/externals/sirit
+++ b/externals/sirit
@@ -1 +1 @@
-Subproject commit 09a1416ab1b59ddfebd2618412f118f2004f3b2c
+Subproject commit 6b450704f6fedb9413d0c89a9eb59d028eb1e6c0
diff --git a/src/common/config.cpp b/src/common/config.cpp
index a1515d0e9..6565ab82a 100644
--- a/src/common/config.cpp
+++ b/src/common/config.cpp
@@ -155,7 +155,7 @@ bool GetLoadGameSizeEnabled() {
std::filesystem::path GetSaveDataPath() {
if (save_data_path.empty()) {
- return Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir);
+ return Common::FS::GetUserPath(Common::FS::PathType::UserDir) / "savedata";
}
return save_data_path;
}
diff --git a/src/common/path_util.cpp b/src/common/path_util.cpp
index 1a6ff9ec8..3270c24dd 100644
--- a/src/common/path_util.cpp
+++ b/src/common/path_util.cpp
@@ -128,7 +128,6 @@ static auto UserPaths = [] {
create_path(PathType::LogDir, user_dir / LOG_DIR);
create_path(PathType::ScreenshotsDir, user_dir / SCREENSHOTS_DIR);
create_path(PathType::ShaderDir, user_dir / SHADER_DIR);
- create_path(PathType::SaveDataDir, user_dir / SAVEDATA_DIR);
create_path(PathType::GameDataDir, user_dir / GAMEDATA_DIR);
create_path(PathType::TempDataDir, user_dir / TEMPDATA_DIR);
create_path(PathType::SysModuleDir, user_dir / SYSMODULES_DIR);
diff --git a/src/common/path_util.h b/src/common/path_util.h
index 2fd9b1588..b8053a229 100644
--- a/src/common/path_util.h
+++ b/src/common/path_util.h
@@ -18,7 +18,6 @@ enum class PathType {
LogDir, // Where log files are stored.
ScreenshotsDir, // Where screenshots are stored.
ShaderDir, // Where shaders are stored.
- SaveDataDir, // Where guest save data is stored.
TempDataDir, // Where game temp data is stored.
GameDataDir, // Where game data is stored.
SysModuleDir, // Where system modules are stored.
@@ -36,7 +35,6 @@ constexpr auto PORTABLE_DIR = "user";
constexpr auto LOG_DIR = "log";
constexpr auto SCREENSHOTS_DIR = "screenshots";
constexpr auto SHADER_DIR = "shader";
-constexpr auto SAVEDATA_DIR = "savedata";
constexpr auto GAMEDATA_DIR = "data";
constexpr auto TEMPDATA_DIR = "temp";
constexpr auto SYSMODULES_DIR = "sys_modules";
diff --git a/src/common/recursive_lock.cpp b/src/common/recursive_lock.cpp
new file mode 100644
index 000000000..2471a2ee0
--- /dev/null
+++ b/src/common/recursive_lock.cpp
@@ -0,0 +1,37 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <unordered_map>
+#include "common/assert.h"
+#include "common/recursive_lock.h"
+
+namespace Common::Detail {
+
+struct RecursiveLockState {
+ RecursiveLockType type;
+ int count;
+};
+
+thread_local std::unordered_map<void*, RecursiveLockState> g_recursive_locks;
+
+bool IncrementRecursiveLock(void* mutex, RecursiveLockType type) {
+ auto& state = g_recursive_locks[mutex];
+ if (state.count == 0) {
+ ASSERT(state.type == RecursiveLockType::None);
+ state.type = type;
+ }
+ ASSERT(state.type == type);
+ return state.count++ == 0;
+}
+
+bool DecrementRecursiveLock(void* mutex, RecursiveLockType type) {
+ auto& state = g_recursive_locks[mutex];
+ ASSERT(state.type == type && state.count > 0);
+ if (--state.count == 0) {
+ g_recursive_locks.erase(mutex);
+ return true;
+ }
+ return false;
+}
+
+} // namespace Common::Detail
diff --git a/src/common/recursive_lock.h b/src/common/recursive_lock.h
new file mode 100644
index 000000000..5a5fc6658
--- /dev/null
+++ b/src/common/recursive_lock.h
@@ -0,0 +1,67 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <mutex>
+#include <optional>
+#include <shared_mutex>
+
+namespace Common {
+
+namespace Detail {
+
+enum class RecursiveLockType { None, Shared, Exclusive };
+
+bool IncrementRecursiveLock(void* mutex, RecursiveLockType type);
+bool DecrementRecursiveLock(void* mutex, RecursiveLockType type);
+
+} // namespace Detail
+
+template <typename MutexType>
+class RecursiveScopedLock {
+public:
+ explicit RecursiveScopedLock(MutexType& mutex) : m_mutex(mutex), m_locked(false) {
+ if (Detail::IncrementRecursiveLock(&m_mutex, Detail::RecursiveLockType::Exclusive)) {
+ m_locked = true;
+ m_lock.emplace(m_mutex);
+ }
+ }
+
+ ~RecursiveScopedLock() {
+ Detail::DecrementRecursiveLock(&m_mutex, Detail::RecursiveLockType::Exclusive);
+ if (m_locked) {
+ m_lock.reset();
+ }
+ }
+
+private:
+ MutexType& m_mutex;
+ std::optional<std::unique_lock<MutexType>> m_lock;
+ bool m_locked = false;
+};
+
+template <typename MutexType>
+class RecursiveSharedLock {
+public:
+ explicit RecursiveSharedLock(MutexType& mutex) : m_mutex(mutex), m_locked(false) {
+ if (Detail::IncrementRecursiveLock(&m_mutex, Detail::RecursiveLockType::Shared)) {
+ m_locked = true;
+ m_lock.emplace(m_mutex);
+ }
+ }
+
+ ~RecursiveSharedLock() {
+ Detail::DecrementRecursiveLock(&m_mutex, Detail::RecursiveLockType::Shared);
+ if (m_locked) {
+ m_lock.reset();
+ }
+ }
+
+private:
+ MutexType& m_mutex;
+ std::optional<std::shared_lock<MutexType>> m_lock;
+ bool m_locked = false;
+};
+
+} // namespace Common
\ No newline at end of file
diff --git a/src/common/slot_vector.h b/src/common/slot_vector.h
index d4ac51361..2f693fb28 100644
--- a/src/common/slot_vector.h
+++ b/src/common/slot_vector.h
@@ -14,6 +14,9 @@ namespace Common {
struct SlotId {
static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();
+ SlotId() noexcept = default;
+ constexpr SlotId(u32 index) noexcept : index(index) {}
+
constexpr auto operator<=>(const SlotId&) const noexcept = default;
constexpr explicit operator bool() const noexcept {
@@ -28,6 +31,63 @@ class SlotVector {
constexpr static std::size_t InitialCapacity = 2048;
public:
+ template <typename ValueType, typename Pointer, typename Reference>
+ class Iterator {
+ public:
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = ValueType;
+ using difference_type = std::ptrdiff_t;
+ using pointer = Pointer;
+ using reference = Reference;
+
+ Iterator(SlotVector& vector_, SlotId index_) : vector(vector_), slot(index_) {
+ AdvanceToValid();
+ }
+
+ reference operator*() const {
+ return vector[slot];
+ }
+
+ pointer operator->() const {
+ return &vector[slot];
+ }
+
+ Iterator& operator++() {
+ ++slot.index;
+ AdvanceToValid();
+ return *this;
+ }
+
+ Iterator operator++(int) {
+ Iterator temp = *this;
+ ++(*this);
+ return temp;
+ }
+
+ bool operator==(const Iterator& other) const {
+ return slot == other.slot;
+ }
+
+ bool operator!=(const Iterator& other) const {
+ return !(*this == other);
+ }
+
+ private:
+ void AdvanceToValid() {
+ while (slot < vector.values_capacity && !vector.ReadStorageBit(slot.index)) {
+ ++slot.index;
+ }
+ }
+
+ SlotVector& vector;
+ SlotId slot;
+ };
+
+ using iterator = Iterator<T, T*, T&>;
+ using const_iterator = Iterator<const T, const T*, const T&>;
+ using reverse_iterator = std::reverse_iterator<iterator>;
+ using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
SlotVector() {
Reserve(InitialCapacity);
}
@@ -60,7 +120,7 @@ public:
}
template <typename... Args>
- [[nodiscard]] SlotId insert(Args&&... args) noexcept {
+ SlotId insert(Args&&... args) noexcept {
const u32 index = FreeValueIndex();
new (&values[index].object) T(std::forward(args)...);
SetStorageBit(index);
@@ -78,6 +138,54 @@ public:
return values_capacity - free_list.size();
}
+ iterator begin() noexcept {
+ return iterator(*this, 0);
+ }
+
+ const_iterator begin() const noexcept {
+ return const_iterator(*this, 0);
+ }
+
+ const_iterator cbegin() const noexcept {
+ return begin();
+ }
+
+ iterator end() noexcept {
+ return iterator(*this, values_capacity);
+ }
+
+ const_iterator end() const noexcept {
+ return const_iterator(*this, values_capacity);
+ }
+
+ const_iterator cend() const noexcept {
+ return end();
+ }
+
+ reverse_iterator rbegin() noexcept {
+ return reverse_iterator(end());
+ }
+
+ const_reverse_iterator rbegin() const noexcept {
+ return const_reverse_iterator(end());
+ }
+
+ const_reverse_iterator crbegin() const noexcept {
+ return rbegin();
+ }
+
+ reverse_iterator rend() noexcept {
+ return reverse_iterator(begin());
+ }
+
+ const_reverse_iterator rend() const noexcept {
+ return const_reverse_iterator(begin());
+ }
+
+ const_reverse_iterator crend() const noexcept {
+ return rend();
+ }
+
private:
struct NonTrivialDummy {
NonTrivialDummy() noexcept {}
diff --git a/src/core/libraries/np_trophy/np_trophy.cpp b/src/core/libraries/np_trophy/np_trophy.cpp
index a951d5655..6de84bd93 100644
--- a/src/core/libraries/np_trophy/np_trophy.cpp
+++ b/src/core/libraries/np_trophy/np_trophy.cpp
@@ -206,6 +206,10 @@ s32 PS4_SYSV_ABI sceNpTrophyDestroyHandle(OrbisNpTrophyHandle handle) {
if (handle == ORBIS_NP_TROPHY_INVALID_HANDLE)
return ORBIS_NP_TROPHY_ERROR_INVALID_HANDLE;
+ if (handle >= trophy_handles.size()) {
+ LOG_ERROR(Lib_NpTrophy, "Invalid handle {}", handle);
+ return ORBIS_NP_TROPHY_ERROR_INVALID_HANDLE;
+ }
if (!trophy_handles.is_allocated({static_cast<u32>(handle)})) {
return ORBIS_NP_TROPHY_ERROR_INVALID_HANDLE;
}
diff --git a/src/core/libraries/save_data/savedata.cpp b/src/core/libraries/save_data/savedata.cpp
index 0731392cd..b25ebde6c 100644
--- a/src/core/libraries/save_data/savedata.cpp
+++ b/src/core/libraries/save_data/savedata.cpp
@@ -8,6 +8,7 @@
#include
#include "common/assert.h"
+#include "common/config.h"
#include "common/cstring.h"
#include "common/elf_info.h"
#include "common/enum.h"
@@ -438,7 +439,7 @@ static Error saveDataMount(const OrbisSaveDataMount2* mount_info,
LOG_INFO(Lib_SaveData, "called with invalid block size");
}
- const auto root_save = Common::FS::GetUserPath(Common::FS::PathType::SaveDataDir);
+ const auto root_save = Config::GetSaveDataPath();
fs::create_directories(root_save);
const auto available = fs::space(root_save).available;
diff --git a/src/qt_gui/gui_context_menus.h b/src/qt_gui/gui_context_menus.h
index f435a3e38..46a40c5cd 100644
--- a/src/qt_gui/gui_context_menus.h
+++ b/src/qt_gui/gui_context_menus.h
@@ -156,11 +156,9 @@ public:
}
if (selected == openSaveDataFolder) {
- QString userPath;
- Common::FS::PathToQString(userPath,
- Common::FS::GetUserPath(Common::FS::PathType::UserDir));
- QString saveDataPath =
- userPath + "/savedata/1/" + QString::fromStdString(m_games[itemID].save_dir);
+ QString saveDataPath;
+ Common::FS::PathToQString(saveDataPath,
+ Config::GetSaveDataPath() / "1" / m_games[itemID].save_dir);
QDir(saveDataPath).mkpath(saveDataPath);
QDesktopServices::openUrl(QUrl::fromLocalFile(saveDataPath));
}
@@ -485,8 +483,7 @@ public:
dlc_path, Config::getAddonInstallDir() /
Common::FS::PathFromQString(folder_path).parent_path().filename());
Common::FS::PathToQString(save_data_path,
- Common::FS::GetUserPath(Common::FS::PathType::UserDir) /
- "savedata/1" / m_games[itemID].serial);
+ Config::GetSaveDataPath() / "1" / m_games[itemID].save_dir);
Common::FS::PathToQString(trophy_data_path,
Common::FS::GetUserPath(Common::FS::PathType::MetaDataDir) /
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 9ebb842cc..f2e6279f4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -154,6 +154,7 @@ void Traverse(EmitContext& ctx, const IR::Program& program) {
for (IR::Inst& inst : node.data.block->Instructions()) {
EmitInst(ctx, &inst);
}
+ ctx.first_to_last_label_map[label.value] = ctx.last_label;
break;
}
case IR::AbstractSyntaxNode::Type::If: {
@@ -298,6 +299,10 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
ctx.AddCapability(spv::Capability::Tessellation);
}
+ if (info.dma_types != IR::Type::Void) {
+ ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses);
+ ctx.AddExtension("SPV_KHR_physical_storage_buffer");
+ }
}
void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
@@ -387,7 +392,7 @@ void SetupFloatMode(EmitContext& ctx, const Profile& profile, const RuntimeInfo&
void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) {
auto inst{program.blocks.front()->begin()};
size_t block_index{0};
- ctx.PatchDeferredPhi([&](size_t phi_arg) {
+ ctx.PatchDeferredPhi([&](u32 phi_arg, Id first_parent) {
if (phi_arg == 0) {
++inst;
if (inst == program.blocks[block_index]->end() ||
@@ -398,7 +403,9 @@ void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) {
} while (inst->GetOpcode() != IR::Opcode::Phi);
}
}
- return ctx.Def(inst->Arg(phi_arg));
+ const Id arg = ctx.Def(inst->Arg(phi_arg));
+ const Id parent = ctx.first_to_last_label_map[first_parent.value];
+ return std::make_pair(arg, parent);
});
}
} // Anonymous namespace
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index c3799fb4b..d7c73ca8f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -60,7 +60,7 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
- const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
+ const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)};
return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] {
@@ -257,7 +257,7 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
const auto& buffer = ctx.buffers[binding];
- const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
+ const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
const auto [scope, semantics]{AtomicArgs(ctx)};
return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics);
@@ -265,7 +265,7 @@ Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) {
const auto& buffer = ctx.buffers[binding];
- const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
+ const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
const auto [scope, semantics]{AtomicArgs(ctx)};
return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 6442ae9f8..658d4759f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -161,33 +161,37 @@ void EmitGetGotoVariable(EmitContext&) {
UNREACHABLE_MSG("Unreachable instruction");
}
-using BufferAlias = EmitContext::BufferAlias;
+using PointerType = EmitContext::PointerType;
-Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
+Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
const u32 flatbuf_off_dw = inst->Flags<u32>();
- const auto& srt_flatbuf = ctx.buffers.back();
- ASSERT(srt_flatbuf.binding >= 0 && flatbuf_off_dw > 0 &&
- srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
- LOG_DEBUG(Render_Recompiler, "ReadConst from flatbuf dword {}", flatbuf_off_dw);
- const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
- const Id ptr{
- ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(flatbuf_off_dw))};
- return ctx.OpLoad(ctx.U32[1], ptr);
+ // We can only provide a fallback for immediate offsets.
+ if (flatbuf_off_dw == 0) {
+ return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const_dynamic, addr, offset);
+ } else {
+ return ctx.OpFunctionCall(ctx.U32[1], ctx.read_const, addr, offset,
+ ctx.ConstU32(flatbuf_off_dw));
+ }
}
-Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
+template <PointerType type>
+Id ReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
const auto& buffer = ctx.buffers[handle];
index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
- const auto [id, pointer_type] = buffer[BufferAlias::U32];
+ const auto [id, pointer_type] = buffer[type];
+ const auto value_type = type == PointerType::U32 ? ctx.U32[1] : ctx.F32[1];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
- const Id result{ctx.OpLoad(ctx.U32[1], ptr)};
+ const Id result{ctx.OpLoad(value_type, ptr)};
if (Sirit::ValidId(buffer.size_dwords)) {
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords);
- return ctx.OpSelect(ctx.U32[1], in_bounds, result, ctx.u32_zero_value);
- } else {
- return result;
+ return ctx.OpSelect(value_type, in_bounds, result, ctx.u32_zero_value);
}
+ return result;
+}
+
+Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
+ return ReadConstBuffer<PointerType::U32>(ctx, handle, index);
}
Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
@@ -246,7 +250,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate),
ctx.ConstU32(param.num_components)),
ctx.ConstU32(comp));
- return EmitReadConstBuffer(ctx, param.buffer_handle, offset);
+ return ReadConstBuffer<PointerType::F32>(ctx, param.buffer_handle, offset);
}
Id result;
@@ -432,7 +436,7 @@ static Id EmitLoadBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size,
return result;
}
-template <u32 N, BufferAlias alias>
+template <u32 N, PointerType alias>
static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto flags = inst->Flags<IR::BufferInstInfo>();
const auto& spv_buffer = ctx.buffers[handle];
@@ -440,7 +444,7 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
- const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
+ const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
const auto [id, pointer_type] = spv_buffer[alias];
boost::container::static_vector<Id, N> ids;
@@ -451,7 +455,7 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
if (!flags.typed) {
// Untyped loads have bounds checking per-component.
ids.push_back(EmitLoadBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords,
- result_i, alias == BufferAlias::F32));
+ result_i, alias == PointerType::F32));
} else {
ids.push_back(result_i);
}
@@ -461,7 +465,7 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
if (flags.typed) {
// Typed loads have single bounds check for the whole load.
return EmitLoadBufferBoundsCheck(ctx, index, spv_buffer.size_dwords, result,
- alias == BufferAlias::F32);
+ alias == PointerType::F32);
}
return result;
}
@@ -471,7 +475,7 @@ Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
- const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
+ const auto [id, pointer_type] = spv_buffer[PointerType::U8];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr))};
return EmitLoadBufferBoundsCheck<1>(ctx, address, spv_buffer.size, result, false);
@@ -482,7 +486,7 @@ Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
- const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
+ const auto [id, pointer_type] = spv_buffer[PointerType::U16];
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr))};
@@ -490,35 +494,35 @@ Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
}
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
- return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address);
+ return EmitLoadBufferB32xN<1, PointerType::U32>(ctx, inst, handle, address);
}
Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
- return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address);
+ return EmitLoadBufferB32xN<2, PointerType::U32>(ctx, inst, handle, address);
}
Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
- return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address);
+ return EmitLoadBufferB32xN<3, PointerType::U32>(ctx, inst, handle, address);
}
Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
- return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address);
+ return EmitLoadBufferB32xN<4, PointerType::U32>(ctx, inst, handle, address);
}
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
- return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address);
+ return EmitLoadBufferB32xN<1, PointerType::F32>(ctx, inst, handle, address);
}
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
- return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address);
+ return EmitLoadBufferB32xN<2, PointerType::F32>(ctx, inst, handle, address);
}
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
- return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address);
+ return EmitLoadBufferB32xN<3, PointerType::F32>(ctx, inst, handle, address);
}
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
- return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address);
+ return EmitLoadBufferB32xN<4, PointerType::F32>(ctx, inst, handle, address);
}
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@@ -548,7 +552,7 @@ void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto
emit_func();
}
-template <u32 N, BufferAlias alias>
+template <u32 N, PointerType alias>
static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
Id value) {
const auto flags = inst->Flags<IR::BufferInstInfo>();
@@ -557,7 +561,7 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
- const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
+ const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
const auto [id, pointer_type] = spv_buffer[alias];
auto store = [&] {
@@ -588,7 +592,7 @@ void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id v
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
- const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
+ const auto [id, pointer_type] = spv_buffer[PointerType::U8];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
const Id result{ctx.OpUConvert(ctx.U8, value)};
EmitStoreBufferBoundsCheck<1>(ctx, address, spv_buffer.size, [&] { ctx.OpStore(ptr, result); });
@@ -599,7 +603,7 @@ void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id
if (Sirit::ValidId(spv_buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
}
- const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
+ const auto [id, pointer_type] = spv_buffer[PointerType::U16];
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const Id result{ctx.OpUConvert(ctx.U16, value)};
@@ -608,35 +612,35 @@ void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id
}
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
- EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address, value);
+ EmitStoreBufferB32xN<1, PointerType::U32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
- EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address, value);
+ EmitStoreBufferB32xN<2, PointerType::U32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
- EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address, value);
+ EmitStoreBufferB32xN<3, PointerType::U32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
- EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address, value);
+ EmitStoreBufferB32xN<4, PointerType::U32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
- EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address, value);
+ EmitStoreBufferB32xN<1, PointerType::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
- EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address, value);
+ EmitStoreBufferB32xN<2, PointerType::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
- EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address, value);
+ EmitStoreBufferB32xN<3, PointerType::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
- EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address, value);
+ EmitStoreBufferB32xN<4, PointerType::F32>(ctx, inst, handle, address, value);
}
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 269f372d5..09f9732bf 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -61,7 +61,7 @@ void EmitSetVectorRegister(EmitContext& ctx);
void EmitSetGotoVariable(EmitContext& ctx);
void EmitGetGotoVariable(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx);
-Id EmitReadConst(EmitContext& ctx, IR::Inst* inst);
+Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset);
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 2640030df..68bfcc0d0 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -7,6 +7,7 @@
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/types.h"
+#include "video_core/buffer_cache/buffer_cache.h"
#include
#include
@@ -70,6 +71,12 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
Bindings& binding_)
: Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
+ if (info.dma_types != IR::Type::Void) {
+ SetMemoryModel(spv::AddressingModel::PhysicalStorageBuffer64, spv::MemoryModel::GLSL450);
+ } else {
+ SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
+ }
+
AddCapability(spv::Capability::Shader);
DefineArithmeticTypes();
DefineInterfaces();
@@ -137,9 +144,13 @@ void EmitContext::DefineArithmeticTypes() {
true_value = ConstantTrue(U1[1]);
false_value = ConstantFalse(U1[1]);
+ u8_one_value = Constant(U8, 1U);
+ u8_zero_value = Constant(U8, 0U);
u32_one_value = ConstU32(1U);
u32_zero_value = ConstU32(0U);
f32_zero_value = ConstF32(0.0f);
+ u64_one_value = Constant(U64, 1ULL);
+ u64_zero_value = Constant(U64, 0ULL);
pi_x2 = ConstF32(2.0f * float{std::numbers::pi});
@@ -157,6 +168,35 @@ void EmitContext::DefineArithmeticTypes() {
if (info.uses_fp64) {
frexp_result_f64 = Name(TypeStruct(F64[1], S32[1]), "frexp_result_f64");
}
+
+ if (True(info.dma_types & IR::Type::F64)) {
+ physical_pointer_types[PointerType::F64] =
+ TypePointer(spv::StorageClass::PhysicalStorageBuffer, F64[1]);
+ }
+ if (True(info.dma_types & IR::Type::U64)) {
+ physical_pointer_types[PointerType::U64] =
+ TypePointer(spv::StorageClass::PhysicalStorageBuffer, U64);
+ }
+ if (True(info.dma_types & IR::Type::F32)) {
+ physical_pointer_types[PointerType::F32] =
+ TypePointer(spv::StorageClass::PhysicalStorageBuffer, F32[1]);
+ }
+ if (True(info.dma_types & IR::Type::U32)) {
+ physical_pointer_types[PointerType::U32] =
+ TypePointer(spv::StorageClass::PhysicalStorageBuffer, U32[1]);
+ }
+ if (True(info.dma_types & IR::Type::F16)) {
+ physical_pointer_types[PointerType::F16] =
+ TypePointer(spv::StorageClass::PhysicalStorageBuffer, F16[1]);
+ }
+ if (True(info.dma_types & IR::Type::U16)) {
+ physical_pointer_types[PointerType::U16] =
+ TypePointer(spv::StorageClass::PhysicalStorageBuffer, U16);
+ }
+ if (True(info.dma_types & IR::Type::U8)) {
+ physical_pointer_types[PointerType::U8] =
+ TypePointer(spv::StorageClass::PhysicalStorageBuffer, U8);
+ }
}
void EmitContext::DefineInterfaces() {
@@ -195,9 +235,10 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
}
Id EmitContext::GetBufferSize(const u32 sharp_idx) {
- const auto& srt_flatbuf = buffers.back();
- ASSERT(srt_flatbuf.buffer_type == BufferType::ReadConstUbo);
- const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32];
+ // Can this be done with memory access? Like we do now with ReadConst
+ const auto& srt_flatbuf = buffers[flatbuf_index];
+ ASSERT(srt_flatbuf.buffer_type == BufferType::Flatbuf);
+ const auto [id, pointer_type] = srt_flatbuf[PointerType::U32];
const auto rsrc1{
OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))};
@@ -690,8 +731,14 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
case Shader::BufferType::GdsBuffer:
Name(id, "gds_buffer");
break;
- case Shader::BufferType::ReadConstUbo:
- Name(id, "srt_flatbuf_ubo");
+ case Shader::BufferType::Flatbuf:
+ Name(id, "srt_flatbuf");
+ break;
+ case Shader::BufferType::BdaPagetable:
+ Name(id, "bda_pagetable");
+ break;
+ case Shader::BufferType::FaultBuffer:
+ Name(id, "fault_buffer");
break;
case Shader::BufferType::SharedMemory:
Name(id, "ssbo_shmem");
@@ -705,35 +752,53 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
};
void EmitContext::DefineBuffers() {
- if (!profile.supports_robust_buffer_access && !info.has_readconst) {
- // In case ReadConstUbo has not already been bound by IR and is needed
+ if (!profile.supports_robust_buffer_access &&
+ info.readconst_types == Info::ReadConstType::None) {
+ // In case Flatbuf has not already been bound by IR and is needed
// to query buffer sizes, bind it now.
info.buffers.push_back({
.used_types = IR::Type::U32,
- .inline_cbuf = AmdGpu::Buffer::Null(),
- .buffer_type = BufferType::ReadConstUbo,
+ // We can't guarantee that flatbuf will not grow past UBO
+ // limit if there are a lot of ReadConsts. (We could specialize)
+ .inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
+ .buffer_type = BufferType::Flatbuf,
});
+ // In the future we may want to read buffer sizes from GPU memory if available.
+ // info.readconst_types |= Info::ReadConstType::Immediate;
}
for (const auto& desc : info.buffers) {
const auto buf_sharp = desc.GetSharp(info);
const bool is_storage = desc.IsStorage(buf_sharp, profile);
+ // Set indexes for special buffers.
+ if (desc.buffer_type == BufferType::Flatbuf) {
+ flatbuf_index = buffers.size();
+ } else if (desc.buffer_type == BufferType::BdaPagetable) {
+ bda_pagetable_index = buffers.size();
+ } else if (desc.buffer_type == BufferType::FaultBuffer) {
+ fault_buffer_index = buffers.size();
+ }
+
// Define aliases depending on the shader usage.
auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
+ if (True(desc.used_types & IR::Type::U64)) {
+ spv_buffer[PointerType::U64] =
+ DefineBuffer(is_storage, desc.is_written, 3, desc.buffer_type, U64);
+ }
if (True(desc.used_types & IR::Type::U32)) {
- spv_buffer[BufferAlias::U32] =
+ spv_buffer[PointerType::U32] =
DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);
}
if (True(desc.used_types & IR::Type::F32)) {
- spv_buffer[BufferAlias::F32] =
+ spv_buffer[PointerType::F32] =
DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, F32[1]);
}
if (True(desc.used_types & IR::Type::U16)) {
- spv_buffer[BufferAlias::U16] =
+ spv_buffer[PointerType::U16] =
DefineBuffer(is_storage, desc.is_written, 1, desc.buffer_type, U16);
}
if (True(desc.used_types & IR::Type::U8)) {
- spv_buffer[BufferAlias::U8] =
+ spv_buffer[PointerType::U8] =
DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U8);
}
++binding.unified;
@@ -1003,6 +1068,101 @@ Id EmitContext::DefineUfloatM5ToFloat32(u32 mantissa_bits, const std::string_vie
return func;
}
+Id EmitContext::DefineGetBdaPointer() {
+ const auto caching_pagebits{
+ Constant(U64, static_cast<u64>(VideoCore::BufferCache::CACHING_PAGEBITS))};
+ const auto caching_pagemask{Constant(U64, VideoCore::BufferCache::CACHING_PAGESIZE - 1)};
+
+ const auto func_type{TypeFunction(U64, U64)};
+ const auto func{OpFunction(U64, spv::FunctionControlMask::MaskNone, func_type)};
+ const auto address{OpFunctionParameter(U64)};
+ Name(func, "get_bda_pointer");
+ AddLabel();
+
+ const auto fault_label{OpLabel()};
+ const auto available_label{OpLabel()};
+ const auto merge_label{OpLabel()};
+
+ // Get page BDA
+ const auto page{OpShiftRightLogical(U64, address, caching_pagebits)};
+ const auto page32{OpUConvert(U32[1], page)};
+ const auto& bda_buffer{buffers[bda_pagetable_index]};
+ const auto [bda_buffer_id, bda_pointer_type] = bda_buffer[PointerType::U64];
+ const auto bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)};
+ const auto bda{OpLoad(U64, bda_ptr)};
+
+ // Check if page is GPU cached
+ const auto is_fault{OpIEqual(U1[1], bda, u64_zero_value)};
+ OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
+ OpBranchConditional(is_fault, fault_label, available_label);
+
+ // First time access, mark as fault
+ AddLabel(fault_label);
+ const auto& fault_buffer{buffers[fault_buffer_index]};
+ const auto [fault_buffer_id, fault_pointer_type] = fault_buffer[PointerType::U8];
+ const auto page_div8{OpShiftRightLogical(U32[1], page32, ConstU32(3U))};
+ const auto page_mod8{OpBitwiseAnd(U32[1], page32, ConstU32(7U))};
+ const auto page_mask{OpShiftLeftLogical(U8, u8_one_value, page_mod8)};
+ const auto fault_ptr{
+ OpAccessChain(fault_pointer_type, fault_buffer_id, u32_zero_value, page_div8)};
+ const auto fault_value{OpLoad(U8, fault_ptr)};
+ const auto fault_value_masked{OpBitwiseOr(U8, fault_value, page_mask)};
+ OpStore(fault_ptr, fault_value_masked);
+
+ // Return null pointer
+ const auto fallback_result{u64_zero_value};
+ OpBranch(merge_label);
+
+ // Value is available, compute address
+ AddLabel(available_label);
+ const auto offset_in_bda{OpBitwiseAnd(U64, address, caching_pagemask)};
+ const auto addr{OpIAdd(U64, bda, offset_in_bda)};
+ OpBranch(merge_label);
+
+ // Merge
+ AddLabel(merge_label);
+ const auto result{OpPhi(U64, addr, available_label, fallback_result, fault_label)};
+ OpReturnValue(result);
+ OpFunctionEnd();
+ return func;
+}
+
+Id EmitContext::DefineReadConst(bool dynamic) {
+ const auto func_type{!dynamic ? TypeFunction(U32[1], U32[2], U32[1], U32[1])
+ : TypeFunction(U32[1], U32[2], U32[1])};
+ const auto func{OpFunction(U32[1], spv::FunctionControlMask::MaskNone, func_type)};
+ const auto base{OpFunctionParameter(U32[2])};
+ const auto offset{OpFunctionParameter(U32[1])};
+ const auto flatbuf_offset{!dynamic ? OpFunctionParameter(U32[1]) : Id{}};
+ Name(func, dynamic ? "read_const_dynamic" : "read_const");
+ AddLabel();
+
+ const auto base_lo{OpUConvert(U64, OpCompositeExtract(U32[1], base, 0))};
+ const auto base_hi{OpUConvert(U64, OpCompositeExtract(U32[1], base, 1))};
+ const auto base_shift{OpShiftLeftLogical(U64, base_hi, ConstU32(32U))};
+ const auto base_addr{OpBitwiseOr(U64, base_lo, base_shift)};
+ const auto offset_bytes{OpShiftLeftLogical(U32[1], offset, ConstU32(2U))};
+ const auto addr{OpIAdd(U64, base_addr, OpUConvert(U64, offset_bytes))};
+
+ const auto result = EmitMemoryRead(U32[1], addr, [&]() {
+ if (dynamic) {
+ return u32_zero_value;
+ } else {
+ const auto& flatbuf_buffer{buffers[flatbuf_index]};
+ ASSERT(flatbuf_buffer.binding >= 0 &&
+ flatbuf_buffer.buffer_type == BufferType::Flatbuf);
+ const auto [flatbuf_buffer_id, flatbuf_pointer_type] = flatbuf_buffer[PointerType::U32];
+ const auto ptr{OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value,
+ flatbuf_offset)};
+ return OpLoad(U32[1], ptr);
+ }
+ });
+
+ OpReturnValue(result);
+ OpFunctionEnd();
+ return func;
+}
+
void EmitContext::DefineFunctions() {
if (info.uses_pack_10_11_11) {
f32_to_uf11 = DefineFloat32ToUfloatM5(6, "f32_to_uf11");
@@ -1012,6 +1172,18 @@ void EmitContext::DefineFunctions() {
uf11_to_f32 = DefineUfloatM5ToFloat32(6, "uf11_to_f32");
uf10_to_f32 = DefineUfloatM5ToFloat32(5, "uf10_to_f32");
}
+ if (info.dma_types != IR::Type::Void) {
+ get_bda_pointer = DefineGetBdaPointer();
+ }
+
+ if (True(info.readconst_types & Info::ReadConstType::Immediate)) {
+ LOG_DEBUG(Render_Recompiler, "Shader {:#x} uses immediate ReadConst", info.pgm_hash);
+ read_const = DefineReadConst(false);
+ }
+ if (True(info.readconst_types & Info::ReadConstType::Dynamic)) {
+ LOG_DEBUG(Render_Recompiler, "Shader {:#x} uses dynamic ReadConst", info.pgm_hash);
+ read_const_dynamic = DefineReadConst(true);
+ }
}
} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 38d55e0e4..a2e0d2f47 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -4,6 +4,7 @@
#pragma once
#include <array>
+#include <unordered_map>
#include <sirit/sirit.h>
#include "shader_recompiler/backend/bindings.h"
@@ -41,6 +42,17 @@ public:
Bindings& binding);
~EmitContext();
+ enum class PointerType : u32 {
+ U8,
+ U16,
+ F16,
+ U32,
+ F32,
+ U64,
+ F64,
+ NumAlias,
+ };
+
Id Def(const IR::Value& value);
void DefineBufferProperties();
@@ -133,12 +145,72 @@ public:
return ConstantComposite(type, constituents);
}
+ inline Id AddLabel() {
+ last_label = Module::AddLabel();
+ return last_label;
+ }
+
+ inline Id AddLabel(Id label) {
+ last_label = Module::AddLabel(label);
+ return last_label;
+ }
+
+ PointerType PointerTypeFromType(Id type) {
+ if (type.value == U8.value)
+ return PointerType::U8;
+ if (type.value == U16.value)
+ return PointerType::U16;
+ if (type.value == F16[1].value)
+ return PointerType::F16;
+ if (type.value == U32[1].value)
+ return PointerType::U32;
+ if (type.value == F32[1].value)
+ return PointerType::F32;
+ if (type.value == U64.value)
+ return PointerType::U64;
+ if (type.value == F64[1].value)
+ return PointerType::F64;
+ UNREACHABLE_MSG("Unknown type for pointer");
+ }
+
+ Id EmitMemoryRead(Id type, Id address, auto&& fallback) {
+ const Id available_label = OpLabel();
+ const Id fallback_label = OpLabel();
+ const Id merge_label = OpLabel();
+
+ const Id addr = OpFunctionCall(U64, get_bda_pointer, address);
+ const Id is_available = OpINotEqual(U1[1], addr, u64_zero_value);
+ OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
+ OpBranchConditional(is_available, available_label, fallback_label);
+
+ // Available
+ AddLabel(available_label);
+ const auto pointer_type = PointerTypeFromType(type);
+ const Id pointer_type_id = physical_pointer_types[pointer_type];
+ const Id addr_ptr = OpConvertUToPtr(pointer_type_id, addr);
+ const Id result = OpLoad(type, addr_ptr, spv::MemoryAccessMask::Aligned, 4u);
+ OpBranch(merge_label);
+
+ // Fallback
+ AddLabel(fallback_label);
+ const Id fallback_result = fallback();
+ OpBranch(merge_label);
+
+ // Merge
+ AddLabel(merge_label);
+ const Id final_result =
+ OpPhi(type, fallback_result, fallback_label, result, available_label);
+ return final_result;
+ }
+
Info& info;
const RuntimeInfo& runtime_info;
const Profile& profile;
Stage stage;
LogicalStage l_stage{};
+ Id last_label{};
+
Id void_id{};
Id U8{};
Id S8{};
@@ -161,9 +233,13 @@ public:
Id true_value{};
Id false_value{};
+ Id u8_one_value{};
+ Id u8_zero_value{};
Id u32_one_value{};
Id u32_zero_value{};
Id f32_zero_value{};
+ Id u64_one_value{};
+ Id u64_zero_value{};
Id shared_u8{};
Id shared_u16{};
@@ -231,14 +307,6 @@ public:
bool is_storage = false;
};
- enum class BufferAlias : u32 {
- U8,
- U16,
- U32,
- F32,
- NumAlias,
- };
-
struct BufferSpv {
Id id;
Id pointer_type;
@@ -252,22 +320,40 @@ public:
Id size;
Id size_shorts;
Id size_dwords;
- std::array<BufferSpv, u32(BufferAlias::NumAlias)> aliases;
+ std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;
- const BufferSpv& operator[](BufferAlias alias) const {
+ const BufferSpv& operator[](PointerType alias) const {
return aliases[u32(alias)];
}
- BufferSpv& operator[](BufferAlias alias) {
+ BufferSpv& operator[](PointerType alias) {
return aliases[u32(alias)];
}
};
+ struct PhysicalPointerTypes {
+ std::array<Id, u32(PointerType::NumAlias)> types;
+
+ const Id& operator[](PointerType type) const {
+ return types[u32(type)];
+ }
+
+ Id& operator[](PointerType type) {
+ return types[u32(type)];
+ }
+ };
+
Bindings& binding;
boost::container::small_vector<Id, 16> buf_type_ids;
boost::container::small_vector<BufferDefinition, 16> buffers;
boost::container::small_vector<TextureDefinition, 8> images;
boost::container::small_vector<SamplerDefinition, 4> samplers;
+ PhysicalPointerTypes physical_pointer_types;
+ std::unordered_map<Id, Id> first_to_last_label_map;
+
+ size_t flatbuf_index{};
+ size_t bda_pagetable_index{};
+ size_t fault_buffer_index{};
Id sampler_type{};
Id sampler_pointer_type{};
@@ -292,6 +378,11 @@ public:
Id uf10_to_f32{};
Id f32_to_uf10{};
+ Id get_bda_pointer{};
+
+ Id read_const{};
+ Id read_const_dynamic{};
+
private:
void DefineArithmeticTypes();
void DefineInterfaces();
@@ -312,6 +403,10 @@ private:
Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name);
Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name);
+ Id DefineGetBdaPointer();
+
+ Id DefineReadConst(bool dynamic);
+
Id GetBufferSize(u32 sharp_idx);
};
diff --git a/src/shader_recompiler/frontend/translate/scalar_memory.cpp b/src/shader_recompiler/frontend/translate/scalar_memory.cpp
index 376cc304e..3c6fd3968 100644
--- a/src/shader_recompiler/frontend/translate/scalar_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/scalar_memory.cpp
@@ -39,21 +39,22 @@ void Translator::EmitScalarMemory(const GcnInst& inst) {
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd;
- const u32 dword_offset = [&] -> u32 {
+ const IR::ScalarReg sbase{inst.src[0].code * 2};
+ const IR::U32 dword_offset = [&] -> IR::U32 {
if (smrd.imm) {
- return smrd.offset;
+ return ir.Imm32(smrd.offset);
}
if (smrd.offset == SQ_SRC_LITERAL) {
- return inst.src[1].code;
+ return ir.Imm32(inst.src[1].code);
}
- UNREACHABLE();
+ return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
}();
- const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::Value base =
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
- ir.SetScalarReg(dst_reg + i, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
+ IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));
+ ir.SetScalarReg(dst_reg + i, ir.ReadConst(base, index));
}
}
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index ba28d7e43..d349d7827 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -41,7 +41,9 @@ constexpr u32 NUM_TEXTURE_TYPES = 7;
enum class BufferType : u32 {
Guest,
- ReadConstUbo,
+ Flatbuf,
+ BdaPagetable,
+ FaultBuffer,
GdsBuffer,
SharedMemory,
};
@@ -215,11 +217,18 @@ struct Info {
bool stores_tess_level_outer{};
bool stores_tess_level_inner{};
bool translation_failed{};
- bool has_readconst{};
u8 mrt_mask{0u};
bool has_fetch_shader{false};
u32 fetch_shader_sgpr_base{0u};
+ enum class ReadConstType {
+ None = 0,
+ Immediate = 1 << 0,
+ Dynamic = 1 << 1,
+ };
+ ReadConstType readconst_types{};
+ IR::Type dma_types{IR::Type::Void};
+
explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
: stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
user_data{params.user_data} {}
@@ -277,6 +286,7 @@ struct Info {
sizeof(tess_constants));
}
};
+DECLARE_ENUM_FLAG_OPERATORS(Info::ReadConstType);
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
return inline_cbuf ? inline_cbuf : info.ReadUdSharp(sharp_idx);
diff --git a/src/shader_recompiler/ir/abstract_syntax_list.cpp b/src/shader_recompiler/ir/abstract_syntax_list.cpp
new file mode 100644
index 000000000..0d967ac11
--- /dev/null
+++ b/src/shader_recompiler/ir/abstract_syntax_list.cpp
@@ -0,0 +1,44 @@
+// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "abstract_syntax_list.h"
+
+namespace Shader::IR {
+
+std::string DumpASLNode(const AbstractSyntaxNode& node,
+ const std::map<const Block*, size_t>& block_to_index,
+ const std::map<const Inst*, size_t>& inst_to_index) {
+ switch (node.type) {
+ case AbstractSyntaxNode::Type::Block:
+ return fmt::format("Block: ${}", block_to_index.at(node.data.block));
+ case AbstractSyntaxNode::Type::If:
+ return fmt::format("If: cond = %{}, body = ${}, merge = ${}",
+ inst_to_index.at(node.data.if_node.cond.Inst()),
+ block_to_index.at(node.data.if_node.body),
+ block_to_index.at(node.data.if_node.merge));
+ case AbstractSyntaxNode::Type::EndIf:
+ return fmt::format("EndIf: merge = ${}", block_to_index.at(node.data.end_if.merge));
+ case AbstractSyntaxNode::Type::Loop:
+ return fmt::format("Loop: body = ${}, continue = ${}, merge = ${}",
+ block_to_index.at(node.data.loop.body),
+ block_to_index.at(node.data.loop.continue_block),
+ block_to_index.at(node.data.loop.merge));
+ case AbstractSyntaxNode::Type::Repeat:
+ return fmt::format("Repeat: cond = %{}, header = ${}, merge = ${}",
+ inst_to_index.at(node.data.repeat.cond.Inst()),
+ block_to_index.at(node.data.repeat.loop_header),
+ block_to_index.at(node.data.repeat.merge));
+ case AbstractSyntaxNode::Type::Break:
+ return fmt::format("Break: cond = %{}, merge = ${}, skip = ${}",
+ inst_to_index.at(node.data.break_node.cond.Inst()),
+ block_to_index.at(node.data.break_node.merge),
+ block_to_index.at(node.data.break_node.skip));
+ case AbstractSyntaxNode::Type::Return:
+ return "Return";
+ case AbstractSyntaxNode::Type::Unreachable:
+ return "Unreachable";
+ };
+ UNREACHABLE();
+}
+
+} // namespace Shader::IR
\ No newline at end of file
diff --git a/src/shader_recompiler/ir/abstract_syntax_list.h b/src/shader_recompiler/ir/abstract_syntax_list.h
index 313a23abc..a620baccb 100644
--- a/src/shader_recompiler/ir/abstract_syntax_list.h
+++ b/src/shader_recompiler/ir/abstract_syntax_list.h
@@ -3,6 +3,7 @@
#pragma once
+#include