mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-22 18:15:14 +00:00
Merge remote-tracking branch 'upstream/main' into batch-sync
This commit is contained in:
commit
cee22c45e8
@ -1,14 +1,28 @@
|
||||
# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
file(GLOB QT_KITS LIST_DIRECTORIES true "C:/Qt/*/msvc*_64")
|
||||
list(SORT QT_KITS COMPARE NATURAL)
|
||||
list(REVERSE QT_KITS)
|
||||
if(QT_KITS)
|
||||
list(GET QT_KITS 0 QT_PREFIX)
|
||||
set(CMAKE_PREFIX_PATH "${QT_PREFIX}" CACHE PATH "Qt prefix auto‑detected" FORCE)
|
||||
message(STATUS "Auto-detected Qt prefix: ${QT_PREFIX}")
|
||||
else()
|
||||
message(STATUS "findQt.cmake: no Qt‑Directory found in C:/Qt – please set CMAKE_PREFIX_PATH manually")
|
||||
endif()
|
||||
# Scans every drive letter for Qt kits laid out as <drive>:/Qt/<version>/msvc*_64 and
# points CMAKE_PREFIX_PATH at the kit whose <version> directory name is the highest.

# The fallback message at the bottom asks users to set CMAKE_PREFIX_PATH by hand, so a
# value that is already present must not be clobbered by the FORCE'd cache write below.
if(CMAKE_PREFIX_PATH)
    message(STATUS "DetectQtInstallation.cmake: CMAKE_PREFIX_PATH is already set, skipping Qt auto-detection")
    return()
endif()

set(highest_version "0")
set(CANDIDATE_DRIVES A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)

foreach(drive ${CANDIDATE_DRIVES})
    # CONFIGURE_DEPENDS makes the build re-check this glob and re-run CMake if the result changes.
    file(GLOB kits LIST_DIRECTORIES true CONFIGURE_DEPENDS "${drive}:/Qt/*/msvc*_64")
    foreach(kit IN LISTS kits)
        # The kit's parent directory name is treated as the Qt version: .../Qt/<version>/<kit>.
        get_filename_component(version_dir "${kit}" DIRECTORY)
        get_filename_component(kit_version "${version_dir}" NAME)

        message(STATUS "DetectQtInstallation.cmake: Detected Qt: ${kit}")

        # Strictly greater: when two kits share a version, the first one found is kept.
        if(kit_version VERSION_GREATER highest_version)
            set(highest_version "${kit_version}")
            set(QT_PREFIX "${kit}")
        endif()
    endforeach()
endforeach()

if(QT_PREFIX)
    set(CMAKE_PREFIX_PATH "${QT_PREFIX}" CACHE PATH "Qt prefix auto‑detected" FORCE)
    message(STATUS "DetectQtInstallation.cmake: Choose newest Qt: ${QT_PREFIX}")
else()
    message(STATUS "DetectQtInstallation.cmake: No Qt‑Directory found in <drive>:/Qt – please set CMAKE_PREFIX_PATH manually")
endif()
|
||||
|
2
externals/sirit
vendored
2
externals/sirit
vendored
@ -1 +1 @@
|
||||
Subproject commit 6b450704f6fedb9413d0c89a9eb59d028eb1e6c0
|
||||
Subproject commit b4eccb336f1b1169af48dac1e04015985af86e3e
|
@ -60,11 +60,14 @@ static bool overrideControllerColor = false;
|
||||
static int controllerCustomColorRGB[3] = {0, 0, 255};
|
||||
|
||||
// GPU
|
||||
static u32 screenWidth = 1280;
|
||||
static u32 screenHeight = 720;
|
||||
static u32 windowWidth = 1280;
|
||||
static u32 windowHeight = 720;
|
||||
static u32 internalScreenWidth = 1280;
|
||||
static u32 internalScreenHeight = 720;
|
||||
static bool isNullGpu = false;
|
||||
static bool shouldCopyGPUBuffers = false;
|
||||
static bool readbacksEnabled = false;
|
||||
static bool readbackLinearImagesEnabled = false;
|
||||
static bool directMemoryAccessEnabled = false;
|
||||
static bool shouldDumpShaders = false;
|
||||
static bool shouldPatchShaders = false;
|
||||
@ -103,7 +106,7 @@ u32 m_language = 1; // english
|
||||
static std::string trophyKey = "";
|
||||
|
||||
// Expected number of items in the config file
|
||||
static constexpr u64 total_entries = 51;
|
||||
static constexpr u64 total_entries = 54;
|
||||
|
||||
bool allowHDR() {
|
||||
return isHDRAllowed;
|
||||
@ -194,12 +197,20 @@ double getTrophyNotificationDuration() {
|
||||
return trophyNotificationDuration;
|
||||
}
|
||||
|
||||
u32 getScreenWidth() {
|
||||
return screenWidth;
|
||||
u32 getWindowWidth() {
|
||||
return windowWidth;
|
||||
}
|
||||
|
||||
u32 getScreenHeight() {
|
||||
return screenHeight;
|
||||
u32 getWindowHeight() {
|
||||
return windowHeight;
|
||||
}
|
||||
|
||||
// Returns the internalScreenWidth setting, i.e. the value written by
// setInternalScreenWidth() and loaded from the "internalScreenWidth" GPU config key.
u32 getInternalScreenWidth() {
    // Must read the width variable; returning internalScreenHeight here would make
    // the getter disagree with its setter.
    return internalScreenWidth;
}
|
||||
|
||||
u32 getInternalScreenHeight() {
|
||||
return internalScreenHeight;
|
||||
}
|
||||
|
||||
s32 getGpuId() {
|
||||
@ -262,6 +273,10 @@ bool readbacks() {
|
||||
return readbacksEnabled;
|
||||
}
|
||||
|
||||
bool readbackLinearImages() {
|
||||
return readbackLinearImagesEnabled;
|
||||
}
|
||||
|
||||
bool directMemoryAccess() {
|
||||
return directMemoryAccessEnabled;
|
||||
}
|
||||
@ -334,12 +349,20 @@ void setGpuId(s32 selectedGpuId) {
|
||||
gpuId = selectedGpuId;
|
||||
}
|
||||
|
||||
void setScreenWidth(u32 width) {
|
||||
screenWidth = width;
|
||||
void setWindowWidth(u32 width) {
|
||||
windowWidth = width;
|
||||
}
|
||||
|
||||
void setScreenHeight(u32 height) {
|
||||
screenHeight = height;
|
||||
void setWindowHeight(u32 height) {
|
||||
windowHeight = height;
|
||||
}
|
||||
|
||||
void setInternalScreenWidth(u32 width) {
|
||||
internalScreenWidth = width;
|
||||
}
|
||||
|
||||
void setInternalScreenHeight(u32 height) {
|
||||
internalScreenHeight = height;
|
||||
}
|
||||
|
||||
void setDebugDump(bool enable) {
|
||||
@ -421,6 +444,7 @@ void setCursorState(s16 newCursorState) {
|
||||
void setCursorHideTimeout(int newcursorHideTimeout) {
|
||||
cursorHideTimeout = newcursorHideTimeout;
|
||||
}
|
||||
|
||||
void setTrophyNotificationDuration(double newTrophyNotificationDuration) {
|
||||
trophyNotificationDuration = newTrophyNotificationDuration;
|
||||
}
|
||||
@ -626,11 +650,16 @@ void load(const std::filesystem::path& path) {
|
||||
if (data.contains("GPU")) {
|
||||
const toml::value& gpu = data.at("GPU");
|
||||
|
||||
screenWidth = toml::find_or<int>(gpu, "screenWidth", screenWidth);
|
||||
screenHeight = toml::find_or<int>(gpu, "screenHeight", screenHeight);
|
||||
windowWidth = toml::find_or<int>(gpu, "screenWidth", windowWidth);
|
||||
windowHeight = toml::find_or<int>(gpu, "screenHeight", windowHeight);
|
||||
internalScreenWidth = toml::find_or<int>(gpu, "internalScreenWidth", internalScreenWidth);
|
||||
internalScreenHeight =
|
||||
toml::find_or<int>(gpu, "internalScreenHeight", internalScreenHeight);
|
||||
isNullGpu = toml::find_or<bool>(gpu, "nullGpu", isNullGpu);
|
||||
shouldCopyGPUBuffers = toml::find_or<bool>(gpu, "copyGPUBuffers", shouldCopyGPUBuffers);
|
||||
readbacksEnabled = toml::find_or<bool>(gpu, "readbacks", readbacksEnabled);
|
||||
readbackLinearImagesEnabled =
|
||||
toml::find_or<bool>(gpu, "readbackLinearImages", readbackLinearImagesEnabled);
|
||||
directMemoryAccessEnabled =
|
||||
toml::find_or<bool>(gpu, "directMemoryAccess", directMemoryAccessEnabled);
|
||||
shouldDumpShaders = toml::find_or<bool>(gpu, "dumpShaders", shouldDumpShaders);
|
||||
@ -797,11 +826,14 @@ void save(const std::filesystem::path& path) {
|
||||
data["Input"]["specialPadClass"] = specialPadClass;
|
||||
data["Input"]["isMotionControlsEnabled"] = isMotionControlsEnabled;
|
||||
data["Input"]["useUnifiedInputConfig"] = useUnifiedInputConfig;
|
||||
data["GPU"]["screenWidth"] = screenWidth;
|
||||
data["GPU"]["screenHeight"] = screenHeight;
|
||||
data["GPU"]["screenWidth"] = windowWidth;
|
||||
data["GPU"]["screenHeight"] = windowHeight;
|
||||
data["GPU"]["internalScreenWidth"] = internalScreenWidth;
|
||||
data["GPU"]["internalScreenHeight"] = internalScreenHeight;
|
||||
data["GPU"]["nullGpu"] = isNullGpu;
|
||||
data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers;
|
||||
data["GPU"]["readbacks"] = readbacksEnabled;
|
||||
data["GPU"]["readbackLinearImages"] = readbackLinearImagesEnabled;
|
||||
data["GPU"]["directMemoryAccess"] = directMemoryAccessEnabled;
|
||||
data["GPU"]["dumpShaders"] = shouldDumpShaders;
|
||||
data["GPU"]["patchShaders"] = shouldPatchShaders;
|
||||
@ -897,11 +929,14 @@ void setDefaultValues() {
|
||||
controllerCustomColorRGB[2] = 255;
|
||||
|
||||
// GPU
|
||||
screenWidth = 1280;
|
||||
screenHeight = 720;
|
||||
windowWidth = 1280;
|
||||
windowHeight = 720;
|
||||
internalScreenWidth = 1280;
|
||||
internalScreenHeight = 720;
|
||||
isNullGpu = false;
|
||||
shouldCopyGPUBuffers = false;
|
||||
readbacksEnabled = false;
|
||||
readbackLinearImagesEnabled = false;
|
||||
directMemoryAccessEnabled = false;
|
||||
shouldDumpShaders = false;
|
||||
shouldPatchShaders = false;
|
||||
|
@ -25,10 +25,14 @@ bool getIsFullscreen();
|
||||
void setIsFullscreen(bool enable);
|
||||
std::string getFullscreenMode();
|
||||
void setFullscreenMode(std::string mode);
|
||||
u32 getScreenWidth();
|
||||
u32 getScreenHeight();
|
||||
void setScreenWidth(u32 width);
|
||||
void setScreenHeight(u32 height);
|
||||
u32 getWindowWidth();
|
||||
u32 getWindowHeight();
|
||||
void setWindowWidth(u32 width);
|
||||
void setWindowHeight(u32 height);
|
||||
u32 getInternalScreenWidth();
|
||||
u32 getInternalScreenHeight();
|
||||
void setInternalScreenWidth(u32 width);
|
||||
void setInternalScreenHeight(u32 height);
|
||||
bool debugDump();
|
||||
void setDebugDump(bool enable);
|
||||
s32 getGpuId();
|
||||
@ -47,6 +51,7 @@ bool copyGPUCmdBuffers();
|
||||
void setCopyGPUCmdBuffers(bool enable);
|
||||
bool readbacks();
|
||||
void setReadbacks(bool enable);
|
||||
bool readbackLinearImages();
|
||||
bool directMemoryAccess();
|
||||
void setDirectMemoryAccess(bool enable);
|
||||
bool dumpShaders();
|
||||
|
@ -163,7 +163,9 @@ static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operan
|
||||
mask = (1ULL << length) - 1;
|
||||
}
|
||||
|
||||
ASSERT_MSG(length + index <= 64, "length + index must be less than or equal to 64.");
|
||||
if (length + index > 64) {
|
||||
mask = 0xFFFF'FFFF'FFFF'FFFF;
|
||||
}
|
||||
|
||||
// Get lower qword from xmm register
|
||||
c.vmovq(scratch1, xmm_dst);
|
||||
@ -177,8 +179,8 @@ static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operan
|
||||
c.mov(scratch2, mask);
|
||||
c.and_(scratch1, scratch2);
|
||||
|
||||
// Writeback to xmm register, extrq instruction says top 64-bits are undefined so we don't
|
||||
// care to preserve them
|
||||
// Writeback to xmm register, extrq instruction says top 64-bits are undefined but zeroed on
|
||||
// AMD CPUs
|
||||
c.vmovq(xmm_dst, scratch1);
|
||||
|
||||
c.pop(scratch2);
|
||||
@ -287,7 +289,9 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper
|
||||
mask_value = (1ULL << length) - 1;
|
||||
}
|
||||
|
||||
ASSERT_MSG(length + index <= 64, "length + index must be less than or equal to 64.");
|
||||
if (length + index > 64) {
|
||||
mask_value = 0xFFFF'FFFF'FFFF'FFFF;
|
||||
}
|
||||
|
||||
c.vmovq(scratch1, xmm_src);
|
||||
c.vmovq(scratch2, xmm_dst);
|
||||
@ -307,8 +311,9 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper
|
||||
// dst |= src
|
||||
c.or_(scratch2, scratch1);
|
||||
|
||||
// Insert scratch2 into low 64 bits of dst, upper 64 bits are unaffected
|
||||
c.vpinsrq(xmm_dst, xmm_dst, scratch2, 0);
|
||||
// Insert scratch2 into low 64 bits of dst, upper 64 bits are undefined but zeroed on AMD
|
||||
// CPUs
|
||||
c.vmovq(xmm_dst, scratch2);
|
||||
|
||||
c.pop(mask);
|
||||
c.pop(scratch2);
|
||||
@ -374,7 +379,7 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper
|
||||
c.and_(scratch2, mask);
|
||||
c.or_(scratch2, scratch1);
|
||||
|
||||
// Upper 64 bits are undefined in insertq
|
||||
// Upper 64 bits are undefined in insertq but AMD CPUs zero them
|
||||
c.vmovq(xmm_dst, scratch2);
|
||||
|
||||
c.pop(mask);
|
||||
@ -635,6 +640,7 @@ static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) {
|
||||
lowQWordDst >>= index;
|
||||
lowQWordDst &= mask;
|
||||
|
||||
memset((u8*)dst + sizeof(u64), 0, sizeof(u64));
|
||||
memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
|
||||
|
||||
Common::IncrementRip(ctx, 4);
|
||||
@ -675,6 +681,7 @@ static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) {
|
||||
lowQWordDst &= ~(mask << index);
|
||||
lowQWordDst |= lowQWordSrc << index;
|
||||
|
||||
memset((u8*)dst + sizeof(u64), 0, sizeof(u64));
|
||||
memcpy(dst, &lowQWordDst, sizeof(lowQWordDst));
|
||||
|
||||
Common::IncrementRip(ctx, 4);
|
||||
|
@ -43,8 +43,8 @@ public:
|
||||
openEvent.param.rect.x = m_param.ime.posx;
|
||||
openEvent.param.rect.y = m_param.ime.posy;
|
||||
} else {
|
||||
openEvent.param.resource_id_array.userId = 1;
|
||||
openEvent.param.resource_id_array.resourceId[0] = 1;
|
||||
openEvent.param.resource_id_array.user_id = 1;
|
||||
openEvent.param.resource_id_array.resource_id[0] = 1;
|
||||
}
|
||||
|
||||
// Are we supposed to call the event handler on init with
|
||||
@ -59,10 +59,10 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
s32 Update(OrbisImeEventHandler handler) {
|
||||
Error Update(OrbisImeEventHandler handler) {
|
||||
if (!m_ime_mode) {
|
||||
/* We don't handle any events for ImeKeyboard */
|
||||
return ORBIS_OK;
|
||||
return Error::OK;
|
||||
}
|
||||
|
||||
std::unique_lock lock{g_ime_state.queue_mutex};
|
||||
@ -73,7 +73,7 @@ public:
|
||||
Execute(handler, &event, false);
|
||||
}
|
||||
|
||||
return ORBIS_OK;
|
||||
return Error::OK;
|
||||
}
|
||||
|
||||
void Execute(OrbisImeEventHandler handler, OrbisImeEvent* event, bool use_param_handler) {
|
||||
@ -94,14 +94,14 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
s32 SetText(const char16_t* text, u32 length) {
|
||||
Error SetText(const char16_t* text, u32 length) {
|
||||
g_ime_state.SetText(text, length);
|
||||
return ORBIS_OK;
|
||||
return Error::OK;
|
||||
}
|
||||
|
||||
s32 SetCaret(const OrbisImeCaret* caret) {
|
||||
Error SetCaret(const OrbisImeCaret* caret) {
|
||||
g_ime_state.SetCaret(caret->index);
|
||||
return ORBIS_OK;
|
||||
return Error::OK;
|
||||
}
|
||||
|
||||
bool IsIme() {
|
||||
@ -222,11 +222,11 @@ int PS4_SYSV_ABI sceImeGetPanelPositionAndForm() {
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceImeGetPanelSize(const OrbisImeParam* param, u32* width, u32* height) {
|
||||
Error PS4_SYSV_ABI sceImeGetPanelSize(const OrbisImeParam* param, u32* width, u32* height) {
|
||||
LOG_INFO(Lib_Ime, "called");
|
||||
|
||||
if (!width || !height) {
|
||||
return ORBIS_IME_ERROR_INVALID_ADDRESS;
|
||||
return Error::INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
switch (param->type) {
|
||||
@ -244,18 +244,18 @@ s32 PS4_SYSV_ABI sceImeGetPanelSize(const OrbisImeParam* param, u32* width, u32*
|
||||
break;
|
||||
}
|
||||
|
||||
return ORBIS_OK;
|
||||
return Error::OK;
|
||||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceImeKeyboardClose(s32 userId) {
|
||||
// Closes the keyboard handler created by sceImeKeyboardOpen.
// Returns Error::NOT_OPENED when no keyboard handler exists, Error::OK otherwise.
// userId is currently unused.
Error PS4_SYSV_ABI sceImeKeyboardClose(s32 userId) {
    LOG_INFO(Lib_Ime, "(STUBBED) called");

    if (!g_keyboard_handler) {
        return Error::NOT_OPENED;
    }

    // reset() destroys the owned ImeHandler; release() would only give up ownership
    // and leak the object because the returned raw pointer was discarded.
    g_keyboard_handler.reset();
    return Error::OK;
}
|
||||
|
||||
int PS4_SYSV_ABI sceImeKeyboardGetInfo() {
|
||||
@ -268,25 +268,25 @@ int PS4_SYSV_ABI sceImeKeyboardGetResourceId() {
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceImeKeyboardOpen(s32 userId, const OrbisImeKeyboardParam* param) {
|
||||
Error PS4_SYSV_ABI sceImeKeyboardOpen(s32 userId, const OrbisImeKeyboardParam* param) {
|
||||
LOG_INFO(Lib_Ime, "called");
|
||||
|
||||
if (!param) {
|
||||
return ORBIS_IME_ERROR_INVALID_ADDRESS;
|
||||
return Error::INVALID_ADDRESS;
|
||||
}
|
||||
if (!param->arg) {
|
||||
return ORBIS_IME_ERROR_INVALID_ARG;
|
||||
return Error::INVALID_ARG;
|
||||
}
|
||||
if (!param->handler) {
|
||||
return ORBIS_IME_ERROR_INVALID_HANDLER;
|
||||
return Error::INVALID_HANDLER;
|
||||
}
|
||||
|
||||
if (g_keyboard_handler) {
|
||||
return ORBIS_IME_ERROR_BUSY;
|
||||
return Error::BUSY;
|
||||
}
|
||||
|
||||
g_keyboard_handler = std::make_unique<ImeHandler>(param);
|
||||
return ORBIS_OK;
|
||||
return Error::OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceImeKeyboardOpenInternal() {
|
||||
@ -304,18 +304,18 @@ int PS4_SYSV_ABI sceImeKeyboardUpdate() {
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceImeOpen(const OrbisImeParam* param, const void* extended) {
|
||||
Error PS4_SYSV_ABI sceImeOpen(const OrbisImeParam* param, const OrbisImeParamExtended* extended) {
|
||||
LOG_INFO(Lib_Ime, "called");
|
||||
|
||||
if (!param) {
|
||||
return ORBIS_IME_ERROR_INVALID_ADDRESS;
|
||||
return Error::INVALID_ADDRESS;
|
||||
}
|
||||
if (g_ime_handler) {
|
||||
return ORBIS_IME_ERROR_BUSY;
|
||||
return Error::BUSY;
|
||||
}
|
||||
|
||||
g_ime_handler = std::make_unique<ImeHandler>(param);
|
||||
return ORBIS_OK;
|
||||
return Error::OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceImeOpenInternal() {
|
||||
@ -339,27 +339,27 @@ int PS4_SYSV_ABI sceImeSetCandidateIndex() {
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret) {
|
||||
Error PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret) {
|
||||
LOG_TRACE(Lib_Ime, "called");
|
||||
|
||||
if (!g_ime_handler) {
|
||||
return ORBIS_IME_ERROR_NOT_OPENED;
|
||||
return Error::NOT_OPENED;
|
||||
}
|
||||
if (!caret) {
|
||||
return ORBIS_IME_ERROR_INVALID_ADDRESS;
|
||||
return Error::INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
return g_ime_handler->SetCaret(caret);
|
||||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceImeSetText(const char16_t* text, u32 length) {
|
||||
Error PS4_SYSV_ABI sceImeSetText(const char16_t* text, u32 length) {
|
||||
LOG_TRACE(Lib_Ime, "called");
|
||||
|
||||
if (!g_ime_handler) {
|
||||
return ORBIS_IME_ERROR_NOT_OPENED;
|
||||
return Error::NOT_OPENED;
|
||||
}
|
||||
if (!text) {
|
||||
return ORBIS_IME_ERROR_INVALID_ADDRESS;
|
||||
return Error::INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
return g_ime_handler->SetText(text, length);
|
||||
@ -370,7 +370,7 @@ int PS4_SYSV_ABI sceImeSetTextGeometry() {
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler) {
|
||||
Error PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler) {
|
||||
if (g_ime_handler) {
|
||||
g_ime_handler->Update(handler);
|
||||
}
|
||||
@ -380,10 +380,10 @@ s32 PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler) {
|
||||
}
|
||||
|
||||
if (!g_ime_handler || !g_keyboard_handler) {
|
||||
return ORBIS_IME_ERROR_NOT_OPENED;
|
||||
return Error::NOT_OPENED;
|
||||
}
|
||||
|
||||
return ORBIS_OK;
|
||||
return Error::OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceImeVshClearPreedit() {
|
||||
|
@ -13,72 +13,6 @@ class SymbolsResolver;
|
||||
|
||||
namespace Libraries::Ime {
|
||||
|
||||
constexpr u32 ORBIS_IME_MAX_TEXT_LENGTH = 2048;
|
||||
|
||||
enum class OrbisImeKeyboardOption : u32 {
|
||||
Default = 0,
|
||||
Repeat = 1,
|
||||
RepeatEachKey = 2,
|
||||
AddOsk = 4,
|
||||
EffectiveWithIme = 8,
|
||||
DisableResume = 16,
|
||||
DisableCapslockWithoutShift = 32,
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(OrbisImeKeyboardOption)
|
||||
|
||||
enum class OrbisImeOption : u32 {
|
||||
DEFAULT = 0,
|
||||
MULTILINE = 1,
|
||||
NO_AUTO_CAPITALIZATION = 2,
|
||||
PASSWORD = 4,
|
||||
LANGUAGES_FORCED = 8,
|
||||
EXT_KEYBOARD = 16,
|
||||
NO_LEARNING = 32,
|
||||
FIXED_POSITION = 64,
|
||||
DISABLE_RESUME = 256,
|
||||
DISABLE_AUTO_SPACE = 512,
|
||||
DISABLE_POSITION_ADJUSTMENT = 2048,
|
||||
EXPANDED_PREEDIT_BUFFER = 4096,
|
||||
USE_JAPANESE_EISUU_KEY_AS_CAPSLOCK = 8192,
|
||||
USE_2K_COORDINATES = 16384,
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(OrbisImeOption)
|
||||
|
||||
struct OrbisImeKeyboardParam {
|
||||
OrbisImeKeyboardOption option;
|
||||
s8 reserved1[4];
|
||||
void* arg;
|
||||
OrbisImeEventHandler handler;
|
||||
s8 reserved2[8];
|
||||
};
|
||||
|
||||
struct OrbisImeParam {
|
||||
s32 user_id;
|
||||
OrbisImeType type;
|
||||
u64 supported_languages;
|
||||
OrbisImeEnterLabel enter_label;
|
||||
OrbisImeInputMethod input_method;
|
||||
OrbisImeTextFilter filter;
|
||||
OrbisImeOption option;
|
||||
u32 maxTextLength;
|
||||
char16_t* inputTextBuffer;
|
||||
float posx;
|
||||
float posy;
|
||||
OrbisImeHorizontalAlignment horizontal_alignment;
|
||||
OrbisImeVerticalAlignment vertical_alignment;
|
||||
void* work;
|
||||
void* arg;
|
||||
OrbisImeEventHandler handler;
|
||||
s8 reserved[8];
|
||||
};
|
||||
|
||||
struct OrbisImeCaret {
|
||||
f32 x;
|
||||
f32 y;
|
||||
u32 height;
|
||||
u32 index;
|
||||
};
|
||||
|
||||
int PS4_SYSV_ABI FinalizeImeModule();
|
||||
int PS4_SYSV_ABI InitializeImeModule();
|
||||
int PS4_SYSV_ABI sceImeCheckFilterText();
|
||||
@ -98,22 +32,22 @@ int PS4_SYSV_ABI sceImeDisableController();
|
||||
int PS4_SYSV_ABI sceImeFilterText();
|
||||
int PS4_SYSV_ABI sceImeForTestFunction();
|
||||
int PS4_SYSV_ABI sceImeGetPanelPositionAndForm();
|
||||
s32 PS4_SYSV_ABI sceImeGetPanelSize(const OrbisImeParam* param, u32* width, u32* height);
|
||||
s32 PS4_SYSV_ABI sceImeKeyboardClose(s32 userId);
|
||||
Error PS4_SYSV_ABI sceImeGetPanelSize(const OrbisImeParam* param, u32* width, u32* height);
|
||||
Error PS4_SYSV_ABI sceImeKeyboardClose(s32 userId);
|
||||
int PS4_SYSV_ABI sceImeKeyboardGetInfo();
|
||||
int PS4_SYSV_ABI sceImeKeyboardGetResourceId();
|
||||
s32 PS4_SYSV_ABI sceImeKeyboardOpen(s32 userId, const OrbisImeKeyboardParam* param);
|
||||
Error PS4_SYSV_ABI sceImeKeyboardOpen(s32 userId, const OrbisImeKeyboardParam* param);
|
||||
int PS4_SYSV_ABI sceImeKeyboardOpenInternal();
|
||||
int PS4_SYSV_ABI sceImeKeyboardSetMode();
|
||||
int PS4_SYSV_ABI sceImeKeyboardUpdate();
|
||||
s32 PS4_SYSV_ABI sceImeOpen(const OrbisImeParam* param, const void* extended);
|
||||
Error PS4_SYSV_ABI sceImeOpen(const OrbisImeParam* param, const OrbisImeParamExtended* extended);
|
||||
int PS4_SYSV_ABI sceImeOpenInternal();
|
||||
void PS4_SYSV_ABI sceImeParamInit(OrbisImeParam* param);
|
||||
int PS4_SYSV_ABI sceImeSetCandidateIndex();
|
||||
s32 PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret);
|
||||
s32 PS4_SYSV_ABI sceImeSetText(const char16_t* text, u32 length);
|
||||
Error PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret);
|
||||
Error PS4_SYSV_ABI sceImeSetText(const char16_t* text, u32 length);
|
||||
int PS4_SYSV_ABI sceImeSetTextGeometry();
|
||||
s32 PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler);
|
||||
Error PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler);
|
||||
int PS4_SYSV_ABI sceImeVshClearPreedit();
|
||||
int PS4_SYSV_ABI sceImeVshClose();
|
||||
int PS4_SYSV_ABI sceImeVshConfirmPreedit();
|
||||
|
@ -3,9 +3,108 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/enum.h"
|
||||
#include "common/types.h"
|
||||
#include "core/libraries/rtc/rtc.h"
|
||||
|
||||
constexpr u32 ORBIS_IME_MAX_TEXT_LENGTH = 2048;
|
||||
constexpr u32 ORBIS_IME_DIALOG_MAX_TEXT_LENGTH = 2048;
|
||||
|
||||
enum class Error : u32 {
|
||||
OK = 0x0,
|
||||
BUSY = 0x80bc0001,
|
||||
NOT_OPENED = 0x80bc0002,
|
||||
NO_MEMORY = 0x80bc0003,
|
||||
CONNECTION_FAILED = 0x80bc0004,
|
||||
TOO_MANY_REQUESTS = 0x80bc0005,
|
||||
INVALID_TEXT = 0x80bc0006,
|
||||
EVENT_OVERFLOW = 0x80bc0007,
|
||||
NOT_ACTIVE = 0x80bc0008,
|
||||
IME_SUSPENDING = 0x80bc0009,
|
||||
DEVICE_IN_USE = 0x80bc000a,
|
||||
INVALID_USER_ID = 0x80bc0010,
|
||||
INVALID_TYPE = 0x80bc0011,
|
||||
INVALID_SUPPORTED_LANGUAGES = 0x80bc0012,
|
||||
INVALID_ENTER_LABEL = 0x80bc0013,
|
||||
INVALID_INPUT_METHOD = 0x80bc0014,
|
||||
INVALID_OPTION = 0x80bc0015,
|
||||
INVALID_MAX_TEXT_LENGTH = 0x80bc0016,
|
||||
INVALID_INPUT_TEXT_BUFFER = 0x80bc0017,
|
||||
INVALID_POSX = 0x80bc0018,
|
||||
INVALID_POSY = 0x80bc0019,
|
||||
INVALID_HORIZONTALIGNMENT = 0x80bc001a,
|
||||
INVALID_VERTICALALIGNMENT = 0x80bc001b,
|
||||
INVALID_EXTENDED = 0x80bc001c,
|
||||
INVALID_KEYBOARD_TYPE = 0x80bc001d,
|
||||
INVALID_WORK = 0x80bc0020,
|
||||
INVALID_ARG = 0x80bc0021,
|
||||
INVALID_HANDLER = 0x80bc0022,
|
||||
NO_RESOURCE_ID = 0x80bc0023,
|
||||
INVALID_MODE = 0x80bc0024,
|
||||
INVALID_PARAM = 0x80bc0030,
|
||||
INVALID_ADDRESS = 0x80bc0031,
|
||||
INVALID_RESERVED = 0x80bc0032,
|
||||
INVALID_TIMING = 0x80bc0033,
|
||||
INTERNAL = 0x80bc00ff,
|
||||
DIALOG_INVALID_TITLE = 0x80bc0101,
|
||||
DIALOG_NOT_RUNNING = 0x80bc0105,
|
||||
DIALOG_NOT_FINISHED = 0x80bc0106,
|
||||
DIALOG_NOT_IN_USE = 0x80bc0107
|
||||
};
|
||||
|
||||
enum class OrbisImeOption : u32 {
|
||||
DEFAULT = 0,
|
||||
MULTILINE = 1,
|
||||
NO_AUTO_CAPITALIZATION = 2,
|
||||
PASSWORD = 4,
|
||||
LANGUAGES_FORCED = 8,
|
||||
EXT_KEYBOARD = 16,
|
||||
NO_LEARNING = 32,
|
||||
FIXED_POSITION = 64,
|
||||
DISABLE_COPY_PASTE = 128,
|
||||
DISABLE_RESUME = 256,
|
||||
DISABLE_AUTO_SPACE = 512,
|
||||
DISABLE_POSITION_ADJUSTMENT = 2048,
|
||||
EXPANDED_PREEDIT_BUFFER = 4096,
|
||||
USE_JAPANESE_EISUU_KEY_AS_CAPSLOCK = 8192,
|
||||
USE_2K_COORDINATES = 16384,
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(OrbisImeOption);
|
||||
|
||||
enum class OrbisImeLanguage : u64 {
|
||||
DANISH = 0x0000000000000001,
|
||||
GERMAN = 0x0000000000000002,
|
||||
ENGLISH_US = 0x0000000000000004,
|
||||
SPANISH = 0x0000000000000008,
|
||||
FRENCH = 0x0000000000000010,
|
||||
ITALIAN = 0x0000000000000020,
|
||||
DUTCH = 0x0000000000000040,
|
||||
NORWEGIAN = 0x0000000000000080,
|
||||
POLISH = 0x0000000000000100,
|
||||
PORTUGUESE_PT = 0x0000000000000200,
|
||||
RUSSIAN = 0x0000000000000400,
|
||||
FINNISH = 0x0000000000000800,
|
||||
SWEDISH = 0x0000000000001000,
|
||||
JAPANESE = 0x0000000000002000,
|
||||
KOREAN = 0x0000000000004000,
|
||||
SIMPLIFIED_CHINESE = 0x0000000000008000,
|
||||
TRADITIONAL_CHINESE = 0x0000000000010000,
|
||||
PORTUGUESE_BR = 0x0000000000020000,
|
||||
ENGLISH_GB = 0x0000000000040000,
|
||||
TURKISH = 0x0000000000080000,
|
||||
SPANISH_LA = 0x0000000000100000,
|
||||
ARABIC = 0x0000000001000000,
|
||||
FRENCH_CA = 0x0000000002000000,
|
||||
THAI = 0x0000000004000000,
|
||||
CZECH = 0x0000000008000000,
|
||||
GREEK = 0x0000000010000000,
|
||||
INDONESIAN = 0x0000000020000000,
|
||||
VIETNAMESE = 0x0000000040000000,
|
||||
ROMANIAN = 0x0000000080000000,
|
||||
HUNGARIAN = 0x0000000100000000,
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(OrbisImeLanguage);
|
||||
|
||||
enum class OrbisImeType : u32 {
|
||||
Default = 0,
|
||||
BasicLatin = 1,
|
||||
@ -41,6 +140,7 @@ enum class OrbisImeEventId : u32 {
|
||||
Open = 0,
|
||||
UpdateText = 1,
|
||||
UpdateCaret = 2,
|
||||
ChangeSize = 3,
|
||||
PressClose = 4,
|
||||
PressEnter = 5,
|
||||
Abort = 6,
|
||||
@ -51,6 +151,10 @@ enum class OrbisImeEventId : u32 {
|
||||
CandidateDone = 11,
|
||||
CandidateCancel = 12,
|
||||
ChangeDevice = 14,
|
||||
JumpToNextObject = 15,
|
||||
JumpToBeforeObject = 16,
|
||||
ChangeWindowType = 17,
|
||||
|
||||
ChangeInputMethodState = 18,
|
||||
|
||||
KeyboardOpen = 256,
|
||||
@ -110,6 +214,13 @@ enum class OrbisImeDeviceType : u32 {
|
||||
RemoteOsk = 3,
|
||||
};
|
||||
|
||||
enum class OrbisImePanelPriority : u32 {
|
||||
Default = 0,
|
||||
Alphabet = 1,
|
||||
Symbol = 2,
|
||||
Accent = 3,
|
||||
};
|
||||
|
||||
struct OrbisImeRect {
|
||||
f32 x;
|
||||
f32 y;
|
||||
@ -117,8 +228,22 @@ struct OrbisImeRect {
|
||||
u32 height;
|
||||
};
|
||||
|
||||
struct OrbisImeColor {
|
||||
u8 r;
|
||||
u8 g;
|
||||
u8 b;
|
||||
u8 a;
|
||||
};
|
||||
|
||||
enum class OrbisImeTextAreaMode : u32 {
|
||||
Disable = 0,
|
||||
Edit = 1,
|
||||
Preedit = 2,
|
||||
Select = 3,
|
||||
};
|
||||
|
||||
struct OrbisImeTextAreaProperty {
|
||||
u32 mode; // OrbisImeTextAreaMode
|
||||
OrbisImeTextAreaMode mode;
|
||||
u32 index;
|
||||
s32 length;
|
||||
};
|
||||
@ -135,14 +260,14 @@ struct OrbisImeKeycode {
|
||||
char16_t character;
|
||||
u32 status;
|
||||
OrbisImeKeyboardType type;
|
||||
s32 user_id;
|
||||
s32 user_id; // Todo: switch to OrbisUserServiceUserId
|
||||
u32 resource_id;
|
||||
Libraries::Rtc::OrbisRtcTick timestamp;
|
||||
};
|
||||
|
||||
struct OrbisImeKeyboardResourceIdArray {
|
||||
s32 userId;
|
||||
u32 resourceId[5];
|
||||
s32 user_id; // Todo: switch to OrbisUserServiceUserId
|
||||
u32 resource_id[5];
|
||||
};
|
||||
|
||||
enum class OrbisImeCaretMovementDirection : u32 {
|
||||
@ -159,6 +284,16 @@ enum class OrbisImeCaretMovementDirection : u32 {
|
||||
Bottom = 10,
|
||||
};
|
||||
|
||||
enum class OrbisImePanelType : u32 {
|
||||
Hide = 0,
|
||||
Osk = 1,
|
||||
Dialog = 2,
|
||||
Candidate = 3,
|
||||
Edit = 4,
|
||||
EditAndCandidate = 5,
|
||||
Accessibility = 6,
|
||||
};
|
||||
|
||||
union OrbisImeEventParam {
|
||||
OrbisImeRect rect;
|
||||
OrbisImeEditText text;
|
||||
@ -168,6 +303,7 @@ union OrbisImeEventParam {
|
||||
char16_t* candidate_word;
|
||||
s32 candidate_index;
|
||||
OrbisImeDeviceType device_type;
|
||||
OrbisImePanelType panel_type;
|
||||
u32 input_method_state;
|
||||
s8 reserved[64];
|
||||
};
|
||||
@ -177,7 +313,95 @@ struct OrbisImeEvent {
|
||||
OrbisImeEventParam param;
|
||||
};
|
||||
|
||||
using OrbisImeExtKeyboardFilter = PS4_SYSV_ABI int (*)(const OrbisImeKeycode* srcKeycode,
|
||||
u16* outKeycode, u32* outStatus,
|
||||
void* reserved);
|
||||
|
||||
using OrbisImeTextFilter = PS4_SYSV_ABI int (*)(char16_t* outText, u32* outTextLength,
|
||||
const char16_t* srcText, u32 srcTextLength);
|
||||
|
||||
using OrbisImeEventHandler = PS4_SYSV_ABI void (*)(void* arg, const OrbisImeEvent* e);
|
||||
|
||||
enum class OrbisImeKeyboardOption : u32 {
|
||||
Default = 0,
|
||||
Repeat = 1,
|
||||
RepeatEachKey = 2,
|
||||
AddOsk = 4,
|
||||
EffectiveWithIme = 8,
|
||||
DisableResume = 16,
|
||||
DisableCapslockWithoutShift = 32,
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(OrbisImeKeyboardOption)
|
||||
|
||||
struct OrbisImeKeyboardParam {
|
||||
OrbisImeKeyboardOption option;
|
||||
s8 reserved1[4];
|
||||
void* arg;
|
||||
OrbisImeEventHandler handler;
|
||||
s8 reserved2[8];
|
||||
};
|
||||
|
||||
struct OrbisImeParam {
|
||||
s32 user_id; // Todo: switch to OrbisUserServiceUserId
|
||||
OrbisImeType type;
|
||||
u64 supported_languages; // OrbisImeLanguage flags
|
||||
OrbisImeEnterLabel enter_label;
|
||||
OrbisImeInputMethod input_method;
|
||||
OrbisImeTextFilter filter;
|
||||
OrbisImeOption option;
|
||||
u32 maxTextLength;
|
||||
char16_t* inputTextBuffer;
|
||||
f32 posx;
|
||||
f32 posy;
|
||||
OrbisImeHorizontalAlignment horizontal_alignment;
|
||||
OrbisImeVerticalAlignment vertical_alignment;
|
||||
void* work;
|
||||
void* arg;
|
||||
OrbisImeEventHandler handler;
|
||||
s8 reserved[8];
|
||||
};
|
||||
|
||||
struct OrbisImeCaret {
|
||||
f32 x;
|
||||
f32 y;
|
||||
u32 height;
|
||||
u32 index;
|
||||
};
|
||||
|
||||
struct OrbisImeDialogParam {
|
||||
s32 user_id;
|
||||
OrbisImeType type;
|
||||
u64 supported_languages; // OrbisImeLanguage flags
|
||||
OrbisImeEnterLabel enter_label;
|
||||
OrbisImeInputMethod input_method;
|
||||
OrbisImeTextFilter filter;
|
||||
OrbisImeOption option;
|
||||
u32 max_text_length;
|
||||
char16_t* input_text_buffer;
|
||||
f32 posx;
|
||||
f32 posy;
|
||||
OrbisImeHorizontalAlignment horizontal_alignment;
|
||||
OrbisImeVerticalAlignment vertical_alignment;
|
||||
const char16_t* placeholder;
|
||||
const char16_t* title;
|
||||
s8 reserved[16];
|
||||
};
|
||||
|
||||
struct OrbisImeParamExtended {
|
||||
u32 option; // OrbisImeExtOption flags
|
||||
OrbisImeColor color_base;
|
||||
OrbisImeColor color_line;
|
||||
OrbisImeColor color_text_field;
|
||||
OrbisImeColor color_preedit;
|
||||
OrbisImeColor color_button_default;
|
||||
OrbisImeColor color_button_function;
|
||||
OrbisImeColor color_button_symbol;
|
||||
OrbisImeColor color_text;
|
||||
OrbisImeColor color_special;
|
||||
OrbisImePanelPriority priority;
|
||||
char* additional_dictionary_path;
|
||||
OrbisImeExtKeyboardFilter ext_keyboard_filter;
|
||||
u32 disable_device;
|
||||
u32 ext_keyboard_mode;
|
||||
s8 reserved[60];
|
||||
};
|
||||
|
@ -20,19 +20,19 @@ static OrbisImeDialogResult g_ime_dlg_result{};
|
||||
static ImeDialogState g_ime_dlg_state{};
|
||||
static ImeDialogUi g_ime_dlg_ui;
|
||||
|
||||
static bool IsValidOption(OrbisImeDialogOption option, OrbisImeType type) {
|
||||
if (False(~option &
|
||||
(OrbisImeDialogOption::Multiline | OrbisImeDialogOption::NoAutoCompletion))) {
|
||||
static bool IsValidOption(OrbisImeOption option, OrbisImeType type) {
|
||||
if (False(~option & (OrbisImeOption::MULTILINE |
|
||||
OrbisImeOption::NO_AUTO_CAPITALIZATION /* NoAutoCompletion */))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (True(option & OrbisImeDialogOption::Multiline) && type != OrbisImeType::Default &&
|
||||
if (True(option & OrbisImeOption::MULTILINE) && type != OrbisImeType::Default &&
|
||||
type != OrbisImeType::BasicLatin) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (True(option & OrbisImeDialogOption::NoAutoCompletion) && type != OrbisImeType::Number &&
|
||||
type != OrbisImeType::BasicLatin) {
|
||||
if (True(option & OrbisImeOption::NO_AUTO_CAPITALIZATION /* NoAutoCompletion */) &&
|
||||
type != OrbisImeType::Number && type != OrbisImeType::BasicLatin) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -96,7 +96,7 @@ Error PS4_SYSV_ABI sceImeDialogGetPanelSize(const OrbisImeDialogParam* param, u3
|
||||
case OrbisImeType::Url:
|
||||
case OrbisImeType::Mail:
|
||||
*width = 500; // original: 793
|
||||
if (True(param->option & OrbisImeDialogOption::Multiline)) {
|
||||
if (True(param->option & OrbisImeOption::MULTILINE)) {
|
||||
*height = 300; // original: 576
|
||||
} else {
|
||||
*height = 150; // original: 476
|
||||
@ -149,18 +149,20 @@ OrbisImeDialogStatus PS4_SYSV_ABI sceImeDialogGetStatus() {
|
||||
}
|
||||
|
||||
Error PS4_SYSV_ABI sceImeDialogInit(OrbisImeDialogParam* param, OrbisImeParamExtended* extended) {
|
||||
LOG_INFO(Lib_ImeDialog, ">> sceImeDialogInit: entering, param={}, extended={}",
|
||||
static_cast<void*>(param), static_cast<void*>(extended));
|
||||
if (g_ime_dlg_status != OrbisImeDialogStatus::None) {
|
||||
LOG_INFO(Lib_ImeDialog, "IME dialog is already running");
|
||||
LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: busy (status=%u)", (u32)g_ime_dlg_status);
|
||||
return Error::BUSY;
|
||||
}
|
||||
|
||||
if (param == nullptr) {
|
||||
LOG_INFO(Lib_ImeDialog, "called with param (NULL)");
|
||||
LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: param is null");
|
||||
return Error::INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
if (!magic_enum::enum_contains(param->type)) {
|
||||
LOG_INFO(Lib_ImeDialog, "Invalid param->type");
|
||||
LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: invalid param->type=%u", (u32)param->type);
|
||||
return Error::INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
@ -168,16 +170,14 @@ Error PS4_SYSV_ABI sceImeDialogInit(OrbisImeDialogParam* param, OrbisImeParamExt
|
||||
// TODO: do correct param->supportedLanguages validation
|
||||
|
||||
if (param->posx < 0.0f ||
|
||||
param->posx >=
|
||||
MAX_X_POSITIONS[False(param->option & OrbisImeDialogOption::LargeResolution)]) {
|
||||
LOG_INFO(Lib_ImeDialog, "Invalid param->posx");
|
||||
param->posx >= MAX_X_POSITIONS[False(param->option & OrbisImeOption::USE_2K_COORDINATES)]) {
|
||||
LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: invalid posx=%f", param->posx);
|
||||
return Error::INVALID_POSX;
|
||||
}
|
||||
|
||||
if (param->posy < 0.0f ||
|
||||
param->posy >=
|
||||
MAX_Y_POSITIONS[False(param->option & OrbisImeDialogOption::LargeResolution)]) {
|
||||
LOG_INFO(Lib_ImeDialog, "Invalid param->posy");
|
||||
param->posy >= MAX_Y_POSITIONS[False(param->option & OrbisImeOption::USE_2K_COORDINATES)]) {
|
||||
LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: invalid posy=%f", param->posy);
|
||||
return Error::INVALID_POSY;
|
||||
}
|
||||
|
||||
@ -192,12 +192,13 @@ Error PS4_SYSV_ABI sceImeDialogInit(OrbisImeDialogParam* param, OrbisImeParamExt
|
||||
}
|
||||
|
||||
if (!IsValidOption(param->option, param->type)) {
|
||||
LOG_INFO(Lib_ImeDialog, "Invalid param->option");
|
||||
LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: invalid option=0x%X for type=%u",
|
||||
static_cast<u32>(param->option), (u32)param->type);
|
||||
return Error::INVALID_PARAM;
|
||||
}
|
||||
|
||||
if (param->input_text_buffer == nullptr) {
|
||||
LOG_INFO(Lib_ImeDialog, "Invalid param->inputTextBuffer");
|
||||
LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: input_text_buffer is null");
|
||||
return Error::INVALID_INPUT_TEXT_BUFFER;
|
||||
}
|
||||
|
||||
@ -220,16 +221,24 @@ Error PS4_SYSV_ABI sceImeDialogInit(OrbisImeDialogParam* param, OrbisImeParamExt
|
||||
}
|
||||
}
|
||||
|
||||
if (param->max_text_length > ORBIS_IME_DIALOG_MAX_TEXT_LENGTH) {
|
||||
LOG_INFO(Lib_ImeDialog, "Invalid param->maxTextLength");
|
||||
if (param->max_text_length == 0 || param->max_text_length > ORBIS_IME_MAX_TEXT_LENGTH) {
|
||||
LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: invalid max_text_length=%u",
|
||||
param->max_text_length);
|
||||
return Error::INVALID_MAX_TEXT_LENGTH;
|
||||
}
|
||||
|
||||
// Title string validation
|
||||
if (param->title != nullptr && !std::char_traits<char16_t>::length(param->title)) {
|
||||
LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: title is empty");
|
||||
return Error::INVALID_PARAM;
|
||||
}
|
||||
|
||||
g_ime_dlg_result = {};
|
||||
g_ime_dlg_state = ImeDialogState(param, extended);
|
||||
g_ime_dlg_status = OrbisImeDialogStatus::Running;
|
||||
g_ime_dlg_ui = ImeDialogUi(&g_ime_dlg_state, &g_ime_dlg_status, &g_ime_dlg_result);
|
||||
|
||||
LOG_INFO(Lib_ImeDialog, "<< sceImeDialogInit: successful, status now=Running");
|
||||
return Error::OK;
|
||||
}
|
||||
|
||||
|
@ -13,50 +13,6 @@ class SymbolsResolver;
|
||||
|
||||
namespace Libraries::ImeDialog {
|
||||
|
||||
constexpr u32 ORBIS_IME_DIALOG_MAX_TEXT_LENGTH = 2048;
|
||||
|
||||
enum class Error : u32 {
|
||||
OK = 0x0,
|
||||
BUSY = 0x80bc0001,
|
||||
NOT_OPENED = 0x80bc0002,
|
||||
NO_MEMORY = 0x80bc0003,
|
||||
CONNECTION_FAILED = 0x80bc0004,
|
||||
TOO_MANY_REQUESTS = 0x80bc0005,
|
||||
INVALID_TEXT = 0x80bc0006,
|
||||
EVENT_OVERFLOW = 0x80bc0007,
|
||||
NOT_ACTIVE = 0x80bc0008,
|
||||
IME_SUSPENDING = 0x80bc0009,
|
||||
DEVICE_IN_USE = 0x80bc000a,
|
||||
INVALID_USER_ID = 0x80bc0010,
|
||||
INVALID_TYPE = 0x80bc0011,
|
||||
INVALID_SUPPORTED_LANGUAGES = 0x80bc0012,
|
||||
INVALID_ENTER_LABEL = 0x80bc0013,
|
||||
INVALID_INPUT_METHOD = 0x80bc0014,
|
||||
INVALID_OPTION = 0x80bc0015,
|
||||
INVALID_MAX_TEXT_LENGTH = 0x80bc0016,
|
||||
INVALID_INPUT_TEXT_BUFFER = 0x80bc0017,
|
||||
INVALID_POSX = 0x80bc0018,
|
||||
INVALID_POSY = 0x80bc0019,
|
||||
INVALID_HORIZONTALIGNMENT = 0x80bc001a,
|
||||
INVALID_VERTICALALIGNMENT = 0x80bc001b,
|
||||
INVALID_EXTENDED = 0x80bc001c,
|
||||
INVALID_KEYBOARD_TYPE = 0x80bc001d,
|
||||
INVALID_WORK = 0x80bc0020,
|
||||
INVALID_ARG = 0x80bc0021,
|
||||
INVALID_HANDLER = 0x80bc0022,
|
||||
NO_RESOURCE_ID = 0x80bc0023,
|
||||
INVALID_MODE = 0x80bc0024,
|
||||
INVALID_PARAM = 0x80bc0030,
|
||||
INVALID_ADDRESS = 0x80bc0031,
|
||||
INVALID_RESERVED = 0x80bc0032,
|
||||
INVALID_TIMING = 0x80bc0033,
|
||||
INTERNAL = 0x80bc00ff,
|
||||
DIALOG_INVALID_TITLE = 0x80bc0101,
|
||||
DIALOG_NOT_RUNNING = 0x80bc0105,
|
||||
DIALOG_NOT_FINISHED = 0x80bc0106,
|
||||
DIALOG_NOT_IN_USE = 0x80bc0107,
|
||||
};
|
||||
|
||||
enum class OrbisImeDialogStatus : u32 {
|
||||
None = 0,
|
||||
Running = 1,
|
||||
@ -69,87 +25,11 @@ enum class OrbisImeDialogEndStatus : u32 {
|
||||
Aborted = 2,
|
||||
};
|
||||
|
||||
enum class OrbisImeDialogOption : u32 {
|
||||
Default = 0,
|
||||
Multiline = 1,
|
||||
NoAutoCorrection = 2,
|
||||
NoAutoCompletion = 4,
|
||||
// TODO: Document missing options
|
||||
LargeResolution = 1024,
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(OrbisImeDialogOption)
|
||||
|
||||
enum class OrbisImePanelPriority : u32 {
|
||||
Default = 0,
|
||||
Alphabet = 1,
|
||||
Symbol = 2,
|
||||
Accent = 3,
|
||||
};
|
||||
|
||||
struct OrbisImeColor {
|
||||
u8 r;
|
||||
u8 g;
|
||||
u8 b;
|
||||
u8 a;
|
||||
};
|
||||
|
||||
struct OrbisImeDialogResult {
|
||||
OrbisImeDialogEndStatus endstatus;
|
||||
s32 reserved[12];
|
||||
};
|
||||
|
||||
struct OrbisImeKeycode {
|
||||
u16 keycode;
|
||||
char16_t character;
|
||||
u32 status;
|
||||
OrbisImeKeyboardType type;
|
||||
s32 user_id;
|
||||
u32 resource_id;
|
||||
u64 timestamp;
|
||||
};
|
||||
|
||||
using OrbisImeExtKeyboardFilter = PS4_SYSV_ABI int (*)(const OrbisImeKeycode* srcKeycode,
|
||||
u16* outKeycode, u32* outStatus,
|
||||
void* reserved);
|
||||
|
||||
struct OrbisImeDialogParam {
|
||||
s32 user_id;
|
||||
OrbisImeType type;
|
||||
u64 supported_languages;
|
||||
OrbisImeEnterLabel enter_label;
|
||||
OrbisImeInputMethod input_method;
|
||||
OrbisImeTextFilter filter;
|
||||
OrbisImeDialogOption option;
|
||||
u32 max_text_length;
|
||||
char16_t* input_text_buffer;
|
||||
float posx;
|
||||
float posy;
|
||||
OrbisImeHorizontalAlignment horizontal_alignment;
|
||||
OrbisImeVerticalAlignment vertical_alignment;
|
||||
const char16_t* placeholder;
|
||||
const char16_t* title;
|
||||
s8 reserved[16];
|
||||
};
|
||||
|
||||
struct OrbisImeParamExtended {
|
||||
u32 option; // OrbisImeDialogOptionExtended
|
||||
OrbisImeColor color_base;
|
||||
OrbisImeColor color_line;
|
||||
OrbisImeColor color_text_field;
|
||||
OrbisImeColor color_preedit;
|
||||
OrbisImeColor color_button_default;
|
||||
OrbisImeColor color_button_function;
|
||||
OrbisImeColor color_button_symbol;
|
||||
OrbisImeColor color_text;
|
||||
OrbisImeColor color_special;
|
||||
OrbisImePanelPriority priority;
|
||||
char* additional_dictionary_path;
|
||||
OrbisImeExtKeyboardFilter ext_keyboard_filter;
|
||||
uint32_t disable_device;
|
||||
uint32_t ext_keyboard_mode;
|
||||
int8_t reserved[60];
|
||||
};
|
||||
|
||||
Error PS4_SYSV_ABI sceImeDialogAbort();
|
||||
Error PS4_SYSV_ABI sceImeDialogForceClose();
|
||||
Error PS4_SYSV_ABI sceImeDialogForTestFunction();
|
||||
|
@ -21,12 +21,16 @@ namespace Libraries::ImeDialog {
|
||||
|
||||
ImeDialogState::ImeDialogState(const OrbisImeDialogParam* param,
|
||||
const OrbisImeParamExtended* extended) {
|
||||
LOG_INFO(Lib_ImeDialog, ">> ImeDialogState::Ctor: param={}, text_buffer={}",
|
||||
static_cast<const void*>(param),
|
||||
static_cast<void*>(param ? param->input_text_buffer : nullptr));
|
||||
if (!param) {
|
||||
LOG_ERROR(Lib_ImeDialog, " param==nullptr, returning without init");
|
||||
return;
|
||||
}
|
||||
|
||||
user_id = param->user_id;
|
||||
is_multi_line = True(param->option & OrbisImeDialogOption::Multiline);
|
||||
is_multi_line = True(param->option & OrbisImeOption::MULTILINE);
|
||||
is_numeric = param->type == OrbisImeType::Number;
|
||||
type = param->type;
|
||||
enter_label = param->enter_label;
|
||||
@ -220,6 +224,7 @@ void ImeDialogUi::Free() {
|
||||
|
||||
void ImeDialogUi::Draw() {
|
||||
std::unique_lock lock{draw_mutex};
|
||||
LOG_INFO(Lib_ImeDialog, ">> ImeDialogUi::Draw: first_render=%d", first_render);
|
||||
|
||||
if (!state) {
|
||||
return;
|
||||
@ -259,9 +264,13 @@ void ImeDialogUi::Draw() {
|
||||
}
|
||||
|
||||
if (state->is_multi_line) {
|
||||
LOG_INFO(Lib_ImeDialog, " Drawing multi-line widget…");
|
||||
DrawMultiLineInputText();
|
||||
LOG_INFO(Lib_ImeDialog, " Done DrawMultiLineInputText");
|
||||
} else {
|
||||
LOG_INFO(Lib_ImeDialog, " Drawing input text widget…");
|
||||
DrawInputText();
|
||||
LOG_INFO(Lib_ImeDialog, " Done DrawInputText");
|
||||
}
|
||||
|
||||
SetCursorPosY(GetCursorPosY() + 10.0f);
|
||||
@ -306,6 +315,7 @@ void ImeDialogUi::Draw() {
|
||||
End();
|
||||
|
||||
first_render = false;
|
||||
LOG_INFO(Lib_ImeDialog, "<< ImeDialogUi::Draw complete");
|
||||
}
|
||||
|
||||
void ImeDialogUi::DrawInputText() {
|
||||
@ -316,7 +326,7 @@ void ImeDialogUi::DrawInputText() {
|
||||
}
|
||||
const char* placeholder = state->placeholder.empty() ? nullptr : state->placeholder.data();
|
||||
if (InputTextEx("##ImeDialogInput", placeholder, state->current_text.begin(),
|
||||
state->max_text_length, input_size, ImGuiInputTextFlags_CallbackCharFilter,
|
||||
state->max_text_length + 1, input_size, ImGuiInputTextFlags_CallbackCharFilter,
|
||||
InputTextCallback, this)) {
|
||||
state->input_changed = true;
|
||||
}
|
||||
@ -332,7 +342,7 @@ void ImeDialogUi::DrawMultiLineInputText() {
|
||||
}
|
||||
const char* placeholder = state->placeholder.empty() ? nullptr : state->placeholder.data();
|
||||
if (InputTextEx("##ImeDialogInput", placeholder, state->current_text.begin(),
|
||||
state->max_text_length, input_size, flags, InputTextCallback, this)) {
|
||||
state->max_text_length + 1, input_size, flags, InputTextCallback, this)) {
|
||||
state->input_changed = true;
|
||||
}
|
||||
}
|
||||
@ -341,13 +351,19 @@ int ImeDialogUi::InputTextCallback(ImGuiInputTextCallbackData* data) {
|
||||
ImeDialogUi* ui = static_cast<ImeDialogUi*>(data->UserData);
|
||||
ASSERT(ui);
|
||||
|
||||
LOG_DEBUG(Lib_ImeDialog, ">> InputTextCallback: EventFlag={}, EventChar={}", data->EventFlag,
|
||||
data->EventChar);
|
||||
|
||||
// Should we filter punctuation?
|
||||
if (ui->state->is_numeric && (data->EventChar < '0' || data->EventChar > '9') &&
|
||||
data->EventChar != '\b' && data->EventChar != ',' && data->EventChar != '.') {
|
||||
LOG_INFO(Lib_ImeDialog, "InputTextCallback: rejecting non-digit char '{}'",
|
||||
static_cast<char>(data->EventChar));
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!ui->state->keyboard_filter) {
|
||||
LOG_DEBUG(Lib_ImeDialog, "InputTextCallback: no keyboard_filter, accepting char");
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -363,20 +379,24 @@ int ImeDialogUi::InputTextCallback(ImGuiInputTextCallbackData* data) {
|
||||
// the current language?)
|
||||
.user_id = ui->state->user_id,
|
||||
.resource_id = 0,
|
||||
.timestamp = 0,
|
||||
.timestamp = {0},
|
||||
};
|
||||
|
||||
if (!ui->state->ConvertUTF8ToOrbis(event_char, 4, &src_keycode.character, 1)) {
|
||||
LOG_ERROR(Lib_ImeDialog, "Failed to convert orbis char to utf8");
|
||||
LOG_ERROR(Lib_ImeDialog, "InputTextCallback: ConvertUTF8ToOrbis failed");
|
||||
return 0;
|
||||
}
|
||||
LOG_DEBUG(Lib_ImeDialog, "InputTextCallback: converted to Orbis char={:#X}",
|
||||
static_cast<uint16_t>(src_keycode.character));
|
||||
src_keycode.keycode = src_keycode.character; // TODO set this to the correct value
|
||||
|
||||
u16 out_keycode;
|
||||
u32 out_status;
|
||||
|
||||
ui->state->CallKeyboardFilter(&src_keycode, &out_keycode, &out_status);
|
||||
|
||||
bool keep = ui->state->CallKeyboardFilter(&src_keycode, &out_keycode, &out_status);
|
||||
LOG_DEBUG(Lib_ImeDialog,
|
||||
"InputTextCallback: CallKeyboardFilter returned %s (keycode=0x%X, status=0x%X)",
|
||||
keep ? "true" : "false", out_keycode, out_status);
|
||||
// TODO. set the keycode
|
||||
|
||||
return 0;
|
||||
|
@ -199,7 +199,7 @@ int ImeUi::InputTextCallback(ImGuiInputTextCallbackData* data) {
|
||||
eventParam.caret_index = data->CursorPos;
|
||||
eventParam.area_num = 1;
|
||||
|
||||
eventParam.text_area[0].mode = 1; // Edit mode
|
||||
eventParam.text_area[0].mode = OrbisImeTextAreaMode::Edit;
|
||||
eventParam.text_area[0].index = data->CursorPos;
|
||||
eventParam.text_area[0].length = data->BufTextLen;
|
||||
|
||||
|
@ -445,7 +445,8 @@ s32 PS4_SYSV_ABI sceVideoOutConfigureOutputMode_(s32 handle, u32 reserved, const
|
||||
}
|
||||
|
||||
void RegisterLib(Core::Loader::SymbolsResolver* sym) {
|
||||
driver = std::make_unique<VideoOutDriver>(Config::getScreenWidth(), Config::getScreenHeight());
|
||||
driver = std::make_unique<VideoOutDriver>(Config::getInternalScreenWidth(),
|
||||
Config::getInternalScreenHeight());
|
||||
|
||||
LIB_FUNCTION("SbU3dwp80lQ", "libSceVideoOut", 1, "libSceVideoOut", 0, 0,
|
||||
sceVideoOutGetFlipStatus);
|
||||
|
@ -133,6 +133,7 @@ void Emulator::Run(std::filesystem::path file, const std::vector<std::string> ar
|
||||
LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole());
|
||||
LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu());
|
||||
LOG_INFO(Config, "GPU readbacks: {}", Config::readbacks());
|
||||
LOG_INFO(Config, "GPU readbackLinearImages: {}", Config::readbackLinearImages());
|
||||
LOG_INFO(Config, "GPU directMemoryAccess: {}", Config::directMemoryAccess());
|
||||
LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders());
|
||||
LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv());
|
||||
@ -222,7 +223,7 @@ void Emulator::Run(std::filesystem::path file, const std::vector<std::string> ar
|
||||
}
|
||||
}
|
||||
window = std::make_unique<Frontend::WindowSDL>(
|
||||
Config::getScreenWidth(), Config::getScreenHeight(), controller, window_title);
|
||||
Config::getWindowWidth(), Config::getWindowHeight(), controller, window_title);
|
||||
|
||||
g_window = window.get();
|
||||
|
||||
|
@ -762,8 +762,8 @@ void SettingsDialog::UpdateSettings() {
|
||||
m_gui_settings->SetValue(gui::gl_backgroundMusicVolume, ui->BGMVolumeSlider->value());
|
||||
Config::setLanguage(languageIndexes[ui->consoleLanguageComboBox->currentIndex()]);
|
||||
Config::setEnableDiscordRPC(ui->discordRPCCheckbox->isChecked());
|
||||
Config::setScreenWidth(ui->widthSpinBox->value());
|
||||
Config::setScreenHeight(ui->heightSpinBox->value());
|
||||
Config::setWindowWidth(ui->widthSpinBox->value());
|
||||
Config::setWindowHeight(ui->heightSpinBox->value());
|
||||
Config::setVblankDiv(ui->vblankSpinBox->value());
|
||||
Config::setDumpShaders(ui->dumpShadersCheckBox->isChecked());
|
||||
Config::setNullGpu(ui->nullGpuCheckBox->isChecked());
|
||||
|
@ -54,17 +54,23 @@ Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value,
|
||||
});
|
||||
}
|
||||
|
||||
Id SharedAtomicU64IncDec(EmitContext& ctx, Id offset,
|
||||
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
|
||||
const Id shift_id{ctx.ConstU32(3U)};
|
||||
const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)};
|
||||
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
|
||||
const Id pointer{ctx.EmitSharedMemoryAccess(ctx.shared_u64, ctx.shared_memory_u64, index)};
|
||||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||
return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
|
||||
return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics);
|
||||
});
|
||||
}
|
||||
|
||||
template <bool is_float = false>
|
||||
Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
|
||||
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
const auto type = [&] {
|
||||
if constexpr (is_float) {
|
||||
return ctx.F32[1];
|
||||
} else {
|
||||
return ctx.U32[1];
|
||||
}
|
||||
}();
|
||||
const Id type = is_float ? ctx.F32[1] : ctx.U32[1];
|
||||
if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, offset);
|
||||
}
|
||||
@ -148,42 +154,82 @@ Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicUMax64(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicUMax);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicSMax64(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicSMax);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicUMin64(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicUMin);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicSMin64(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicSMin);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicAnd64(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicAnd);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicOr64(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicOr);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicXor64(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicXor);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicISub);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicISub64(EmitContext& ctx, Id offset, Id value) {
|
||||
return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicISub);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset) {
|
||||
return SharedAtomicU32IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicInc64(EmitContext& ctx, Id offset) {
|
||||
return SharedAtomicU64IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset) {
|
||||
return SharedAtomicU32IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement);
|
||||
}
|
||||
|
||||
Id EmitSharedAtomicDec64(EmitContext& ctx, Id offset) {
|
||||
return SharedAtomicU64IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement);
|
||||
}
|
||||
|
||||
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
|
||||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd);
|
||||
}
|
||||
|
@ -139,15 +139,25 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicIAdd64(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicUMax64(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicSMax64(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicUMin64(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicSMin64(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicAnd64(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicOr64(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicXor64(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset);
|
||||
Id EmitSharedAtomicInc64(EmitContext& ctx, Id offset);
|
||||
Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset);
|
||||
Id EmitSharedAtomicDec64(EmitContext& ctx, Id offset);
|
||||
Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value);
|
||||
Id EmitSharedAtomicISub64(EmitContext& ctx, Id offset, Id value);
|
||||
|
||||
Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
|
||||
Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);
|
||||
@ -519,8 +529,10 @@ Id EmitLaneId(EmitContext& ctx);
|
||||
Id EmitWarpId(EmitContext& ctx);
|
||||
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
|
||||
Id EmitReadFirstLane(EmitContext& ctx, Id value);
|
||||
Id EmitReadLane(EmitContext& ctx, Id value, u32 lane);
|
||||
Id EmitReadLane(EmitContext& ctx, Id value, Id lane);
|
||||
Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane);
|
||||
Id EmitBallot(EmitContext& ctx, Id bit);
|
||||
Id EmitBallotFindLsb(EmitContext& ctx, Id mask);
|
||||
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding);
|
||||
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding);
|
||||
|
||||
|
@ -26,13 +26,20 @@ Id EmitReadFirstLane(EmitContext& ctx, Id value) {
|
||||
return ctx.OpGroupNonUniformBroadcastFirst(ctx.U32[1], SubgroupScope(ctx), value);
|
||||
}
|
||||
|
||||
Id EmitReadLane(EmitContext& ctx, Id value, u32 lane) {
|
||||
return ctx.OpGroupNonUniformBroadcast(ctx.U32[1], SubgroupScope(ctx), value,
|
||||
ctx.ConstU32(lane));
|
||||
Id EmitReadLane(EmitContext& ctx, Id value, Id lane) {
|
||||
return ctx.OpGroupNonUniformBroadcast(ctx.U32[1], SubgroupScope(ctx), value, lane);
|
||||
}
|
||||
|
||||
Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane) {
|
||||
return ctx.u32_zero_value;
|
||||
}
|
||||
|
||||
Id EmitBallot(EmitContext& ctx, Id bit) {
|
||||
return ctx.OpGroupNonUniformBallot(ctx.U32[4], SubgroupScope(ctx), bit);
|
||||
}
|
||||
|
||||
Id EmitBallotFindLsb(EmitContext& ctx, Id mask) {
|
||||
return ctx.OpGroupNonUniformBallotFindLSB(ctx.U32[1], SubgroupScope(ctx), mask);
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
@ -76,6 +76,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
|
||||
} else {
|
||||
SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
|
||||
}
|
||||
String(fmt::format("{:#x}", info.pgm_hash));
|
||||
|
||||
AddCapability(spv::Capability::Shader);
|
||||
DefineArithmeticTypes();
|
||||
@ -700,7 +701,7 @@ void EmitContext::DefineOutputs() {
|
||||
void EmitContext::DefinePushDataBlock() {
|
||||
// Create push constants block for instance steps rates
|
||||
const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4],
|
||||
U32[4], U32[4], U32[4], U32[4], U32[4]),
|
||||
U32[4], U32[4], U32[4], U32[4], U32[4], U32[2]),
|
||||
"AuxData")};
|
||||
Decorate(struct_type, spv::Decoration::Block);
|
||||
MemberName(struct_type, PushData::Step0Index, "sr0");
|
||||
@ -715,6 +716,7 @@ void EmitContext::DefinePushDataBlock() {
|
||||
MemberName(struct_type, PushData::UdRegsIndex + 3, "ud_regs3");
|
||||
MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0");
|
||||
MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1");
|
||||
MemberName(struct_type, PushData::BufOffsetIndex + 2, "buf_offsets2");
|
||||
MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U);
|
||||
MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U);
|
||||
MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U);
|
||||
@ -727,6 +729,7 @@ void EmitContext::DefinePushDataBlock() {
|
||||
MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U);
|
||||
MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U);
|
||||
MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U);
|
||||
MemberDecorate(struct_type, PushData::BufOffsetIndex + 2, spv::Decoration::Offset, 120U);
|
||||
push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant);
|
||||
Name(push_data_block, "push_data");
|
||||
interfaces.push_back(push_data_block);
|
||||
|
@ -188,14 +188,15 @@ void CFG::SplitDivergenceScopes() {
|
||||
const bool is_close = is_close_scope(inst);
|
||||
if ((is_close || index == blk->end_index) && curr_begin != -1) {
|
||||
// If there are no instructions inside scope don't do anything.
|
||||
if (index - curr_begin == 1) {
|
||||
if (index - curr_begin == 1 && is_close) {
|
||||
curr_begin = -1;
|
||||
continue;
|
||||
}
|
||||
// If all instructions in the scope ignore exec masking, we shouldn't insert a
|
||||
// scope.
|
||||
const auto start = inst_list.begin() + curr_begin + 1;
|
||||
if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask)) {
|
||||
if (!std::ranges::all_of(start, inst_list.begin() + index + !is_close,
|
||||
IgnoresExecMask)) {
|
||||
// Determine the first instruction affected by the exec mask.
|
||||
do {
|
||||
++curr_begin;
|
||||
|
@ -397,7 +397,7 @@ constexpr std::array<InstFormat, 27> InstructionFormatSOPP = {{
|
||||
// 17 = S_SENDMSGHALT
|
||||
{InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any},
|
||||
// 18 = S_TRAP
|
||||
{InstClass::Undefined, InstCategory::Undefined, 0, 1, ScalarType::Any, ScalarType::Any},
|
||||
{InstClass::Undefined, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any},
|
||||
// 19 = S_ICACHE_INV
|
||||
{InstClass::ScalarCache, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any},
|
||||
// 20 = S_INCPERFLEVEL
|
||||
|
@ -3,7 +3,6 @@
|
||||
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
@ -12,29 +11,29 @@ void Translator::EmitDataShare(const GcnInst& inst) {
|
||||
switch (inst.opcode) {
|
||||
// DS
|
||||
case Opcode::DS_ADD_U32:
|
||||
return DS_ADD_U32(inst, false);
|
||||
return DS_OP(inst, AtomicOp::Add, false);
|
||||
case Opcode::DS_ADD_U64:
|
||||
return DS_ADD_U64(inst, false);
|
||||
return DS_OP<IR::U64>(inst, AtomicOp::Add, false);
|
||||
case Opcode::DS_SUB_U32:
|
||||
return DS_SUB_U32(inst, false);
|
||||
return DS_OP(inst, AtomicOp::Sub, false);
|
||||
case Opcode::DS_INC_U32:
|
||||
return DS_INC_U32(inst, false);
|
||||
return DS_OP(inst, AtomicOp::Inc, false);
|
||||
case Opcode::DS_DEC_U32:
|
||||
return DS_DEC_U32(inst, false);
|
||||
return DS_OP(inst, AtomicOp::Dec, false);
|
||||
case Opcode::DS_MIN_I32:
|
||||
return DS_MIN_U32(inst, true, false);
|
||||
return DS_OP(inst, AtomicOp::Smin, false);
|
||||
case Opcode::DS_MAX_I32:
|
||||
return DS_MAX_U32(inst, true, false);
|
||||
return DS_OP(inst, AtomicOp::Smax, false);
|
||||
case Opcode::DS_MIN_U32:
|
||||
return DS_MIN_U32(inst, false, false);
|
||||
return DS_OP(inst, AtomicOp::Umin, false);
|
||||
case Opcode::DS_MAX_U32:
|
||||
return DS_MAX_U32(inst, false, false);
|
||||
return DS_OP(inst, AtomicOp::Umax, false);
|
||||
case Opcode::DS_AND_B32:
|
||||
return DS_AND_B32(inst, false);
|
||||
return DS_OP(inst, AtomicOp::And, false);
|
||||
case Opcode::DS_OR_B32:
|
||||
return DS_OR_B32(inst, false);
|
||||
return DS_OP(inst, AtomicOp::Or, false);
|
||||
case Opcode::DS_XOR_B32:
|
||||
return DS_XOR_B32(inst, false);
|
||||
return DS_OP(inst, AtomicOp::Xor, false);
|
||||
case Opcode::DS_WRITE_B32:
|
||||
return DS_WRITE(32, false, false, false, inst);
|
||||
case Opcode::DS_WRITE2_B32:
|
||||
@ -42,19 +41,19 @@ void Translator::EmitDataShare(const GcnInst& inst) {
|
||||
case Opcode::DS_WRITE2ST64_B32:
|
||||
return DS_WRITE(32, false, true, true, inst);
|
||||
case Opcode::DS_ADD_RTN_U32:
|
||||
return DS_ADD_U32(inst, true);
|
||||
return DS_OP(inst, AtomicOp::Add, true);
|
||||
case Opcode::DS_SUB_RTN_U32:
|
||||
return DS_SUB_U32(inst, true);
|
||||
return DS_OP(inst, AtomicOp::Sub, true);
|
||||
case Opcode::DS_MIN_RTN_U32:
|
||||
return DS_MIN_U32(inst, false, true);
|
||||
return DS_OP(inst, AtomicOp::Umin, true);
|
||||
case Opcode::DS_MAX_RTN_U32:
|
||||
return DS_MAX_U32(inst, false, true);
|
||||
return DS_OP(inst, AtomicOp::Umax, true);
|
||||
case Opcode::DS_AND_RTN_B32:
|
||||
return DS_AND_B32(inst, true);
|
||||
return DS_OP(inst, AtomicOp::And, true);
|
||||
case Opcode::DS_OR_RTN_B32:
|
||||
return DS_OR_B32(inst, true);
|
||||
return DS_OP(inst, AtomicOp::Or, true);
|
||||
case Opcode::DS_XOR_RTN_B32:
|
||||
return DS_XOR_B32(inst, true);
|
||||
return DS_OP(inst, AtomicOp::Xor, true);
|
||||
case Opcode::DS_SWIZZLE_B32:
|
||||
return DS_SWIZZLE_B32(inst);
|
||||
case Opcode::DS_READ_B32:
|
||||
@ -117,92 +116,63 @@ void Translator::V_WRITELANE_B32(const GcnInst& inst) {
|
||||
|
||||
// DS
|
||||
|
||||
void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
|
||||
template <typename T>
|
||||
void Translator::DS_OP(const GcnInst& inst, AtomicOp op, bool rtn) {
|
||||
const bool is_gds = inst.control.ds.gds;
|
||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const T data = [&] {
|
||||
if (op == AtomicOp::Inc || op == AtomicOp::Dec) {
|
||||
return T{};
|
||||
}
|
||||
if constexpr (std::is_same_v<T, IR::U32>) {
|
||||
return GetSrc(inst.src[1]);
|
||||
} else {
|
||||
return GetSrc64(inst.src[1]);
|
||||
}
|
||||
}();
|
||||
const IR::U32 offset =
|
||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
|
||||
const T original_val = [&] -> T {
|
||||
switch (op) {
|
||||
case AtomicOp::Add:
|
||||
return ir.SharedAtomicIAdd(addr_offset, data, is_gds);
|
||||
case AtomicOp::Umin:
|
||||
return ir.SharedAtomicIMin(addr_offset, data, false, is_gds);
|
||||
case AtomicOp::Smin:
|
||||
return ir.SharedAtomicIMin(addr_offset, data, true, is_gds);
|
||||
case AtomicOp::Umax:
|
||||
return ir.SharedAtomicIMax(addr_offset, data, false, is_gds);
|
||||
case AtomicOp::Smax:
|
||||
return ir.SharedAtomicIMax(addr_offset, data, true, is_gds);
|
||||
case AtomicOp::And:
|
||||
return ir.SharedAtomicAnd(addr_offset, data, is_gds);
|
||||
case AtomicOp::Or:
|
||||
return ir.SharedAtomicOr(addr_offset, data, is_gds);
|
||||
case AtomicOp::Xor:
|
||||
return ir.SharedAtomicXor(addr_offset, data, is_gds);
|
||||
case AtomicOp::Sub:
|
||||
return ir.SharedAtomicISub(addr_offset, data, is_gds);
|
||||
case AtomicOp::Inc:
|
||||
return ir.SharedAtomicInc<T>(addr_offset, is_gds);
|
||||
case AtomicOp::Dec:
|
||||
return ir.SharedAtomicDec<T>(addr_offset, is_gds);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
if (rtn) {
|
||||
SetDst(inst.dst[0], IR::U32{original_val});
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_ADD_U64(const GcnInst& inst, bool rtn) {
|
||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||
const IR::U64 data{GetSrc64(inst.src[1])};
|
||||
const IR::U32 offset =
|
||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
|
||||
if (rtn) {
|
||||
SetDst64(inst.dst[0], IR::U64{original_val});
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) {
|
||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset =
|
||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, is_signed);
|
||||
if (rtn) {
|
||||
SetDst(inst.dst[0], IR::U32{original_val});
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn) {
|
||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset =
|
||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, is_signed);
|
||||
if (rtn) {
|
||||
SetDst(inst.dst[0], IR::U32{original_val});
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_AND_B32(const GcnInst& inst, bool rtn) {
|
||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset =
|
||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
const IR::Value original_val = ir.SharedAtomicAnd(addr_offset, data);
|
||||
if (rtn) {
|
||||
SetDst(inst.dst[0], IR::U32{original_val});
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_OR_B32(const GcnInst& inst, bool rtn) {
|
||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset =
|
||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
const IR::Value original_val = ir.SharedAtomicOr(addr_offset, data);
|
||||
if (rtn) {
|
||||
SetDst(inst.dst[0], IR::U32{original_val});
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_XOR_B32(const GcnInst& inst, bool rtn) {
|
||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset =
|
||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
const IR::Value original_val = ir.SharedAtomicXor(addr_offset, data);
|
||||
if (rtn) {
|
||||
SetDst(inst.dst[0], IR::U32{original_val});
|
||||
if constexpr (std::is_same_v<T, IR::U32>) {
|
||||
SetDst(inst.dst[0], original_val);
|
||||
} else {
|
||||
SetDst64(inst.dst[0], original_val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64,
|
||||
const GcnInst& inst) {
|
||||
const bool is_gds = inst.control.ds.gds;
|
||||
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
|
||||
const IR::VectorReg data0{inst.src[1].code};
|
||||
const IR::VectorReg data1{inst.src[2].code};
|
||||
@ -220,33 +190,85 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
|
||||
ir.WriteShared(64,
|
||||
ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data0),
|
||||
ir.GetVectorReg(data0 + 1))),
|
||||
addr0);
|
||||
addr0, is_gds);
|
||||
} else if (bit_size == 32) {
|
||||
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
|
||||
ir.WriteShared(32, ir.GetVectorReg(data0), addr0, is_gds);
|
||||
} else if (bit_size == 16) {
|
||||
ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data0)), addr0);
|
||||
ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data0)), addr0, is_gds);
|
||||
}
|
||||
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
|
||||
if (bit_size == 64) {
|
||||
ir.WriteShared(64,
|
||||
ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data1),
|
||||
ir.GetVectorReg(data1 + 1))),
|
||||
addr1);
|
||||
addr1, is_gds);
|
||||
} else if (bit_size == 32) {
|
||||
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
|
||||
ir.WriteShared(32, ir.GetVectorReg(data1), addr1, is_gds);
|
||||
} else if (bit_size == 16) {
|
||||
ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data1)), addr1);
|
||||
ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data1)), addr1, is_gds);
|
||||
}
|
||||
} else {
|
||||
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
|
||||
if (bit_size == 64) {
|
||||
const IR::Value data =
|
||||
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
|
||||
ir.WriteShared(bit_size, ir.PackUint2x32(data), addr0);
|
||||
ir.WriteShared(bit_size, ir.PackUint2x32(data), addr0, is_gds);
|
||||
} else if (bit_size == 32) {
|
||||
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
|
||||
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0, is_gds);
|
||||
} else if (bit_size == 16) {
|
||||
ir.WriteShared(bit_size, ir.UConvert(16, ir.GetVectorReg(data0)), addr0);
|
||||
ir.WriteShared(bit_size, ir.UConvert(16, ir.GetVectorReg(data0)), addr0, is_gds);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64,
|
||||
const GcnInst& inst) {
|
||||
const bool is_gds = inst.control.ds.gds;
|
||||
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
const u32 offset = (inst.control.ds.offset1 << 8u) + inst.control.ds.offset0;
|
||||
if (info.stage == Stage::Fragment) {
|
||||
ASSERT_MSG(!is_pair && bit_size == 32 && offset % 256 == 0,
|
||||
"Unexpected shared memory offset alignment: {}", offset);
|
||||
ir.SetVectorReg(dst_reg, ir.GetVectorReg(GetScratchVgpr(offset)));
|
||||
return;
|
||||
}
|
||||
if (is_pair) {
|
||||
// Pair loads are either 32 or 64-bit
|
||||
const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
|
||||
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
|
||||
const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0, is_gds);
|
||||
if (bit_size == 64) {
|
||||
const auto vector = ir.UnpackUint2x32(IR::U64{data0});
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
|
||||
} else if (bit_size == 32) {
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{data0});
|
||||
} else if (bit_size == 16) {
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data0})});
|
||||
}
|
||||
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
|
||||
const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1, is_gds);
|
||||
if (bit_size == 64) {
|
||||
const auto vector = ir.UnpackUint2x32(IR::U64{data1});
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
|
||||
} else if (bit_size == 32) {
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{data1});
|
||||
} else if (bit_size == 16) {
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data1})});
|
||||
}
|
||||
} else {
|
||||
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
|
||||
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0, is_gds);
|
||||
if (bit_size == 64) {
|
||||
const auto vector = ir.UnpackUint2x32(IR::U64{data});
|
||||
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(vector, 0)});
|
||||
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(vector, 1)});
|
||||
} else if (bit_size == 32) {
|
||||
ir.SetVectorReg(dst_reg, IR::U32{data});
|
||||
} else if (bit_size == 16) {
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data})});
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -263,91 +285,6 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], ir.QuadShuffle(src, index));
|
||||
}
|
||||
|
||||
void Translator::DS_INC_U32(const GcnInst& inst, bool rtn) {
|
||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||
const IR::U32 offset =
|
||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
const IR::Value original_val = ir.SharedAtomicInc(addr_offset);
|
||||
if (rtn) {
|
||||
SetDst(inst.dst[0], IR::U32{original_val});
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_DEC_U32(const GcnInst& inst, bool rtn) {
|
||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||
const IR::U32 offset =
|
||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
const IR::Value original_val = ir.SharedAtomicDec(addr_offset);
|
||||
if (rtn) {
|
||||
SetDst(inst.dst[0], IR::U32{original_val});
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_SUB_U32(const GcnInst& inst, bool rtn) {
|
||||
const IR::U32 addr{GetSrc(inst.src[0])};
|
||||
const IR::U32 data{GetSrc(inst.src[1])};
|
||||
const IR::U32 offset =
|
||||
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||
const IR::Value original_val = ir.SharedAtomicISub(addr_offset, data);
|
||||
if (rtn) {
|
||||
SetDst(inst.dst[0], IR::U32{original_val});
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64,
|
||||
const GcnInst& inst) {
|
||||
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
const u32 offset = (inst.control.ds.offset1 << 8u) + inst.control.ds.offset0;
|
||||
if (info.stage == Stage::Fragment) {
|
||||
ASSERT_MSG(!is_pair && bit_size == 32 && offset % 256 == 0,
|
||||
"Unexpected shared memory offset alignment: {}", offset);
|
||||
ir.SetVectorReg(dst_reg, ir.GetVectorReg(GetScratchVgpr(offset)));
|
||||
return;
|
||||
}
|
||||
if (is_pair) {
|
||||
// Pair loads are either 32 or 64-bit
|
||||
const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
|
||||
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
|
||||
const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0);
|
||||
if (bit_size == 64) {
|
||||
const auto vector = ir.UnpackUint2x32(IR::U64{data0});
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
|
||||
} else if (bit_size == 32) {
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{data0});
|
||||
} else if (bit_size == 16) {
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data0})});
|
||||
}
|
||||
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
|
||||
const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1);
|
||||
if (bit_size == 64) {
|
||||
const auto vector = ir.UnpackUint2x32(IR::U64{data1});
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)});
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)});
|
||||
} else if (bit_size == 32) {
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{data1});
|
||||
} else if (bit_size == 16) {
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data1})});
|
||||
}
|
||||
} else {
|
||||
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
|
||||
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0);
|
||||
if (bit_size == 64) {
|
||||
const auto vector = ir.UnpackUint2x32(IR::U64{data});
|
||||
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(vector, 0)});
|
||||
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(vector, 1)});
|
||||
} else if (bit_size == 32) {
|
||||
ir.SetVectorReg(dst_reg, IR::U32{data});
|
||||
} else if (bit_size == 16) {
|
||||
ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data})});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Translator::DS_APPEND(const GcnInst& inst) {
|
||||
const u32 inst_offset = (u32(inst.control.ds.offset1) << 8u) + inst.control.ds.offset0;
|
||||
const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));
|
||||
|
@ -586,6 +586,15 @@ void Translator::S_MOV(const GcnInst& inst) {
|
||||
}
|
||||
|
||||
void Translator::S_MOV_B64(const GcnInst& inst) {
|
||||
// Moving SGPR to SGPR is used for thread masks, like most operations, but it can also be used
|
||||
// for moving sharps.
|
||||
if (inst.dst[0].field == OperandField::ScalarGPR &&
|
||||
inst.src[0].field == OperandField::ScalarGPR) {
|
||||
ir.SetScalarReg(IR::ScalarReg(inst.dst[0].code),
|
||||
ir.GetScalarReg(IR::ScalarReg(inst.src[0].code)));
|
||||
ir.SetScalarReg(IR::ScalarReg(inst.dst[0].code + 1),
|
||||
ir.GetScalarReg(IR::ScalarReg(inst.src[0].code + 1)));
|
||||
}
|
||||
const IR::U1 src = [&] {
|
||||
switch (inst.src[0].field) {
|
||||
case OperandField::VccLo:
|
||||
@ -671,8 +680,9 @@ void Translator::S_FF1_I32_B32(const GcnInst& inst) {
|
||||
}
|
||||
|
||||
void Translator::S_FF1_I32_B64(const GcnInst& inst) {
|
||||
const IR::U64 src0{GetSrc64(inst.src[0])};
|
||||
const IR::U32 result{ir.FindILsb(src0)};
|
||||
ASSERT(inst.src[0].field == OperandField::ScalarGPR);
|
||||
const IR::U32 result{
|
||||
ir.BallotFindLsb(ir.Ballot(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code))))};
|
||||
SetDst(inst.dst[0], result);
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,9 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
|
||||
case Opcode::S_SETPRIO:
|
||||
LOG_WARNING(Render_Vulkan, "S_SETPRIO instruction!");
|
||||
return;
|
||||
case Opcode::S_TRAP:
|
||||
LOG_WARNING(Render_Vulkan, "S_TRAP instruction!");
|
||||
return;
|
||||
case Opcode::S_GETPC_B64:
|
||||
return S_GETPC_B64(pc, inst);
|
||||
case Opcode::S_SETPC_B64:
|
||||
|
@ -270,21 +270,13 @@ public:
|
||||
|
||||
// Data share
|
||||
// DS
|
||||
void DS_ADD_U32(const GcnInst& inst, bool rtn);
|
||||
void DS_ADD_U64(const GcnInst& inst, bool rtn);
|
||||
void DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn);
|
||||
void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn);
|
||||
template <typename T = IR::U32>
|
||||
void DS_OP(const GcnInst& inst, AtomicOp op, bool rtn);
|
||||
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
|
||||
void DS_SWIZZLE_B32(const GcnInst& inst);
|
||||
void DS_AND_B32(const GcnInst& inst, bool rtn);
|
||||
void DS_OR_B32(const GcnInst& inst, bool rtn);
|
||||
void DS_XOR_B32(const GcnInst& inst, bool rtn);
|
||||
void DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst);
|
||||
void DS_SWIZZLE_B32(const GcnInst& inst);
|
||||
void DS_APPEND(const GcnInst& inst);
|
||||
void DS_CONSUME(const GcnInst& inst);
|
||||
void DS_SUB_U32(const GcnInst& inst, bool rtn);
|
||||
void DS_INC_U32(const GcnInst& inst, bool rtn);
|
||||
void DS_DEC_U32(const GcnInst& inst, bool rtn);
|
||||
|
||||
// Buffer Memory
|
||||
// MUBUF / MTBUF
|
||||
|
@ -565,7 +565,8 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
|
||||
}
|
||||
// v_mbcnt_hi_u32_b32 vX, exec_hi, 0/vZ
|
||||
if ((inst.src[0].field == OperandField::ExecHi ||
|
||||
inst.src[0].field == OperandField::VccHi) &&
|
||||
inst.src[0].field == OperandField::VccHi ||
|
||||
inst.src[0].field == OperandField::ScalarGPR) &&
|
||||
(inst.src[1].field == OperandField::ConstZero ||
|
||||
inst.src[1].field == OperandField::VectorGPR)) {
|
||||
return SetDst(inst.dst[0], GetSrc(inst.src[1]));
|
||||
@ -579,7 +580,8 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
|
||||
}
|
||||
// v_mbcnt_lo_u32_b32 vY, exec_lo, vX
|
||||
// used combined with above for append buffer indexing.
|
||||
if (inst.src[0].field == OperandField::ExecLo || inst.src[0].field == OperandField::VccLo) {
|
||||
if (inst.src[0].field == OperandField::ExecLo || inst.src[0].field == OperandField::VccLo ||
|
||||
inst.src[0].field == OperandField::ScalarGPR) {
|
||||
return SetDst(inst.dst[0], GetSrc(inst.src[1]));
|
||||
}
|
||||
UNREACHABLE();
|
||||
|
@ -192,9 +192,10 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_
|
||||
const IR::VectorReg vaddr{inst.src[0].code};
|
||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
||||
const bool has_soffset = !soffset.IsImmediate() || soffset.U32() != 0;
|
||||
if (info.stage != Stage::Geometry) {
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
|
||||
"Non immediate offset not supported");
|
||||
ASSERT_MSG(!has_soffset || !mubuf.offen,
|
||||
"Having both scalar and vector offsets is not supported");
|
||||
}
|
||||
|
||||
const IR::Value address = [&] -> IR::Value {
|
||||
@ -204,15 +205,21 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_
|
||||
if (mubuf.idxen && mubuf.offen) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
|
||||
}
|
||||
if (mubuf.idxen && has_soffset) {
|
||||
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
|
||||
}
|
||||
if (mubuf.idxen || mubuf.offen) {
|
||||
return ir.GetVectorReg(vaddr);
|
||||
}
|
||||
if (has_soffset) {
|
||||
return soffset;
|
||||
}
|
||||
return {};
|
||||
}();
|
||||
|
||||
IR::BufferInstInfo buffer_info{};
|
||||
buffer_info.index_enable.Assign(mubuf.idxen);
|
||||
buffer_info.offset_enable.Assign(mubuf.offen);
|
||||
buffer_info.offset_enable.Assign(mubuf.offen || has_soffset);
|
||||
buffer_info.inst_offset.Assign(mubuf.offset);
|
||||
buffer_info.globally_coherent.Assign(mubuf.glc);
|
||||
buffer_info.system_coherent.Assign(mubuf.slc);
|
||||
|
@ -25,7 +25,7 @@ namespace Shader {
|
||||
|
||||
static constexpr size_t NumUserDataRegs = 16;
|
||||
static constexpr size_t NumImages = 64;
|
||||
static constexpr size_t NumBuffers = 32;
|
||||
static constexpr size_t NumBuffers = 40;
|
||||
static constexpr size_t NumSamplers = 16;
|
||||
static constexpr size_t NumFMasks = 8;
|
||||
|
||||
|
@ -291,78 +291,137 @@ void IREmitter::SetPatch(Patch patch, const F32& value) {
|
||||
Inst(Opcode::SetPatch, patch, value);
|
||||
}
|
||||
|
||||
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
||||
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset, bool is_gds) {
|
||||
switch (bit_size) {
|
||||
case 16:
|
||||
return Inst<U16>(Opcode::LoadSharedU16, offset);
|
||||
return Inst<U16>(Opcode::LoadSharedU16, Flags{is_gds}, offset);
|
||||
case 32:
|
||||
return Inst<U32>(Opcode::LoadSharedU32, offset);
|
||||
return Inst<U32>(Opcode::LoadSharedU32, Flags{is_gds}, offset);
|
||||
case 64:
|
||||
return Inst<U64>(Opcode::LoadSharedU64, offset);
|
||||
return Inst<U64>(Opcode::LoadSharedU64, Flags{is_gds}, offset);
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
|
||||
}
|
||||
}
|
||||
|
||||
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
|
||||
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset, bool is_gds) {
|
||||
switch (bit_size) {
|
||||
case 16:
|
||||
Inst(Opcode::WriteSharedU16, offset, value);
|
||||
Inst(Opcode::WriteSharedU16, Flags{is_gds}, offset, value);
|
||||
break;
|
||||
case 32:
|
||||
Inst(Opcode::WriteSharedU32, offset, value);
|
||||
Inst(Opcode::WriteSharedU32, Flags{is_gds}, offset, value);
|
||||
break;
|
||||
case 64:
|
||||
Inst(Opcode::WriteSharedU64, offset, value);
|
||||
Inst(Opcode::WriteSharedU64, Flags{is_gds}, offset, value);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
|
||||
}
|
||||
}
|
||||
|
||||
U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data) {
|
||||
U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data, bool is_gds) {
|
||||
switch (data.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(Opcode::SharedAtomicIAdd32, address, data);
|
||||
return Inst<U32>(Opcode::SharedAtomicIAdd32, Flags{is_gds}, address, data);
|
||||
case Type::U64:
|
||||
return Inst<U64>(Opcode::SharedAtomicIAdd64, address, data);
|
||||
return Inst<U64>(Opcode::SharedAtomicIAdd64, Flags{is_gds}, address, data);
|
||||
default:
|
||||
ThrowInvalidType(data.Type());
|
||||
}
|
||||
}
|
||||
|
||||
U32 IREmitter::SharedAtomicIMin(const U32& address, const U32& data, bool is_signed) {
|
||||
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMin32, address, data)
|
||||
: Inst<U32>(Opcode::SharedAtomicUMin32, address, data);
|
||||
U32U64 IREmitter::SharedAtomicIMin(const U32& address, const U32U64& data, bool is_signed,
|
||||
bool is_gds) {
|
||||
switch (data.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(is_signed ? Opcode::SharedAtomicSMin32 : Opcode::SharedAtomicUMin32,
|
||||
Flags{is_gds}, address, data);
|
||||
case Type::U64:
|
||||
return Inst<U64>(is_signed ? Opcode::SharedAtomicSMin64 : Opcode::SharedAtomicUMin64,
|
||||
Flags{is_gds}, address, data);
|
||||
default:
|
||||
ThrowInvalidType(data.Type());
|
||||
}
|
||||
}
|
||||
|
||||
U32 IREmitter::SharedAtomicIMax(const U32& address, const U32& data, bool is_signed) {
|
||||
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMax32, address, data)
|
||||
: Inst<U32>(Opcode::SharedAtomicUMax32, address, data);
|
||||
U32U64 IREmitter::SharedAtomicIMax(const U32& address, const U32U64& data, bool is_signed,
|
||||
bool is_gds) {
|
||||
switch (data.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(is_signed ? Opcode::SharedAtomicSMax32 : Opcode::SharedAtomicUMax32,
|
||||
Flags{is_gds}, address, data);
|
||||
case Type::U64:
|
||||
return Inst<U64>(is_signed ? Opcode::SharedAtomicSMax64 : Opcode::SharedAtomicUMax64,
|
||||
Flags{is_gds}, address, data);
|
||||
default:
|
||||
ThrowInvalidType(data.Type());
|
||||
}
|
||||
}
|
||||
|
||||
U32 IREmitter::SharedAtomicAnd(const U32& address, const U32& data) {
|
||||
return Inst<U32>(Opcode::SharedAtomicAnd32, address, data);
|
||||
U32U64 IREmitter::SharedAtomicAnd(const U32& address, const U32U64& data, bool is_gds) {
|
||||
switch (data.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(Opcode::SharedAtomicAnd32, Flags{is_gds}, address, data);
|
||||
case Type::U64:
|
||||
return Inst<U64>(Opcode::SharedAtomicAnd64, Flags{is_gds}, address, data);
|
||||
default:
|
||||
ThrowInvalidType(data.Type());
|
||||
}
|
||||
}
|
||||
|
||||
U32 IREmitter::SharedAtomicOr(const U32& address, const U32& data) {
|
||||
U32U64 IREmitter::SharedAtomicOr(const U32& address, const U32U64& data, bool is_gds) {
|
||||
switch (data.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(Opcode::SharedAtomicAnd32, Flags{is_gds}, address, data);
|
||||
case Type::U64:
|
||||
return Inst<U64>(Opcode::SharedAtomicAnd64, Flags{is_gds}, address, data);
|
||||
default:
|
||||
ThrowInvalidType(data.Type());
|
||||
}
|
||||
return Inst<U32>(Opcode::SharedAtomicOr32, address, data);
|
||||
}
|
||||
|
||||
U32 IREmitter::SharedAtomicXor(const U32& address, const U32& data) {
|
||||
return Inst<U32>(Opcode::SharedAtomicXor32, address, data);
|
||||
U32U64 IREmitter::SharedAtomicXor(const U32& address, const U32U64& data, bool is_gds) {
|
||||
switch (data.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(Opcode::SharedAtomicXor32, Flags{is_gds}, address, data);
|
||||
case Type::U64:
|
||||
return Inst<U64>(Opcode::SharedAtomicXor64, Flags{is_gds}, address, data);
|
||||
default:
|
||||
ThrowInvalidType(data.Type());
|
||||
}
|
||||
}
|
||||
|
||||
U32 IREmitter::SharedAtomicInc(const U32& address) {
|
||||
return Inst<U32>(Opcode::SharedAtomicInc32, address);
|
||||
U32U64 IREmitter::SharedAtomicISub(const U32& address, const U32U64& data, bool is_gds) {
|
||||
switch (data.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(Opcode::SharedAtomicISub32, Flags{is_gds}, address, data);
|
||||
case Type::U64:
|
||||
return Inst<U64>(Opcode::SharedAtomicISub64, Flags{is_gds}, address, data);
|
||||
default:
|
||||
ThrowInvalidType(data.Type());
|
||||
}
|
||||
}
|
||||
|
||||
U32 IREmitter::SharedAtomicDec(const U32& address) {
|
||||
return Inst<U32>(Opcode::SharedAtomicDec32, address);
|
||||
template <>
|
||||
U32 IREmitter::SharedAtomicInc(const U32& address, bool is_gds) {
|
||||
return Inst<U32>(Opcode::SharedAtomicInc32, Flags{is_gds}, address);
|
||||
}
|
||||
|
||||
U32 IREmitter::SharedAtomicISub(const U32& address, const U32& data) {
|
||||
return Inst<U32>(Opcode::SharedAtomicISub32, address, data);
|
||||
template <>
|
||||
U64 IREmitter::SharedAtomicInc(const U32& address, bool is_gds) {
|
||||
return Inst<U64>(Opcode::SharedAtomicInc64, Flags{is_gds}, address);
|
||||
}
|
||||
|
||||
template <>
|
||||
U32 IREmitter::SharedAtomicDec(const U32& address, bool is_gds) {
|
||||
return Inst<U32>(Opcode::SharedAtomicDec32, Flags{is_gds}, address);
|
||||
}
|
||||
|
||||
template <>
|
||||
U64 IREmitter::SharedAtomicDec(const U32& address, bool is_gds) {
|
||||
return Inst<U64>(Opcode::SharedAtomicDec64, Flags{is_gds}, address);
|
||||
}
|
||||
|
||||
U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
|
||||
@ -601,6 +660,14 @@ U32 IREmitter::WriteLane(const U32& value, const U32& write_value, const U32& la
|
||||
return Inst<U32>(Opcode::WriteLane, value, write_value, lane);
|
||||
}
|
||||
|
||||
// Emits a Ballot instruction over the 1-bit value `bit` and returns its result.
Value IREmitter::Ballot(const U1& bit) {
    const Value ballot_result = Inst(Opcode::Ballot, bit);
    return ballot_result;
}
|
||||
|
||||
// Emits a BallotFindLsb instruction over `mask` and returns its 32-bit result.
U32 IREmitter::BallotFindLsb(const Value& mask) {
    const U32 lsb_index = Inst<U32>(Opcode::BallotFindLsb, mask);
    return lsb_index;
}
|
||||
|
||||
F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
|
||||
if (a.Type() != b.Type()) {
|
||||
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
|
||||
|
@ -96,18 +96,24 @@ public:
|
||||
[[nodiscard]] F32 GetPatch(Patch patch);
|
||||
void SetPatch(Patch patch, const F32& value);
|
||||
|
||||
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
|
||||
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
||||
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset,
|
||||
bool is_gds = false);
|
||||
void WriteShared(int bit_size, const Value& value, const U32& offset, bool is_gds = false);
|
||||
|
||||
[[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data);
|
||||
[[nodiscard]] U32 SharedAtomicISub(const U32& address, const U32& data);
|
||||
[[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
|
||||
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
|
||||
[[nodiscard]] U32 SharedAtomicInc(const U32& address);
|
||||
[[nodiscard]] U32 SharedAtomicDec(const U32& address);
|
||||
[[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data);
|
||||
[[nodiscard]] U32 SharedAtomicOr(const U32& address, const U32& data);
|
||||
[[nodiscard]] U32 SharedAtomicXor(const U32& address, const U32& data);
|
||||
[[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data, bool is_gds);
|
||||
[[nodiscard]] U32U64 SharedAtomicISub(const U32& address, const U32U64& data, bool is_gds);
|
||||
[[nodiscard]] U32U64 SharedAtomicIMin(const U32& address, const U32U64& data, bool is_signed,
|
||||
bool is_gds);
|
||||
[[nodiscard]] U32U64 SharedAtomicIMax(const U32& address, const U32U64& data, bool is_signed,
|
||||
bool is_gds);
|
||||
[[nodiscard]] U32U64 SharedAtomicAnd(const U32& address, const U32U64& data, bool is_gds);
|
||||
[[nodiscard]] U32U64 SharedAtomicOr(const U32& address, const U32U64& data, bool is_gds);
|
||||
[[nodiscard]] U32U64 SharedAtomicXor(const U32& address, const U32U64& data, bool is_gds);
|
||||
|
||||
template <typename T = U32>
|
||||
[[nodiscard]] T SharedAtomicInc(const U32& address, bool is_gds);
|
||||
template <typename T = U32>
|
||||
[[nodiscard]] T SharedAtomicDec(const U32& address, bool is_gds);
|
||||
|
||||
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
|
||||
[[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index);
|
||||
@ -170,6 +176,8 @@ public:
|
||||
[[nodiscard]] U32 ReadFirstLane(const U32& value);
|
||||
[[nodiscard]] U32 ReadLane(const U32& value, const U32& lane);
|
||||
[[nodiscard]] U32 WriteLane(const U32& value, const U32& write_value, const U32& lane);
|
||||
[[nodiscard]] Value Ballot(const U1& bit);
|
||||
[[nodiscard]] U32 BallotFindLsb(const Value& mask);
|
||||
|
||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
|
||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
|
||||
|
@ -92,7 +92,6 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
||||
case Opcode::WriteSharedU32:
|
||||
case Opcode::WriteSharedU64:
|
||||
case Opcode::SharedAtomicIAdd32:
|
||||
case Opcode::SharedAtomicIAdd64:
|
||||
case Opcode::SharedAtomicISub32:
|
||||
case Opcode::SharedAtomicSMin32:
|
||||
case Opcode::SharedAtomicUMin32:
|
||||
@ -103,6 +102,17 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
||||
case Opcode::SharedAtomicAnd32:
|
||||
case Opcode::SharedAtomicOr32:
|
||||
case Opcode::SharedAtomicXor32:
|
||||
case Opcode::SharedAtomicIAdd64:
|
||||
case Opcode::SharedAtomicISub64:
|
||||
case Opcode::SharedAtomicSMin64:
|
||||
case Opcode::SharedAtomicUMin64:
|
||||
case Opcode::SharedAtomicSMax64:
|
||||
case Opcode::SharedAtomicUMax64:
|
||||
case Opcode::SharedAtomicInc64:
|
||||
case Opcode::SharedAtomicDec64:
|
||||
case Opcode::SharedAtomicAnd64:
|
||||
case Opcode::SharedAtomicOr64:
|
||||
case Opcode::SharedAtomicXor64:
|
||||
case Opcode::ImageWrite:
|
||||
case Opcode::ImageAtomicIAdd32:
|
||||
case Opcode::ImageAtomicSMin32:
|
||||
|
@ -41,15 +41,25 @@ OPCODE(WriteSharedU64, Void, U32,
|
||||
OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicIAdd64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicISub32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicISub64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicSMin32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicSMin64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicUMin32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicUMin64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicSMax32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicSMax64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicUMax32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicUMax64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicInc32, U32, U32, )
|
||||
OPCODE(SharedAtomicInc64, U64, U32, )
|
||||
OPCODE(SharedAtomicDec32, U32, U32, )
|
||||
OPCODE(SharedAtomicDec64, U64, U32, )
|
||||
OPCODE(SharedAtomicAnd32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicAnd64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicOr32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicOr64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicXor32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicXor64, U64, U32, U64, )
|
||||
|
||||
// Context getters/setters
|
||||
OPCODE(GetUserData, U32, ScalarReg, )
|
||||
@ -462,5 +472,7 @@ OPCODE(QuadShuffle, U32, U32,
|
||||
OPCODE(ReadFirstLane, U32, U32, )
|
||||
OPCODE(ReadLane, U32, U32, U32 )
|
||||
OPCODE(WriteLane, U32, U32, U32, U32 )
|
||||
OPCODE(Ballot, U32x4, U1, )
|
||||
OPCODE(BallotFindLsb, U32, U32x4, )
|
||||
OPCODE(DataAppend, U32, U32, U32 )
|
||||
OPCODE(DataConsume, U32, U32, U32 )
|
||||
|
@ -95,6 +95,10 @@ void ReadLaneEliminationPass(IR::Program& program) {
|
||||
if (inst.GetOpcode() != IR::Opcode::ReadLane) {
|
||||
continue;
|
||||
}
|
||||
if (!inst.Arg(1).IsImmediate()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const u32 lane = inst.Arg(1).U32();
|
||||
IR::Inst* prod = inst.Arg(0).InstRecursive();
|
||||
|
||||
|
@ -84,8 +84,42 @@ bool IsBufferInstruction(const IR::Inst& inst) {
|
||||
}
|
||||
|
||||
/// Returns true when the instruction has to be patched to access the global data share (GDS)
/// buffer. Data append/consume always qualify; shared-memory loads, stores and atomics qualify
/// only when the boolean stored in the instruction flags (is_gds) is set.
bool IsDataRingInstruction(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::DataAppend:
    case IR::Opcode::DataConsume:
        return true;
    // NOTE(review): every 64-bit shared atomic is accepted below, but the switch in
    // PatchGlobalDataShareAccess only has a case for SharedAtomicIAdd64 among them; the others
    // would reach its UNREACHABLE() default if they ever carry is_gds. Confirm that cannot happen.
    case IR::Opcode::LoadSharedU16:
    case IR::Opcode::LoadSharedU32:
    case IR::Opcode::LoadSharedU64:
    case IR::Opcode::WriteSharedU16:
    case IR::Opcode::WriteSharedU32:
    case IR::Opcode::WriteSharedU64:
    case IR::Opcode::SharedAtomicIAdd32:
    case IR::Opcode::SharedAtomicIAdd64:
    case IR::Opcode::SharedAtomicUMin32:
    case IR::Opcode::SharedAtomicUMin64:
    case IR::Opcode::SharedAtomicSMin32:
    case IR::Opcode::SharedAtomicSMin64:
    case IR::Opcode::SharedAtomicUMax32:
    case IR::Opcode::SharedAtomicUMax64:
    case IR::Opcode::SharedAtomicSMax32:
    case IR::Opcode::SharedAtomicSMax64:
    case IR::Opcode::SharedAtomicAnd32:
    case IR::Opcode::SharedAtomicAnd64:
    case IR::Opcode::SharedAtomicOr32:
    case IR::Opcode::SharedAtomicOr64:
    case IR::Opcode::SharedAtomicXor32:
    case IR::Opcode::SharedAtomicXor64:
    case IR::Opcode::SharedAtomicISub32:
    case IR::Opcode::SharedAtomicISub64:
    case IR::Opcode::SharedAtomicInc32:
    case IR::Opcode::SharedAtomicInc64:
    case IR::Opcode::SharedAtomicDec32:
    case IR::Opcode::SharedAtomicDec64:
        return inst.Flags<bool>(); // is_gds
    default:
        return false;
    }
}
|
||||
|
||||
IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
|
||||
@ -507,7 +541,8 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
|
||||
}
|
||||
}
|
||||
|
||||
void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
void PatchGlobalDataShareAccess(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
const u32 binding = descriptors.Add(BufferResource{
|
||||
.used_types = IR::Type::U32,
|
||||
.inline_cbuf = AmdGpu::Buffer::Null(),
|
||||
@ -515,37 +550,111 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto
|
||||
.is_written = true,
|
||||
});
|
||||
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
// Attempt to deduce the GDS address of counter at compile time.
|
||||
u32 gds_addr = 0;
|
||||
const IR::Value& gds_offset = inst.Arg(0);
|
||||
if (gds_offset.IsImmediate()) {
|
||||
// Nothing to do, offset is known.
|
||||
gds_addr = gds_offset.U32() & 0xFFFF;
|
||||
} else {
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to track M0 source");
|
||||
|
||||
// M0 must be set by some user data register.
|
||||
const IR::Inst* prod = gds_offset.InstRecursive();
|
||||
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
|
||||
u32 m0_val = info.user_data[ud_reg] >> 16;
|
||||
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
m0_val += prod->Arg(1).U32();
|
||||
}
|
||||
gds_addr = m0_val & 0xFFFF;
|
||||
}
|
||||
|
||||
// Patch instruction.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
|
||||
inst.SetArg(1, ir.Imm32(binding));
|
||||
|
||||
// For data append/consume operations attempt to deduce the GDS address.
|
||||
if (inst.GetOpcode() == IR::Opcode::DataAppend || inst.GetOpcode() == IR::Opcode::DataConsume) {
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
u32 gds_addr = 0;
|
||||
const IR::Value& gds_offset = inst.Arg(0);
|
||||
if (gds_offset.IsImmediate()) {
|
||||
// Nothing to do, offset is known.
|
||||
gds_addr = gds_offset.U32() & 0xFFFF;
|
||||
} else {
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to track M0 source");
|
||||
|
||||
// M0 must be set by some user data register.
|
||||
const IR::Inst* prod = gds_offset.InstRecursive();
|
||||
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
|
||||
u32 m0_val = info.user_data[ud_reg] >> 16;
|
||||
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
m0_val += prod->Arg(1).U32();
|
||||
}
|
||||
gds_addr = m0_val & 0xFFFF;
|
||||
}
|
||||
|
||||
// Patch instruction.
|
||||
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
|
||||
inst.SetArg(1, ir.Imm32(binding));
|
||||
} else {
|
||||
// Convert shared memory opcode to storage buffer atomic to GDS buffer.
|
||||
const IR::U32 offset = IR::U32{inst.Arg(0)};
|
||||
const IR::U32 address_words = ir.ShiftRightLogical(offset, ir.Imm32(1));
|
||||
const IR::U32 address_dwords = ir.ShiftRightLogical(offset, ir.Imm32(2));
|
||||
const IR::U32 address_qwords = ir.ShiftRightLogical(offset, ir.Imm32(3));
|
||||
const IR::U32 handle = ir.Imm32(binding);
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::SharedAtomicIAdd32:
|
||||
inst.ReplaceUsesWith(ir.BufferAtomicIAdd(handle, address_dwords, inst.Arg(1), {}));
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
inst.ReplaceUsesWith(
|
||||
ir.BufferAtomicIAdd(handle, address_qwords, IR::U64{inst.Arg(1)}, {}));
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicISub32:
|
||||
inst.ReplaceUsesWith(ir.BufferAtomicISub(handle, address_dwords, inst.Arg(1), {}));
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicSMin32:
|
||||
case IR::Opcode::SharedAtomicUMin32: {
|
||||
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32;
|
||||
inst.ReplaceUsesWith(
|
||||
ir.BufferAtomicIMin(handle, address_dwords, inst.Arg(1), is_signed, {}));
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::SharedAtomicSMax32:
|
||||
case IR::Opcode::SharedAtomicUMax32: {
|
||||
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32;
|
||||
inst.ReplaceUsesWith(
|
||||
ir.BufferAtomicIMax(handle, address_dwords, inst.Arg(1), is_signed, {}));
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::SharedAtomicInc32:
|
||||
inst.ReplaceUsesWith(ir.BufferAtomicInc(handle, address_dwords, {}));
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicDec32:
|
||||
inst.ReplaceUsesWith(ir.BufferAtomicDec(handle, address_dwords, {}));
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicAnd32:
|
||||
inst.ReplaceUsesWith(ir.BufferAtomicAnd(handle, address_dwords, inst.Arg(1), {}));
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicOr32:
|
||||
inst.ReplaceUsesWith(ir.BufferAtomicOr(handle, address_dwords, inst.Arg(1), {}));
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicXor32:
|
||||
inst.ReplaceUsesWith(ir.BufferAtomicXor(handle, address_dwords, inst.Arg(1), {}));
|
||||
break;
|
||||
case IR::Opcode::LoadSharedU16:
|
||||
inst.ReplaceUsesWith(ir.LoadBufferU16(handle, address_words, {}));
|
||||
break;
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
inst.ReplaceUsesWith(ir.LoadBufferU32(1, handle, address_dwords, {}));
|
||||
break;
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
inst.ReplaceUsesWith(ir.LoadBufferU64(handle, address_qwords, {}));
|
||||
break;
|
||||
case IR::Opcode::WriteSharedU16:
|
||||
ir.StoreBufferU16(handle, address_words, IR::U16{inst.Arg(1)}, {});
|
||||
inst.Invalidate();
|
||||
break;
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
ir.StoreBufferU32(1, handle, address_dwords, inst.Arg(1), {});
|
||||
inst.Invalidate();
|
||||
break;
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
ir.StoreBufferU64(handle, address_qwords, IR::U64{inst.Arg(1)}, {});
|
||||
inst.Invalidate();
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info,
|
||||
@ -916,8 +1025,6 @@ void ResourceTrackingPass(IR::Program& program) {
|
||||
PatchBufferSharp(*block, inst, info, descriptors);
|
||||
} else if (IsImageInstruction(inst)) {
|
||||
PatchImageSharp(*block, inst, info, descriptors);
|
||||
} else if (IsDataRingInstruction(inst)) {
|
||||
PatchDataRingAccess(*block, inst, info, descriptors);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -929,6 +1036,8 @@ void ResourceTrackingPass(IR::Program& program) {
|
||||
PatchBufferArgs(*block, inst, info);
|
||||
} else if (IsImageInstruction(inst)) {
|
||||
PatchImageArgs(*block, inst, info);
|
||||
} else if (IsDataRingInstruction(inst)) {
|
||||
PatchGlobalDataShareAccess(*block, inst, info, descriptors);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -55,6 +55,16 @@ void Visit(Info& info, const IR::Inst& inst) {
|
||||
info.shared_types |= IR::Type::U32;
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
case IR::Opcode::SharedAtomicISub64:
|
||||
case IR::Opcode::SharedAtomicSMin64:
|
||||
case IR::Opcode::SharedAtomicUMin64:
|
||||
case IR::Opcode::SharedAtomicSMax64:
|
||||
case IR::Opcode::SharedAtomicUMax64:
|
||||
case IR::Opcode::SharedAtomicInc64:
|
||||
case IR::Opcode::SharedAtomicDec64:
|
||||
case IR::Opcode::SharedAtomicAnd64:
|
||||
case IR::Opcode::SharedAtomicOr64:
|
||||
case IR::Opcode::SharedAtomicXor64:
|
||||
info.uses_shared_int64_atomics = true;
|
||||
[[fallthrough]];
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
|
@ -15,6 +15,16 @@ static bool Requires16BitSharedAtomic(const IR::Inst& inst) {
|
||||
static bool Requires64BitSharedAtomic(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
case IR::Opcode::SharedAtomicISub64:
|
||||
case IR::Opcode::SharedAtomicSMin64:
|
||||
case IR::Opcode::SharedAtomicUMin64:
|
||||
case IR::Opcode::SharedAtomicSMax64:
|
||||
case IR::Opcode::SharedAtomicUMax64:
|
||||
case IR::Opcode::SharedAtomicInc64:
|
||||
case IR::Opcode::SharedAtomicDec64:
|
||||
case IR::Opcode::SharedAtomicAnd64:
|
||||
case IR::Opcode::SharedAtomicOr64:
|
||||
case IR::Opcode::SharedAtomicXor64:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
@ -17,7 +17,6 @@ static bool IsSharedAccess(const IR::Inst& inst) {
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
case IR::Opcode::SharedAtomicIAdd32:
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
case IR::Opcode::SharedAtomicISub32:
|
||||
case IR::Opcode::SharedAtomicSMin32:
|
||||
case IR::Opcode::SharedAtomicUMin32:
|
||||
@ -28,6 +27,17 @@ static bool IsSharedAccess(const IR::Inst& inst) {
|
||||
case IR::Opcode::SharedAtomicAnd32:
|
||||
case IR::Opcode::SharedAtomicOr32:
|
||||
case IR::Opcode::SharedAtomicXor32:
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
case IR::Opcode::SharedAtomicISub64:
|
||||
case IR::Opcode::SharedAtomicSMin64:
|
||||
case IR::Opcode::SharedAtomicUMin64:
|
||||
case IR::Opcode::SharedAtomicSMax64:
|
||||
case IR::Opcode::SharedAtomicUMax64:
|
||||
case IR::Opcode::SharedAtomicInc64:
|
||||
case IR::Opcode::SharedAtomicDec64:
|
||||
case IR::Opcode::SharedAtomicAnd64:
|
||||
case IR::Opcode::SharedAtomicOr64:
|
||||
case IR::Opcode::SharedAtomicXor64:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
@ -64,6 +74,16 @@ IR::Type CalculateSharedMemoryTypes(IR::Program& program) {
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
case IR::Opcode::SharedAtomicISub64:
|
||||
case IR::Opcode::SharedAtomicSMin64:
|
||||
case IR::Opcode::SharedAtomicUMin64:
|
||||
case IR::Opcode::SharedAtomicSMax64:
|
||||
case IR::Opcode::SharedAtomicUMax64:
|
||||
case IR::Opcode::SharedAtomicInc64:
|
||||
case IR::Opcode::SharedAtomicDec64:
|
||||
case IR::Opcode::SharedAtomicAnd64:
|
||||
case IR::Opcode::SharedAtomicOr64:
|
||||
case IR::Opcode::SharedAtomicXor64:
|
||||
used_types |= IR::Type::U64;
|
||||
break;
|
||||
default:
|
||||
@ -119,19 +139,26 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
||||
ir.BufferAtomicIAdd(handle, address, inst.Arg(1), {}));
|
||||
continue;
|
||||
case IR::Opcode::SharedAtomicISub32:
|
||||
case IR::Opcode::SharedAtomicISub64:
|
||||
inst.ReplaceUsesWithAndRemove(
|
||||
ir.BufferAtomicISub(handle, address, inst.Arg(1), {}));
|
||||
continue;
|
||||
case IR::Opcode::SharedAtomicSMin32:
|
||||
case IR::Opcode::SharedAtomicUMin32: {
|
||||
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32;
|
||||
case IR::Opcode::SharedAtomicUMin32:
|
||||
case IR::Opcode::SharedAtomicSMin64:
|
||||
case IR::Opcode::SharedAtomicUMin64: {
|
||||
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32 ||
|
||||
inst.GetOpcode() == IR::Opcode::SharedAtomicSMin64;
|
||||
inst.ReplaceUsesWithAndRemove(
|
||||
ir.BufferAtomicIMin(handle, address, inst.Arg(1), is_signed, {}));
|
||||
continue;
|
||||
}
|
||||
case IR::Opcode::SharedAtomicSMax32:
|
||||
case IR::Opcode::SharedAtomicUMax32: {
|
||||
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32;
|
||||
case IR::Opcode::SharedAtomicUMax32:
|
||||
case IR::Opcode::SharedAtomicSMax64:
|
||||
case IR::Opcode::SharedAtomicUMax64: {
|
||||
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32 ||
|
||||
inst.GetOpcode() == IR::Opcode::SharedAtomicSMax64;
|
||||
inst.ReplaceUsesWithAndRemove(
|
||||
ir.BufferAtomicIMax(handle, address, inst.Arg(1), is_signed, {}));
|
||||
continue;
|
||||
@ -143,12 +170,15 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
||||
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicDec(handle, address, {}));
|
||||
continue;
|
||||
case IR::Opcode::SharedAtomicAnd32:
|
||||
case IR::Opcode::SharedAtomicAnd64:
|
||||
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicAnd(handle, address, inst.Arg(1), {}));
|
||||
continue;
|
||||
case IR::Opcode::SharedAtomicOr32:
|
||||
case IR::Opcode::SharedAtomicOr64:
|
||||
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicOr(handle, address, inst.Arg(1), {}));
|
||||
continue;
|
||||
case IR::Opcode::SharedAtomicXor32:
|
||||
case IR::Opcode::SharedAtomicXor64:
|
||||
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicXor(handle, address, inst.Arg(1), {}));
|
||||
continue;
|
||||
case IR::Opcode::LoadSharedU16:
|
||||
@ -173,7 +203,7 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
||||
inst.Invalidate();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -135,9 +135,8 @@ void Liverpool::Process(std::stop_token stoken) {
|
||||
|
||||
if (submit_done) {
|
||||
VideoCore::EndCapture();
|
||||
|
||||
if (rasterizer) {
|
||||
rasterizer->ProcessFaults();
|
||||
rasterizer->EndCommandList();
|
||||
rasterizer->Flush();
|
||||
}
|
||||
submit_done = false;
|
||||
@ -604,6 +603,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
// TODO: handle proper synchronization, for now signal that update is done
|
||||
// immediately
|
||||
regs.cp_strmout_cntl.offset_update_done = 1;
|
||||
} else if (event->event_index.Value() == EventIndex::ZpassDone) {
|
||||
LOG_WARNING(Render, "Unimplemented occlusion query");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -88,7 +88,7 @@ struct Liverpool {
|
||||
}
|
||||
};
|
||||
|
||||
static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x1000) {
|
||||
static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x2000) {
|
||||
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
|
||||
|
||||
if (code[0] == token_mov_vcchi) {
|
||||
|
@ -137,12 +137,15 @@ StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler&
|
||||
size_bytes);
|
||||
}
|
||||
|
||||
std::pair<u8*, u64> StreamBuffer::Map(u64 size, u64 alignment) {
|
||||
std::pair<u8*, u64> StreamBuffer::Map(u64 size, u64 alignment, bool allow_wait) {
|
||||
if (!is_coherent && usage == MemoryUsage::Stream) {
|
||||
size = Common::AlignUp(size, instance->NonCoherentAtomSize());
|
||||
}
|
||||
|
||||
ASSERT(size <= this->size_bytes);
|
||||
if (size > this->size_bytes) {
|
||||
return {nullptr, 0};
|
||||
}
|
||||
|
||||
mapped_size = size;
|
||||
|
||||
if (alignment > 0) {
|
||||
@ -162,8 +165,11 @@ std::pair<u8*, u64> StreamBuffer::Map(u64 size, u64 alignment) {
|
||||
}
|
||||
|
||||
const u64 mapped_upper_bound = offset + size;
|
||||
WaitPendingOperations(mapped_upper_bound);
|
||||
return std::make_pair(mapped_data.data() + offset, offset);
|
||||
if (!WaitPendingOperations(mapped_upper_bound, allow_wait)) {
|
||||
return {nullptr, 0};
|
||||
}
|
||||
|
||||
return {mapped_data.data() + offset, offset};
|
||||
}
|
||||
|
||||
void StreamBuffer::Commit() {
|
||||
@ -177,6 +183,12 @@ void StreamBuffer::Commit() {
|
||||
}
|
||||
|
||||
offset += mapped_size;
|
||||
if (current_watch_cursor != 0 &&
|
||||
current_watches[current_watch_cursor].tick == scheduler->CurrentTick()) {
|
||||
current_watches[current_watch_cursor].upper_bound = offset;
|
||||
return;
|
||||
}
|
||||
|
||||
if (current_watch_cursor + 1 >= current_watches.size()) {
|
||||
// Ensure that there are enough watches.
|
||||
ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
|
||||
@ -191,16 +203,20 @@ void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_
|
||||
watches.resize(watches.size() + grow_size);
|
||||
}
|
||||
|
||||
void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
|
||||
bool StreamBuffer::WaitPendingOperations(u64 requested_upper_bound, bool allow_wait) {
|
||||
if (!invalidation_mark) {
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
|
||||
auto& watch = previous_watches[wait_cursor];
|
||||
wait_bound = watch.upper_bound;
|
||||
if (!scheduler->IsFree(watch.tick) && !allow_wait) {
|
||||
return false;
|
||||
}
|
||||
scheduler->Wait(watch.tick);
|
||||
wait_bound = watch.upper_bound;
|
||||
++wait_cursor;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -168,7 +168,7 @@ public:
|
||||
MemoryUsage usage, u64 size_bytes_);
|
||||
|
||||
/// Reserves a region of memory from the stream buffer.
|
||||
std::pair<u8*, u64> Map(u64 size, u64 alignment = 0);
|
||||
std::pair<u8*, u64> Map(u64 size, u64 alignment = 0, bool allow_wait = true);
|
||||
|
||||
/// Ensures that reserved bytes of memory are available to the GPU.
|
||||
void Commit();
|
||||
@ -181,10 +181,6 @@ public:
|
||||
return offset;
|
||||
}
|
||||
|
||||
u64 GetFreeSize() const {
|
||||
return size_bytes - offset - mapped_size;
|
||||
}
|
||||
|
||||
private:
|
||||
struct Watch {
|
||||
u64 tick{};
|
||||
@ -195,7 +191,7 @@ private:
|
||||
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
|
||||
|
||||
/// Waits pending watches until requested upper bound.
|
||||
void WaitPendingOperations(u64 requested_upper_bound);
|
||||
bool WaitPendingOperations(u64 requested_upper_bound, bool allow_wait);
|
||||
|
||||
private:
|
||||
u64 offset{};
|
||||
|
@ -48,6 +48,8 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
|
||||
|
||||
memory_tracker = std::make_unique<MemoryTracker>(tracker);
|
||||
|
||||
std::memset(gds_buffer.mapped_data.data(), 0, DataShareBufferSize);
|
||||
|
||||
// Ensure the first slot is used for the null buffer
|
||||
const auto null_id =
|
||||
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, 16);
|
||||
@ -137,8 +139,7 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
|
||||
return;
|
||||
}
|
||||
memory_tracker->InvalidateRegion(
|
||||
device_addr, size, Config::readbacks(),
|
||||
[this, device_addr, size] { ReadMemory(device_addr, size, true); });
|
||||
device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); });
|
||||
}
|
||||
|
||||
void BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) {
|
||||
@ -312,7 +313,10 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {
|
||||
void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
|
||||
ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
|
||||
if (!is_gds) {
|
||||
ASSERT(memory->TryWriteBacking(std::bit_cast<void*>(address), value, num_bytes));
|
||||
if (!memory->TryWriteBacking(std::bit_cast<void*>(address), value, num_bytes)) {
|
||||
std::memcpy(std::bit_cast<void*>(address), value, num_bytes);
|
||||
return;
|
||||
}
|
||||
if (!IsRegionRegistered(address, num_bytes)) {
|
||||
return;
|
||||
}
|
||||
@ -816,22 +820,22 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
|
||||
void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_written,
|
||||
bool is_texel_buffer) {
|
||||
boost::container::small_vector<vk::BufferCopy, 4> copies;
|
||||
size_t total_size_bytes = 0;
|
||||
VAddr buffer_start = buffer.CpuAddr();
|
||||
vk::Buffer src_buffer = VK_NULL_HANDLE;
|
||||
memory_tracker->ForEachUploadRange(
|
||||
device_addr, size, is_written, [&](u64 device_addr_out, u64 range_size) {
|
||||
const u64 offset = staging_buffer.Copy(device_addr_out, range_size);
|
||||
copies.push_back(vk::BufferCopy{
|
||||
.srcOffset = offset,
|
||||
.dstOffset = device_addr_out - buffer_start,
|
||||
.size = range_size,
|
||||
});
|
||||
});
|
||||
device_addr, size, is_written,
|
||||
[&](u64 device_addr_out, u64 range_size) {
|
||||
copies.emplace_back(total_size_bytes, device_addr_out - buffer_start, range_size);
|
||||
total_size_bytes += range_size;
|
||||
},
|
||||
[&] { src_buffer = UploadCopies(buffer, copies, total_size_bytes); });
|
||||
SCOPE_EXIT {
|
||||
if (is_texel_buffer) {
|
||||
SynchronizeBufferFromImage(buffer, device_addr, size);
|
||||
}
|
||||
};
|
||||
if (copies.empty()) {
|
||||
if (!src_buffer) {
|
||||
return;
|
||||
}
|
||||
scheduler.EndRendering();
|
||||
@ -860,7 +864,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &pre_barrier,
|
||||
});
|
||||
cmdbuf.copyBuffer(staging_buffer.Handle(), buffer.buffer, copies);
|
||||
cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies);
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
@ -868,6 +872,39 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
||||
});
|
||||
}
|
||||
|
||||
/// Gathers the CPU-side bytes for every region in `copies` into a host-visible source buffer.
///
/// On entry each `srcOffset` is relative to the start of the packed upload and each `dstOffset`
/// is relative to `buffer.CpuAddr()`. When the data is placed in the stream staging buffer the
/// `srcOffset` fields are rebased by the offset returned from Map().
///
/// @return Handle of the buffer to copy from, or VK_NULL_HANDLE when `copies` is empty.
vk::Buffer BufferCache::UploadCopies(Buffer& buffer, std::span<vk::BufferCopy> copies,
                                     size_t total_size_bytes) {
    if (copies.empty()) {
        return VK_NULL_HANDLE;
    }

    // Reads the source bytes of every region into `dst_base + srcOffset`.
    const auto gather_into = [&](u8* dst_base) {
        for (const vk::BufferCopy& region : copies) {
            const VAddr guest_addr = buffer.CpuAddr() + region.dstOffset;
            u8* const dst_pointer = dst_base + region.srcOffset;
            std::memcpy(dst_pointer, std::bit_cast<const u8*>(guest_addr), region.size);
        }
    };

    const auto [stream_ptr, stream_offset] = staging_buffer.Map(total_size_bytes);
    if (!stream_ptr) {
        // For large one time transfers use a temporary host buffer.
        auto upload_buffer =
            std::make_unique<Buffer>(instance, scheduler, MemoryUsage::Upload, 0,
                                     vk::BufferUsageFlagBits::eTransferSrc, total_size_bytes);
        const vk::Buffer upload_handle = upload_buffer->Handle();
        gather_into(upload_buffer->mapped_data.data());
        // Hand ownership to the scheduler's deferred operation, which releases the buffer.
        scheduler.DeferOperation([owned = std::move(upload_buffer)]() mutable { owned.reset(); });
        return upload_handle;
    }

    gather_into(stream_ptr);
    // Apply the staging offset
    for (vk::BufferCopy& region : copies) {
        region.srcOffset += stream_offset;
    }
    staging_buffer.Commit();
    return staging_buffer.Handle();
}
|
||||
|
||||
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
|
||||
boost::container::small_vector<ImageId, 6> image_ids;
|
||||
texture_cache.ForEachImageInRegion(device_addr, size, [&](ImageId image_id, Image& image) {
|
||||
|
@ -112,7 +112,7 @@ public:
|
||||
/// Invalidates any buffer in the logical page range.
|
||||
void InvalidateMemory(VAddr device_addr, u64 size);
|
||||
|
||||
/// Waits on pending downloads in the logical page range.
|
||||
/// Flushes any GPU modified buffer in the logical page range back to CPU memory.
|
||||
void ReadMemory(VAddr device_addr, u64 size, bool is_write = false);
|
||||
|
||||
/// Binds host vertex buffers for the current draw.
|
||||
@ -194,6 +194,9 @@ private:
|
||||
void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_written,
|
||||
bool is_texel_buffer);
|
||||
|
||||
vk::Buffer UploadCopies(Buffer& buffer, std::span<vk::BufferCopy> copies,
|
||||
size_t total_size_bytes);
|
||||
|
||||
bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size);
|
||||
|
||||
void InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes);
|
||||
|
@ -5,7 +5,6 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <deque>
|
||||
#include <shared_mutex>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
#include "common/debug.h"
|
||||
@ -25,9 +24,8 @@ public:
|
||||
~MemoryTracker() = default;
|
||||
|
||||
/// Returns true if a region has been modified from the CPU
|
||||
template <bool locking = true>
|
||||
bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
|
||||
return IterateRegions<true, locking>(
|
||||
return IteratePages<true>(
|
||||
query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
|
||||
std::scoped_lock lk{manager->lock};
|
||||
return manager->template IsRegionModified<Type::CPU>(offset, size);
|
||||
@ -35,9 +33,8 @@ public:
|
||||
}
|
||||
|
||||
/// Returns true if a region has been modified from the GPU
|
||||
template <bool locking = true>
|
||||
bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
|
||||
return IterateRegions<false, locking>(
|
||||
return IteratePages<false>(
|
||||
query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
|
||||
std::scoped_lock lk{manager->lock};
|
||||
return manager->template IsRegionModified<Type::GPU>(offset, size);
|
||||
@ -45,43 +42,40 @@ public:
|
||||
}
|
||||
|
||||
/// Mark region as CPU modified, notifying the device_tracker about this change
|
||||
template <bool defer_protect = false, bool locking = true>
|
||||
void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
|
||||
IterateRegions<false, locking>(dirty_cpu_addr, query_size,
|
||||
[](RegionManager* manager, u64 offset, size_t size) {
|
||||
std::scoped_lock lk{manager->lock};
|
||||
manager->template ChangeRegionState<Type::CPU, true, defer_protect>(
|
||||
manager->GetCpuAddr() + offset, size);
|
||||
});
|
||||
IteratePages<false>(dirty_cpu_addr, query_size,
|
||||
[](RegionManager* manager, u64 offset, size_t size) {
|
||||
std::scoped_lock lk{manager->lock};
|
||||
manager->template ChangeRegionState<Type::CPU, true>(
|
||||
manager->GetCpuAddr() + offset, size);
|
||||
});
|
||||
}
|
||||
|
||||
/// Unmark region as modified from the host GPU
|
||||
template <bool defer_protect = false, bool locking = true>
|
||||
void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
|
||||
IterateRegions<false, locking>(dirty_cpu_addr, query_size,
|
||||
[](RegionManager* manager, u64 offset, size_t size) {
|
||||
std::scoped_lock lk{manager->lock};
|
||||
manager->template ChangeRegionState<Type::GPU, false, defer_protect>(
|
||||
manager->GetCpuAddr() + offset, size);
|
||||
});
|
||||
IteratePages<false>(dirty_cpu_addr, query_size,
|
||||
[](RegionManager* manager, u64 offset, size_t size) {
|
||||
std::scoped_lock lk{manager->lock};
|
||||
manager->template ChangeRegionState<Type::GPU, false>(
|
||||
manager->GetCpuAddr() + offset, size);
|
||||
});
|
||||
}
|
||||
|
||||
/// Removes all protection from a page and ensures GPU data has been flushed if requested
|
||||
template <bool defer_protect = false, bool locking = true>
|
||||
void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept {
|
||||
IterateRegions<false, locking>(
|
||||
cpu_addr, size,
|
||||
[try_flush, &on_flush](RegionManager* manager, u64 offset, size_t size) {
|
||||
void InvalidateRegion(VAddr cpu_addr, u64 size, auto&& on_flush) noexcept {
|
||||
IteratePages<false>(
|
||||
cpu_addr, size, [&on_flush](RegionManager* manager, u64 offset, size_t size) {
|
||||
const bool should_flush = [&] {
|
||||
// Perform both the GPU modification check and CPU state change with the lock
|
||||
// in case we are racing with GPU thread trying to mark the page as GPU
|
||||
// modified. If we need to flush, the flush function is going to perform the CPU
|
||||
// state change.
|
||||
std::scoped_lock lk{manager->lock};
|
||||
if (try_flush && manager->template IsRegionModified<Type::GPU>(offset, size)) {
|
||||
if (Config::readbacks() &&
|
||||
manager->template IsRegionModified<Type::GPU>(offset, size)) {
|
||||
return true;
|
||||
}
|
||||
manager->template ChangeRegionState<Type::CPU, true, defer_protect>(
|
||||
manager->template ChangeRegionState<Type::CPU, true>(
|
||||
manager->GetCpuAddr() + offset, size);
|
||||
return false;
|
||||
}();
|
||||
@ -92,51 +86,38 @@ public:
|
||||
}
|
||||
|
||||
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
|
||||
template <bool defer_protect = false, bool locking = true>
|
||||
void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) {
|
||||
IterateRegions<true, locking>(
|
||||
query_cpu_range, query_size,
|
||||
[&func, is_written](RegionManager* manager, u64 offset, size_t size) {
|
||||
std::scoped_lock lk{manager->lock};
|
||||
manager->template ForEachModifiedRange<Type::CPU, true, defer_protect>(
|
||||
manager->GetCpuAddr() + offset, size, func);
|
||||
if (is_written) {
|
||||
manager->template ChangeRegionState<Type::GPU, true, defer_protect>(
|
||||
manager->GetCpuAddr() + offset, size);
|
||||
}
|
||||
});
|
||||
/// Calls 'func' for each CPU-modified range in [query_cpu_range, query_cpu_range + query_size),
/// clearing the CPU-modified state, then invokes 'on_upload'. When 'is_written' is true the
/// same range is additionally marked as GPU modified after 'on_upload' has returned.
///
/// Locking: each visited region manager's lock is taken in the first pass. If 'is_written' is
/// false it is released immediately; otherwise it stays held across 'on_upload' and is only
/// released by the second pass, after the GPU-modified state has been set.
void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func,
|
||||
auto&& on_upload) {
|
||||
// First pass: create_region_on_fail is true, so a manager is created for every page visited.
IteratePages<true>(query_cpu_range, query_size,
|
||||
[&func, is_written](RegionManager* manager, u64 offset, size_t size) {
|
||||
// Manual lock (no scoped guard): the unlock may happen in the second pass below.
manager->lock.lock();
|
||||
manager->template ForEachModifiedRange<Type::CPU, true>(
|
||||
manager->GetCpuAddr() + offset, size, func);
|
||||
// Read-only use: nothing more to do for this region, release right away.
if (!is_written) {
|
||||
manager->lock.unlock();
|
||||
}
|
||||
});
|
||||
// Runs while the region locks are still held when is_written is true.
on_upload();
|
||||
if (!is_written) {
|
||||
return;
|
||||
}
|
||||
// Second pass: walks the same range, marks it GPU modified and releases the lock taken in the
// first pass. 'func' and 'is_written' are captured here but not used.
IteratePages<false>(query_cpu_range, query_size,
|
||||
[&func, is_written](RegionManager* manager, u64 offset, size_t size) {
|
||||
manager->template ChangeRegionState<Type::GPU, true>(
|
||||
manager->GetCpuAddr() + offset, size);
|
||||
manager->lock.unlock();
|
||||
});
|
||||
}
|
||||
|
||||
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
|
||||
template <bool clear, bool defer_protect = false, bool locking = true>
|
||||
template <bool clear>
|
||||
void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, auto&& func) {
|
||||
IterateRegions<false, locking>(query_cpu_range, query_size,
|
||||
[&func](RegionManager* manager, u64 offset, size_t size) {
|
||||
std::scoped_lock lk{manager->lock};
|
||||
manager->template ForEachModifiedRange<Type::GPU, clear, defer_protect>(
|
||||
manager->GetCpuAddr() + offset, size, func);
|
||||
});
|
||||
}
|
||||
|
||||
/// Notifies deferred protection changes to the tracker.
|
||||
template <Type type, bool enable, bool locking = true>
|
||||
void PerformDeferredProtections() {
|
||||
ForEachRegion<locking>([&](RegionManager* manager) {
|
||||
std::scoped_lock lk{manager->lock};
|
||||
manager->template PerformDeferredProtections<type, enable>();
|
||||
});
|
||||
}
|
||||
|
||||
/// Notifies all deferred protection changes to the tracker.
|
||||
|
||||
/// Lock the memory tracker.
|
||||
void Lock() {
|
||||
global_lock.lock();
|
||||
}
|
||||
|
||||
/// Unlock the memory tracker.
|
||||
void Unlock() {
|
||||
global_lock.unlock();
|
||||
IteratePages<false>(query_cpu_range, query_size,
|
||||
[&func](RegionManager* manager, u64 offset, size_t size) {
|
||||
std::scoped_lock lk{manager->lock};
|
||||
manager->template ForEachModifiedRange<Type::GPU, clear>(
|
||||
manager->GetCpuAddr() + offset, size, func);
|
||||
});
|
||||
}
|
||||
|
||||
private:
|
||||
@ -147,75 +128,42 @@ private:
|
||||
* @param func Callback for each word manager.
|
||||
* @return
|
||||
*/
|
||||
template <bool create_region_on_fail, bool locking, typename Func>
|
||||
bool IterateRegions(VAddr cpu_address, size_t size, Func&& func) {
|
||||
template <bool create_region_on_fail, typename Func>
|
||||
bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
|
||||
RENDERER_TRACE;
|
||||
const auto run = [&]() {
|
||||
using FuncReturn = typename std::invoke_result<Func, RegionManager*, u64, size_t>::type;
|
||||
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
||||
std::size_t remaining_size{size};
|
||||
std::size_t page_index{cpu_address >> TRACKER_HIGHER_PAGE_BITS};
|
||||
u64 page_offset{cpu_address & TRACKER_HIGHER_PAGE_MASK};
|
||||
while (remaining_size > 0) {
|
||||
const std::size_t copy_amount{
|
||||
std::min<std::size_t>(TRACKER_HIGHER_PAGE_SIZE - page_offset, remaining_size)};
|
||||
auto* manager{top_tier[page_index]};
|
||||
if (manager) {
|
||||
if constexpr (BOOL_BREAK) {
|
||||
if (func(manager, page_offset, copy_amount)) {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
func(manager, page_offset, copy_amount);
|
||||
}
|
||||
} else if constexpr (create_region_on_fail) {
|
||||
CreateRegion(page_index);
|
||||
manager = top_tier[page_index];
|
||||
if constexpr (BOOL_BREAK) {
|
||||
if (func(manager, page_offset, copy_amount)) {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
func(manager, page_offset, copy_amount);
|
||||
using FuncReturn = typename std::invoke_result<Func, RegionManager*, u64, size_t>::type;
|
||||
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
||||
std::size_t remaining_size{size};
|
||||
std::size_t page_index{cpu_address >> TRACKER_HIGHER_PAGE_BITS};
|
||||
u64 page_offset{cpu_address & TRACKER_HIGHER_PAGE_MASK};
|
||||
while (remaining_size > 0) {
|
||||
const std::size_t copy_amount{
|
||||
std::min<std::size_t>(TRACKER_HIGHER_PAGE_SIZE - page_offset, remaining_size)};
|
||||
auto* manager{top_tier[page_index]};
|
||||
if (manager) {
|
||||
if constexpr (BOOL_BREAK) {
|
||||
if (func(manager, page_offset, copy_amount)) {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
func(manager, page_offset, copy_amount);
|
||||
}
|
||||
page_index++;
|
||||
page_offset = 0;
|
||||
remaining_size -= copy_amount;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
if constexpr (locking) {
|
||||
std::shared_lock lock{global_lock};
|
||||
return run();
|
||||
} else {
|
||||
return run();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Iterate through all regions in the memory tracker.
|
||||
* @param func Callback for each region manager.
|
||||
* @return
|
||||
*/
|
||||
template <bool locking, typename Func>
|
||||
void ForEachRegion(Func&& func) {
|
||||
RENDERER_TRACE;
|
||||
const auto run = [&]() {
|
||||
for (auto& pool : manager_pool) {
|
||||
for (auto& manager : pool) {
|
||||
if (manager.GetCpuAddr() != 0) {
|
||||
func(&manager);
|
||||
} else if constexpr (create_region_on_fail) {
|
||||
CreateRegion(page_index);
|
||||
manager = top_tier[page_index];
|
||||
if constexpr (BOOL_BREAK) {
|
||||
if (func(manager, page_offset, copy_amount)) {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
func(manager, page_offset, copy_amount);
|
||||
}
|
||||
}
|
||||
};
|
||||
if constexpr (locking) {
|
||||
std::shared_lock lock{global_lock};
|
||||
run();
|
||||
} else {
|
||||
run();
|
||||
page_index++;
|
||||
page_offset = 0;
|
||||
remaining_size -= copy_amount;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void CreateRegion(std::size_t page_index) {
|
||||
@ -239,7 +187,6 @@ private:
|
||||
std::deque<std::array<RegionManager, MANAGER_POOL_SIZE>> manager_pool;
|
||||
std::vector<RegionManager*> free_managers;
|
||||
std::array<RegionManager*, NUM_HIGH_PAGES> top_tier{};
|
||||
std::shared_mutex global_lock;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -324,11 +324,21 @@ public:
|
||||
return properties.limits.maxViewportDimensions[0];
|
||||
}
|
||||
|
||||
/// Returns the maximum viewport height.
|
||||
/// Returns the maximum viewport height.
|
||||
u32 GetMaxViewportHeight() const {
|
||||
return properties.limits.maxViewportDimensions[1];
|
||||
}
|
||||
|
||||
/// Returns the maximum render area width, i.e. the maxFramebufferWidth value stored in the
/// device limits held by 'properties'.
|
||||
u32 GetMaxFramebufferWidth() const {
|
||||
return properties.limits.maxFramebufferWidth;
|
||||
}
|
||||
|
||||
/// Returns the maximum render area height, i.e. the maxFramebufferHeight value stored in the
/// device limits held by 'properties'.
|
||||
u32 GetMaxFramebufferHeight() const {
|
||||
return properties.limits.maxFramebufferHeight;
|
||||
}
|
||||
|
||||
/// Returns the sample count flags supported by framebuffers.
|
||||
vk::SampleCountFlags GetFramebufferSampleCounts() const {
|
||||
return properties.limits.framebufferColorSampleCounts &
|
||||
|
@ -113,6 +113,8 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
|
||||
// Prefetch color and depth buffers to let texture cache handle possible overlaps with bound
|
||||
// textures (e.g. mipgen)
|
||||
RenderState state;
|
||||
state.width = instance.GetMaxFramebufferWidth();
|
||||
state.height = instance.GetMaxFramebufferHeight();
|
||||
|
||||
cb_descs.clear();
|
||||
db_desc.reset();
|
||||
@ -272,6 +274,8 @@ void Rasterizer::EliminateFastClear() {
|
||||
void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
|
||||
RENDERER_TRACE;
|
||||
|
||||
scheduler.PopPendingOperations();
|
||||
|
||||
if (!FilterDraw()) {
|
||||
return;
|
||||
}
|
||||
@ -317,6 +321,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
|
||||
u32 max_count, VAddr count_address) {
|
||||
RENDERER_TRACE;
|
||||
|
||||
scheduler.PopPendingOperations();
|
||||
|
||||
if (!FilterDraw()) {
|
||||
return;
|
||||
}
|
||||
@ -380,6 +386,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
|
||||
void Rasterizer::DispatchDirect() {
|
||||
RENDERER_TRACE;
|
||||
|
||||
scheduler.PopPendingOperations();
|
||||
|
||||
const auto& cs_program = liverpool->GetCsRegs();
|
||||
const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
|
||||
if (!pipeline) {
|
||||
@ -407,6 +415,8 @@ void Rasterizer::DispatchDirect() {
|
||||
void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) {
|
||||
RENDERER_TRACE;
|
||||
|
||||
scheduler.PopPendingOperations();
|
||||
|
||||
const auto& cs_program = liverpool->GetCsRegs();
|
||||
const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
|
||||
if (!pipeline) {
|
||||
@ -439,11 +449,12 @@ void Rasterizer::Finish() {
|
||||
scheduler.Finish();
|
||||
}
|
||||
|
||||
void Rasterizer::ProcessFaults() {
|
||||
void Rasterizer::EndCommandList() {
|
||||
if (fault_process_pending) {
|
||||
fault_process_pending = false;
|
||||
buffer_cache.ProcessFaultBuffer();
|
||||
}
|
||||
texture_cache.ProcessDownloadImages();
|
||||
}
|
||||
|
||||
bool Rasterizer::BindResources(const Pipeline* pipeline) {
|
||||
@ -645,8 +656,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
|
||||
if (instance.IsNullDescriptorSupported()) {
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
|
||||
} else {
|
||||
auto& null_image_view =
|
||||
texture_cache.FindTexture(VideoCore::NULL_IMAGE_ID, desc.view_info);
|
||||
auto& null_image_view = texture_cache.FindTexture(VideoCore::NULL_IMAGE_ID, desc);
|
||||
image_infos.emplace_back(VK_NULL_HANDLE, *null_image_view.image_view,
|
||||
vk::ImageLayout::eGeneral);
|
||||
}
|
||||
@ -660,7 +670,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
|
||||
bound_images.emplace_back(image_id);
|
||||
|
||||
auto& image = texture_cache.GetImage(image_id);
|
||||
auto& image_view = texture_cache.FindTexture(image_id, desc.view_info);
|
||||
auto& image_view = texture_cache.FindTexture(image_id, desc);
|
||||
|
||||
if (image.binding.force_general || image.binding.is_target) {
|
||||
image.Transit(vk::ImageLayout::eGeneral,
|
||||
|
@ -72,7 +72,7 @@ public:
|
||||
void CpSync();
|
||||
u64 Flush();
|
||||
void Finish();
|
||||
void ProcessFaults();
|
||||
void EndCommandList();
|
||||
|
||||
PipelineCache& GetPipelineCache() {
|
||||
return pipeline_cache;
|
||||
|
@ -34,16 +34,11 @@ void Scheduler::BeginRendering(const RenderState& new_state) {
|
||||
is_rendering = true;
|
||||
render_state = new_state;
|
||||
|
||||
const auto width =
|
||||
render_state.width != std::numeric_limits<u32>::max() ? render_state.width : 1;
|
||||
const auto height =
|
||||
render_state.height != std::numeric_limits<u32>::max() ? render_state.height : 1;
|
||||
|
||||
const vk::RenderingInfo rendering_info = {
|
||||
.renderArea =
|
||||
{
|
||||
.offset = {0, 0},
|
||||
.extent = {width, height},
|
||||
.extent = {render_state.width, render_state.height},
|
||||
},
|
||||
.layerCount = 1,
|
||||
.colorAttachmentCount = render_state.num_color_attachments,
|
||||
@ -101,6 +96,14 @@ void Scheduler::Wait(u64 tick) {
|
||||
}
|
||||
}
|
||||
|
||||
// Runs and removes queued pending operations, front to back, for as long as the front entry's
// gpu_tick is reported free by the master semaphore. Stops at the first entry that is not free,
// so later entries are never executed ahead of an earlier one.
void Scheduler::PopPendingOperations() {
|
||||
// Refresh once up front; the loop below only calls IsFree.
// NOTE(review): presumably this updates the semaphore's cached GPU tick - confirm.
master_semaphore.Refresh();
|
||||
while (!pending_ops.empty() && master_semaphore.IsFree(pending_ops.front().gpu_tick)) {
|
||||
// Invoke the callback before popping so the entry is still alive during the call.
pending_ops.front().callback();
|
||||
pending_ops.pop();
|
||||
}
|
||||
}
|
||||
|
||||
void Scheduler::AllocateWorkerCommandBuffers() {
|
||||
const vk::CommandBufferBeginInfo begin_info = {
|
||||
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
|
||||
@ -175,10 +178,7 @@ void Scheduler::SubmitExecution(SubmitInfo& info) {
|
||||
AllocateWorkerCommandBuffers();
|
||||
|
||||
// Apply pending operations
|
||||
while (!pending_ops.empty() && IsFree(pending_ops.front().gpu_tick)) {
|
||||
pending_ops.front().callback();
|
||||
pending_ops.pop();
|
||||
}
|
||||
PopPendingOperations();
|
||||
}
|
||||
|
||||
void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf) {
|
||||
|
@ -26,8 +26,8 @@ struct RenderState {
|
||||
u32 num_color_attachments{};
|
||||
bool has_depth{};
|
||||
bool has_stencil{};
|
||||
u32 width = std::numeric_limits<u32>::max();
|
||||
u32 height = std::numeric_limits<u32>::max();
|
||||
u32 width{};
|
||||
u32 height{};
|
||||
|
||||
bool operator==(const RenderState& other) const noexcept {
|
||||
return std::memcmp(this, &other, sizeof(RenderState)) == 0;
|
||||
@ -317,6 +317,9 @@ public:
|
||||
/// Waits for the given tick to trigger on the GPU.
|
||||
void Wait(u64 tick);
|
||||
|
||||
/// Attempts to execute operations whose tick the GPU has caught up with.
|
||||
void PopPendingOperations();
|
||||
|
||||
/// Starts a new rendering scope with provided state.
|
||||
void BeginRendering(const RenderState& new_state);
|
||||
|
||||
@ -344,7 +347,11 @@ public:
|
||||
}
|
||||
|
||||
/// Returns true when a tick has been triggered by the GPU.
|
||||
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
|
||||
[[nodiscard]] bool IsFree(u64 tick) noexcept {
|
||||
if (master_semaphore.IsFree(tick)) {
|
||||
return true;
|
||||
}
|
||||
master_semaphore.Refresh();
|
||||
return master_semaphore.IsFree(tick);
|
||||
}
|
||||
|
||||
|
@ -5,7 +5,9 @@
|
||||
#include <xxhash.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/config.h"
|
||||
#include "common/debug.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/page_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
@ -58,6 +60,50 @@ ImageId TextureCache::GetNullImage(const vk::Format format) {
|
||||
return null_id;
|
||||
}
|
||||
|
||||
// Calls DownloadImageMemory for every image id currently in 'download_images', then empties
// the set so each queued image is processed once per call.
void TextureCache::ProcessDownloadImages() {
|
||||
for (const ImageId image_id : download_images) {
|
||||
DownloadImageMemory(image_id);
|
||||
}
|
||||
download_images.clear();
|
||||
}
|
||||
|
||||
// Records a GPU copy of the image's mip level 0 (all layers) into the download utility buffer
// and defers a write of those bytes to the image's guest address. Does nothing when the image
// is not flagged GpuModified.
void TextureCache::DownloadImageMemory(ImageId image_id) {
|
||||
Image& image = slot_images[image_id];
|
||||
// Nothing to read back if the GPU never wrote to this image.
if (False(image.flags & ImageFlagBits::GpuModified)) {
|
||||
return;
|
||||
}
|
||||
auto& download_buffer = buffer_cache.GetUtilityBuffer(MemoryUsage::Download);
|
||||
// Byte size of the copy: pitch * height * layers * bytes per element.
// NOTE(review): assumes pitch is in texels and num_bits is bits per texel - confirm.
const u32 download_size = image.info.pitch * image.info.size.height *
|
||||
image.info.resources.layers * (image.info.num_bits / 8);
|
||||
// The readback must not exceed the guest allocation it will be written into.
ASSERT(download_size <= image.info.guest_size);
|
||||
// 'download' is the mapped pointer later read by the deferred callback; 'offset' is the
// position inside the download buffer used as the copy destination.
const auto [download, offset] = download_buffer.Map(download_size);
|
||||
download_buffer.Commit();
|
||||
const vk::BufferImageCopy image_download = {
|
||||
.bufferOffset = offset,
|
||||
.bufferRowLength = image.info.pitch,
|
||||
.bufferImageHeight = image.info.size.height,
|
||||
.imageSubresource =
|
||||
{
|
||||
// Depth-stencil images copy the depth aspect only; everything else copies color.
.aspectMask = image.info.IsDepthStencil() ? vk::ImageAspectFlagBits::eDepth
|
||||
: vk::ImageAspectFlagBits::eColor,
|
||||
// Only the base mip level is read back, starting at layer 0.
.mipLevel = 0,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = image.info.resources.layers,
|
||||
},
|
||||
.imageOffset = {0, 0, 0},
|
||||
// Depth extent is fixed to 1.
.imageExtent = {image.info.size.width, image.info.size.height, 1},
|
||||
};
|
||||
// End rendering before recording the layout transition and transfer command.
scheduler.EndRendering();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
|
||||
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal,
|
||||
download_buffer.Handle(), image_download);
|
||||
// The callback captures the guest address, mapped pointer and size by value, so it does not
// reference 'image' when it eventually runs.
// NOTE(review): presumably executed once the GPU has finished the copy - confirm in Scheduler.
scheduler.DeferOperation([device_addr = image.info.guest_address, download, download_size] {
|
||||
auto* memory = Core::Memory::Instance();
|
||||
memory->TryWriteBacking(std::bit_cast<u8*>(device_addr), download, download_size);
|
||||
});
|
||||
}
|
||||
|
||||
void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) {
|
||||
if (image.hash == 0) {
|
||||
// Initialize hash
|
||||
@ -437,16 +483,27 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo
|
||||
return slot_image_views[view_id];
|
||||
}
|
||||
|
||||
ImageView& TextureCache::FindTexture(ImageId image_id, const ImageViewInfo& view_info) {
|
||||
ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) {
|
||||
Image& image = slot_images[image_id];
|
||||
if (desc.type == BindingType::Storage) {
|
||||
image.flags |= ImageFlagBits::GpuModified;
|
||||
if (Config::readbackLinearImages() &&
|
||||
image.info.tiling_mode == AmdGpu::TilingMode::Display_Linear) {
|
||||
download_images.emplace(image_id);
|
||||
}
|
||||
}
|
||||
UpdateImage(image_id);
|
||||
return RegisterImageView(image_id, view_info);
|
||||
return RegisterImageView(image_id, desc.view_info);
|
||||
}
|
||||
|
||||
ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) {
|
||||
const ImageId image_id = FindImage(desc);
|
||||
Image& image = slot_images[image_id];
|
||||
image.flags |= ImageFlagBits::GpuModified;
|
||||
if (Config::readbackLinearImages() &&
|
||||
image.info.tiling_mode == AmdGpu::TilingMode::Display_Linear) {
|
||||
download_images.emplace(image_id);
|
||||
}
|
||||
image.usage.render_target = 1u;
|
||||
UpdateImage(image_id);
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_set>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <tsl/robin_map.h>
|
||||
|
||||
@ -105,11 +106,14 @@ public:
|
||||
/// Evicts any images that overlap the unmapped range.
|
||||
void UnmapMemory(VAddr cpu_addr, size_t size);
|
||||
|
||||
/// Schedules a copy of pending images for download back to CPU memory.
|
||||
void ProcessDownloadImages();
|
||||
|
||||
/// Retrieves the image handle of the image with the provided attributes.
|
||||
[[nodiscard]] ImageId FindImage(BaseDesc& desc, FindFlags flags = {});
|
||||
|
||||
/// Retrieves an image view with the properties of the specified image id.
|
||||
[[nodiscard]] ImageView& FindTexture(ImageId image_id, const ImageViewInfo& view_info);
|
||||
[[nodiscard]] ImageView& FindTexture(ImageId image_id, const BaseDesc& desc);
|
||||
|
||||
/// Retrieves the render target with specified properties
|
||||
[[nodiscard]] ImageView& FindRenderTarget(BaseDesc& desc);
|
||||
@ -252,6 +256,9 @@ private:
|
||||
/// Gets or creates a null image for a particular format.
|
||||
ImageId GetNullImage(vk::Format format);
|
||||
|
||||
/// Copies image memory back to CPU.
|
||||
void DownloadImageMemory(ImageId image_id);
|
||||
|
||||
/// Create an image from the given parameters
|
||||
[[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr);
|
||||
|
||||
@ -293,6 +300,7 @@ private:
|
||||
Common::SlotVector<ImageView> slot_image_views;
|
||||
tsl::robin_map<u64, Sampler> samplers;
|
||||
tsl::robin_map<vk::Format, ImageId> null_images;
|
||||
std::unordered_set<ImageId> download_images;
|
||||
PageTable page_table;
|
||||
std::mutex mutex;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user