diff --git a/CMakeLists.txt b/CMakeLists.txt index 90189fbd9..df2a48fbc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -203,7 +203,7 @@ execute_process( # Set Version set(EMULATOR_VERSION_MAJOR "0") -set(EMULATOR_VERSION_MINOR "9") +set(EMULATOR_VERSION_MINOR "10") set(EMULATOR_VERSION_PATCH "1") set_source_files_properties(src/shadps4.rc PROPERTIES COMPILE_DEFINITIONS "EMULATOR_VERSION_MAJOR=${EMULATOR_VERSION_MAJOR};EMULATOR_VERSION_MINOR=${EMULATOR_VERSION_MINOR};EMULATOR_VERSION_PATCH=${EMULATOR_VERSION_PATCH}") diff --git a/cmake/DetectQtInstallation.cmake b/cmake/DetectQtInstallation.cmake index e95e8980f..650cc9745 100644 --- a/cmake/DetectQtInstallation.cmake +++ b/cmake/DetectQtInstallation.cmake @@ -1,14 +1,28 @@ # SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project # SPDX-License-Identifier: GPL-2.0-or-later -file(GLOB QT_KITS LIST_DIRECTORIES true "C:/Qt/*/msvc*_64") -list(SORT QT_KITS COMPARE NATURAL) -list(REVERSE QT_KITS) -if(QT_KITS) - list(GET QT_KITS 0 QT_PREFIX) - set(CMAKE_PREFIX_PATH "${QT_PREFIX}" CACHE PATH "Qt prefix auto‑detected" FORCE) - message(STATUS "Auto-detected Qt prefix: ${QT_PREFIX}") -else() - message(STATUS "findQt.cmake: no Qt‑Directory found in C:/Qt – please set CMAKE_PREFIX_PATH manually") -endif() +set(highest_version "0") +set(CANDIDATE_DRIVES A B C D E F G H I J K L M N O P Q R S T U V W X Y Z) +foreach(drive ${CANDIDATE_DRIVES}) + file(GLOB kits LIST_DIRECTORIES true CONFIGURE_DEPENDS "${drive}:/Qt/*/msvc*_64") + foreach(kit IN LISTS kits) + get_filename_component(version_dir "${kit}" DIRECTORY) + get_filename_component(kit_version "${version_dir}" NAME) + + message(STATUS "DetectQtInstallation.cmake: Detected Qt: ${kit}") + + if (kit_version VERSION_GREATER highest_version) + set(highest_version "${kit_version}") + set(QT_PREFIX "${kit}") + + endif() + endforeach() +endforeach() + +if(QT_PREFIX) + set(CMAKE_PREFIX_PATH "${QT_PREFIX}" CACHE PATH "Qt prefix auto‑detected" FORCE) + message(STATUS "DetectQtInstallation.cmake: Choose newest Qt: ${QT_PREFIX}") +else() + message(STATUS "DetectQtInstallation.cmake: No Qt‑Directory found in :/Qt – please set CMAKE_PREFIX_PATH manually") +endif() diff --git a/dist/net.shadps4.shadPS4.metainfo.xml b/dist/net.shadps4.shadPS4.metainfo.xml index 493dc0df6..f9bd7c7c2 100644 --- a/dist/net.shadps4.shadPS4.metainfo.xml +++ b/dist/net.shadps4.shadPS4.metainfo.xml @@ -37,6 +37,9 @@ Game + + https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.10.0 + https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.9.0 diff --git a/src/common/config.cpp b/src/common/config.cpp index 4a764a4c6..6f8563377 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -31,31 +31,53 @@ std::filesystem::path find_fs_path_or(const basic_value& v, const K& ky, namespace Config { +// General static bool isNeo = false; static bool isDevKit = false; +static bool isPSNSignedIn = false; static bool isTrophyPopupDisabled = false; +static double trophyNotificationDuration = 6.0; static bool enableDiscordRPC = false; -static u32 screenWidth = 1280; -static u32 screenHeight = 720; -static s32 gpuId = -1; // Vulkan physical device index. Set to negative for auto select -static std::string logFilter; +static std::string logFilter = ""; static std::string logType = "sync"; static std::string userName = "shadPS4"; -static std::string chooseHomeTab; +static std::string chooseHomeTab = "General"; +static bool isShowSplash = false; +static std::string isSideTrophy = "right"; +static bool compatibilityData = false; +static bool checkCompatibilityOnStartup = false; + +// Input +static int cursorState = HideCursorState::Idle; +static int cursorHideTimeout = 5; // 5 seconds (default) static bool useSpecialPad = false; static int specialPadClass = 1; static bool isMotionControlsEnabled = true; -static bool isDebugDump = false; -static bool isShaderDebug = false; -static bool isShowSplash = false; -static std::string isSideTrophy = "right"; +static bool useUnifiedInputConfig = true; + +// These two entries aren't stored in the config +static bool overrideControllerColor = false; +static int controllerCustomColorRGB[3] = {0, 0, 255}; + +// GPU +static u32 windowWidth = 1280; +static u32 windowHeight = 720; +static u32 internalScreenWidth = 1280; +static u32 internalScreenHeight = 720; static bool isNullGpu = false; static bool shouldCopyGPUBuffers = false; static bool readbacksEnabled = false; +static bool readbackLinearImagesEnabled = false; static bool directMemoryAccessEnabled = false; static bool shouldDumpShaders = false; -static bool shouldPatchShaders = true; +static bool shouldPatchShaders = false; static u32 vblankDivider = 1; +static bool isFullscreen = false; +static std::string fullscreenMode = "Windowed"; +static bool isHDRAllowed = false; + +// Vulkan +static s32 gpuId = -1; static bool vkValidation = false; static bool vkValidationSync = false; static bool vkValidationGpu = false; @@ -63,32 +85,29 @@ static bool vkCrashDiagnostic = false; static bool vkHostMarkers = false; static bool vkGuestMarkers = false; static bool rdocEnable = false; -static bool isFpsColor = true; -static bool isSeparateLogFilesEnabled = false; -static int cursorState = HideCursorState::Idle; -static int cursorHideTimeout = 5; // 5 seconds (default) -static double trophyNotificationDuration = 6.0; -static bool useUnifiedInputConfig = true; -static bool overrideControllerColor = false; -static int controllerCustomColorRGB[3] = {0, 0, 255}; -static bool compatibilityData = false; -static bool checkCompatibilityOnStartup = false; -static std::string trophyKey; -static bool isPSNSignedIn = false; -// Gui +// Debug +static bool isDebugDump = false; +static bool isShaderDebug = false; +static bool isSeparateLogFilesEnabled = false; +static bool isFpsColor = true; + +// GUI static bool load_game_size = true; static std::vector settings_install_dirs = {}; std::vector install_dirs_enabled = {}; std::filesystem::path settings_addon_install_dir = {}; std::filesystem::path save_data_path = {}; -static bool isFullscreen = false; -static std::string fullscreenMode = "Windowed"; -static bool isHDRAllowed = false; -// Language +// Settings u32 m_language = 1; // english +// Keys +static std::string trophyKey = ""; + +// Expected number of items in the config file +static constexpr u64 total_entries = 54; + bool allowHDR() { return isHDRAllowed; } @@ -178,12 +197,20 @@ double getTrophyNotificationDuration() { return trophyNotificationDuration; } -u32 getScreenWidth() { - return screenWidth; +u32 getWindowWidth() { + return windowWidth; } -u32 getScreenHeight() { - return screenHeight; +u32 getWindowHeight() { + return windowHeight; +} + +u32 getInternalScreenWidth() { + return internalScreenHeight; +} + +u32 getInternalScreenHeight() { + return internalScreenHeight; } s32 getGpuId() { @@ -246,6 +273,10 @@ bool readbacks() { return readbacksEnabled; } +bool readbackLinearImages() { + return readbackLinearImagesEnabled; +} + bool directMemoryAccess() { return directMemoryAccessEnabled; } @@ -318,12 +349,20 @@ void setGpuId(s32 selectedGpuId) { gpuId = selectedGpuId; } -void setScreenWidth(u32 width) { - screenWidth = width; +void setWindowWidth(u32 width) { + windowWidth = width; } -void setScreenHeight(u32 height) { - screenHeight = height; +void setWindowHeight(u32 height) { + windowHeight = height; +} + +void setInternalScreenWidth(u32 width) { + internalScreenWidth = width; +} + +void setInternalScreenHeight(u32 height) { + internalScreenHeight = height; } void setDebugDump(bool enable) { @@ -405,6 +444,7 @@ void setCursorState(s16 newCursorState) { void setCursorHideTimeout(int newcursorHideTimeout) { cursorHideTimeout = newcursorHideTimeout; } + void setTrophyNotificationDuration(double newTrophyNotificationDuration) { trophyNotificationDuration = newTrophyNotificationDuration; } @@ -565,81 +605,104 @@ void load(const std::filesystem::path& path) { fmt::print("Got exception trying to load config file. Exception: {}\n", ex.what()); return; } + + u64 entry_count = 0; + if (data.contains("General")) { const toml::value& general = data.at("General"); - isNeo = toml::find_or(general, "isPS4Pro", false); - isDevKit = toml::find_or(general, "isDevKit", false); - isPSNSignedIn = toml::find_or(general, "isPSNSignedIn", false); - isTrophyPopupDisabled = toml::find_or(general, "isTrophyPopupDisabled", false); - trophyNotificationDuration = - toml::find_or(general, "trophyNotificationDuration", 5.0); - enableDiscordRPC = toml::find_or(general, "enableDiscordRPC", true); - logFilter = toml::find_or(general, "logFilter", ""); - logType = toml::find_or(general, "logType", "sync"); - userName = toml::find_or(general, "userName", "shadPS4"); - isShowSplash = toml::find_or(general, "showSplash", true); - isSideTrophy = toml::find_or(general, "sideTrophy", "right"); - compatibilityData = toml::find_or(general, "compatibilityEnabled", false); - checkCompatibilityOnStartup = - toml::find_or(general, "checkCompatibilityOnStartup", false); - chooseHomeTab = toml::find_or(general, "chooseHomeTab", "Release"); + isNeo = toml::find_or(general, "isPS4Pro", isNeo); + isDevKit = toml::find_or(general, "isDevKit", isDevKit); + isPSNSignedIn = toml::find_or(general, "isPSNSignedIn", isPSNSignedIn); + isTrophyPopupDisabled = + toml::find_or(general, "isTrophyPopupDisabled", isTrophyPopupDisabled); + trophyNotificationDuration = toml::find_or(general, "trophyNotificationDuration", + trophyNotificationDuration); + enableDiscordRPC = toml::find_or(general, "enableDiscordRPC", enableDiscordRPC); + logFilter = toml::find_or(general, "logFilter", logFilter); + logType = toml::find_or(general, "logType", logType); + userName = toml::find_or(general, "userName", userName); + isShowSplash = toml::find_or(general, "showSplash", isShowSplash); + isSideTrophy = toml::find_or(general, "sideTrophy", isSideTrophy); + compatibilityData = toml::find_or(general, "compatibilityEnabled", compatibilityData); + checkCompatibilityOnStartup = toml::find_or(general, "checkCompatibilityOnStartup", + checkCompatibilityOnStartup); + chooseHomeTab = toml::find_or(general, "chooseHomeTab", chooseHomeTab); + + entry_count += general.size(); } if (data.contains("Input")) { const toml::value& input = data.at("Input"); - cursorState = toml::find_or(input, "cursorState", HideCursorState::Idle); - cursorHideTimeout = toml::find_or(input, "cursorHideTimeout", 5); - useSpecialPad = toml::find_or(input, "useSpecialPad", false); - specialPadClass = toml::find_or(input, "specialPadClass", 1); - isMotionControlsEnabled = toml::find_or(input, "isMotionControlsEnabled", true); - useUnifiedInputConfig = toml::find_or(input, "useUnifiedInputConfig", true); + cursorState = toml::find_or(input, "cursorState", cursorState); + cursorHideTimeout = toml::find_or(input, "cursorHideTimeout", cursorHideTimeout); + useSpecialPad = toml::find_or(input, "useSpecialPad", useSpecialPad); + specialPadClass = toml::find_or(input, "specialPadClass", specialPadClass); + isMotionControlsEnabled = + toml::find_or(input, "isMotionControlsEnabled", isMotionControlsEnabled); + useUnifiedInputConfig = + toml::find_or(input, "useUnifiedInputConfig", useUnifiedInputConfig); + + entry_count += input.size(); } if (data.contains("GPU")) { const toml::value& gpu = data.at("GPU"); - screenWidth = toml::find_or(gpu, "screenWidth", screenWidth); - screenHeight = toml::find_or(gpu, "screenHeight", screenHeight); - isNullGpu = toml::find_or(gpu, "nullGpu", false); - shouldCopyGPUBuffers = toml::find_or(gpu, "copyGPUBuffers", false); - readbacksEnabled = toml::find_or(gpu, "readbacks", false); - directMemoryAccessEnabled = toml::find_or(gpu, "directMemoryAccess", false); - shouldDumpShaders = toml::find_or(gpu, "dumpShaders", false); - shouldPatchShaders = toml::find_or(gpu, "patchShaders", true); - vblankDivider = toml::find_or(gpu, "vblankDivider", 1); - isFullscreen = toml::find_or(gpu, "Fullscreen", false); - fullscreenMode = toml::find_or(gpu, "FullscreenMode", "Windowed"); - isHDRAllowed = toml::find_or(gpu, "allowHDR", false); + windowWidth = toml::find_or(gpu, "screenWidth", windowWidth); + windowHeight = toml::find_or(gpu, "screenHeight", windowHeight); + internalScreenWidth = toml::find_or(gpu, "internalScreenWidth", internalScreenWidth); + internalScreenHeight = + toml::find_or(gpu, "internalScreenHeight", internalScreenHeight); + isNullGpu = toml::find_or(gpu, "nullGpu", isNullGpu); + shouldCopyGPUBuffers = toml::find_or(gpu, "copyGPUBuffers", shouldCopyGPUBuffers); + readbacksEnabled = toml::find_or(gpu, "readbacks", readbacksEnabled); + readbackLinearImagesEnabled = + toml::find_or(gpu, "readbackLinearImages", readbackLinearImagesEnabled); + directMemoryAccessEnabled = + toml::find_or(gpu, "directMemoryAccess", directMemoryAccessEnabled); + shouldDumpShaders = toml::find_or(gpu, "dumpShaders", shouldDumpShaders); + shouldPatchShaders = toml::find_or(gpu, "patchShaders", shouldPatchShaders); + vblankDivider = toml::find_or(gpu, "vblankDivider", vblankDivider); + isFullscreen = toml::find_or(gpu, "Fullscreen", isFullscreen); + fullscreenMode = toml::find_or(gpu, "FullscreenMode", fullscreenMode); + isHDRAllowed = toml::find_or(gpu, "allowHDR", isHDRAllowed); + + entry_count += gpu.size(); } if (data.contains("Vulkan")) { const toml::value& vk = data.at("Vulkan"); - gpuId = toml::find_or(vk, "gpuId", -1); - vkValidation = toml::find_or(vk, "validation", false); - vkValidationSync = toml::find_or(vk, "validation_sync", false); - vkValidationGpu = toml::find_or(vk, "validation_gpu", true); - vkCrashDiagnostic = toml::find_or(vk, "crashDiagnostic", false); - vkHostMarkers = toml::find_or(vk, "hostMarkers", false); - vkGuestMarkers = toml::find_or(vk, "guestMarkers", false); - rdocEnable = toml::find_or(vk, "rdocEnable", false); + gpuId = toml::find_or(vk, "gpuId", gpuId); + vkValidation = toml::find_or(vk, "validation", vkValidation); + vkValidationSync = toml::find_or(vk, "validation_sync", vkValidationSync); + vkValidationGpu = toml::find_or(vk, "validation_gpu", vkValidationGpu); + vkCrashDiagnostic = toml::find_or(vk, "crashDiagnostic", vkCrashDiagnostic); + vkHostMarkers = toml::find_or(vk, "hostMarkers", vkHostMarkers); + vkGuestMarkers = toml::find_or(vk, "guestMarkers", vkGuestMarkers); + rdocEnable = toml::find_or(vk, "rdocEnable", rdocEnable); + + entry_count += vk.size(); } if (data.contains("Debug")) { const toml::value& debug = data.at("Debug"); - isDebugDump = toml::find_or(debug, "DebugDump", false); - isSeparateLogFilesEnabled = toml::find_or(debug, "isSeparateLogFilesEnabled", false); - isShaderDebug = toml::find_or(debug, "CollectShader", false); - isFpsColor = toml::find_or(debug, "FPSColor", true); + isDebugDump = toml::find_or(debug, "DebugDump", isDebugDump); + isSeparateLogFilesEnabled = + toml::find_or(debug, "isSeparateLogFilesEnabled", isSeparateLogFilesEnabled); + isShaderDebug = toml::find_or(debug, "CollectShader", isShaderDebug); + isFpsColor = toml::find_or(debug, "FPSColor", isFpsColor); + + entry_count += debug.size(); } if (data.contains("GUI")) { const toml::value& gui = data.at("GUI"); - load_game_size = toml::find_or(gui, "loadGameSizeEnabled", true); + load_game_size = toml::find_or(gui, "loadGameSizeEnabled", load_game_size); const auto install_dir_array = toml::find_or>(gui, "installDirs", {}); @@ -661,20 +724,32 @@ void load(const std::filesystem::path& path) { {std::filesystem::path{install_dir_array[i]}, install_dirs_enabled[i]}); } - save_data_path = toml::find_fs_path_or(gui, "saveDataPath", {}); + save_data_path = toml::find_fs_path_or(gui, "saveDataPath", save_data_path); - settings_addon_install_dir = toml::find_fs_path_or(gui, "addonInstallDir", {}); + settings_addon_install_dir = + toml::find_fs_path_or(gui, "addonInstallDir", settings_addon_install_dir); + + entry_count += gui.size(); } if (data.contains("Settings")) { const toml::value& settings = data.at("Settings"); + m_language = toml::find_or(settings, "consoleLanguage", m_language); - m_language = toml::find_or(settings, "consoleLanguage", 1); + entry_count += settings.size(); } if (data.contains("Keys")) { const toml::value& keys = data.at("Keys"); - trophyKey = toml::find_or(keys, "TrophyKey", ""); + trophyKey = toml::find_or(keys, "TrophyKey", trophyKey); + + entry_count += keys.size(); + } + + // Run save after loading to generate any missing fields with default values. + if (entry_count != total_entries) { + fmt::print("Outdated config detected, updating config file.\n"); + save(path); } } @@ -751,11 +826,14 @@ void save(const std::filesystem::path& path) { data["Input"]["specialPadClass"] = specialPadClass; data["Input"]["isMotionControlsEnabled"] = isMotionControlsEnabled; data["Input"]["useUnifiedInputConfig"] = useUnifiedInputConfig; - data["GPU"]["screenWidth"] = screenWidth; - data["GPU"]["screenHeight"] = screenHeight; + data["GPU"]["screenWidth"] = windowWidth; + data["GPU"]["screenHeight"] = windowHeight; + data["GPU"]["internalScreenWidth"] = internalScreenWidth; + data["GPU"]["internalScreenHeight"] = internalScreenHeight; data["GPU"]["nullGpu"] = isNullGpu; data["GPU"]["copyGPUBuffers"] = shouldCopyGPUBuffers; data["GPU"]["readbacks"] = readbacksEnabled; + data["GPU"]["readbackLinearImages"] = readbackLinearImagesEnabled; data["GPU"]["directMemoryAccess"] = directMemoryAccessEnabled; data["GPU"]["dumpShaders"] = shouldDumpShaders; data["GPU"]["patchShaders"] = shouldPatchShaders; @@ -822,32 +900,53 @@ void save(const std::filesystem::path& path) { } void setDefaultValues() { - isHDRAllowed = false; + // General isNeo = false; isDevKit = false; isPSNSignedIn = false; - isFullscreen = false; isTrophyPopupDisabled = false; - enableDiscordRPC = true; - screenWidth = 1280; - screenHeight = 720; + trophyNotificationDuration = 6.0; + enableDiscordRPC = false; logFilter = ""; logType = "sync"; userName = "shadPS4"; - chooseHomeTab = "General"; - cursorState = HideCursorState::Idle; - cursorHideTimeout = 5; - trophyNotificationDuration = 6.0; - useSpecialPad = false; - specialPadClass = 1; - isDebugDump = false; - isShaderDebug = false; isShowSplash = false; isSideTrophy = "right"; + compatibilityData = false; + checkCompatibilityOnStartup = false; + + // Input + cursorState = HideCursorState::Idle; + cursorHideTimeout = 5; + useSpecialPad = false; + specialPadClass = 1; + isMotionControlsEnabled = true; + useUnifiedInputConfig = true; + overrideControllerColor = false; + controllerCustomColorRGB[0] = 0; + controllerCustomColorRGB[1] = 0; + controllerCustomColorRGB[2] = 255; + + // GPU + windowWidth = 1280; + windowHeight = 720; + internalScreenWidth = 1280; + internalScreenHeight = 720; isNullGpu = false; + shouldCopyGPUBuffers = false; + readbacksEnabled = false; + readbackLinearImagesEnabled = false; + directMemoryAccessEnabled = false; shouldDumpShaders = false; + shouldPatchShaders = false; vblankDivider = 1; + isFullscreen = false; + fullscreenMode = "Windowed"; + isHDRAllowed = false; + + // Vulkan + gpuId = -1; vkValidation = false; vkValidationSync = false; vkValidationGpu = false; @@ -855,10 +954,18 @@ void setDefaultValues() { vkHostMarkers = false; vkGuestMarkers = false; rdocEnable = false; + + // Debug + isDebugDump = false; + isShaderDebug = false; + isSeparateLogFilesEnabled = false; + isFpsColor = true; + + // GUI + load_game_size = true; + + // Settings m_language = 1; - gpuId = -1; - compatibilityData = false; - checkCompatibilityOnStartup = false; } constexpr std::string_view GetDefaultKeyboardConfig() { diff --git a/src/common/config.h b/src/common/config.h index 931fa68e2..e54425676 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -25,10 +25,14 @@ bool getIsFullscreen(); void setIsFullscreen(bool enable); std::string getFullscreenMode(); void setFullscreenMode(std::string mode); -u32 getScreenWidth(); -u32 getScreenHeight(); -void setScreenWidth(u32 width); -void setScreenHeight(u32 height); +u32 getWindowWidth(); +u32 getWindowHeight(); +void setWindowWidth(u32 width); +void setWindowHeight(u32 height); +u32 getInternalScreenWidth(); +u32 getInternalScreenHeight(); +void setInternalScreenWidth(u32 width); +void setInternalScreenHeight(u32 height); bool debugDump(); void setDebugDump(bool enable); s32 getGpuId(); @@ -47,6 +51,7 @@ bool copyGPUCmdBuffers(); void setCopyGPUCmdBuffers(bool enable); bool readbacks(); void setReadbacks(bool enable); +bool readbackLinearImages(); bool directMemoryAccess(); void setDirectMemoryAccess(bool enable); bool dumpShaders(); diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp index 2e29f70ee..846bb5eb4 100644 --- a/src/core/address_space.cpp +++ b/src/core/address_space.cpp @@ -358,9 +358,17 @@ enum PosixPageProtection { [[nodiscard]] constexpr PosixPageProtection ToPosixProt(Core::MemoryProt prot) { if (True(prot & Core::MemoryProt::CpuReadWrite) || True(prot & Core::MemoryProt::GpuReadWrite)) { - return PAGE_READWRITE; + if (True(prot & Core::MemoryProt::CpuExec)) { + return PAGE_EXECUTE_READWRITE; + } else { + return PAGE_READWRITE; + } } else if (True(prot & Core::MemoryProt::CpuRead) || True(prot & Core::MemoryProt::GpuRead)) { - return PAGE_READONLY; + if (True(prot & Core::MemoryProt::CpuExec)) { + return PAGE_EXECUTE_READ; + } else { + return PAGE_READONLY; + } } else { return PAGE_NOACCESS; } diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp index 8512858e9..e4f65cd31 100644 --- a/src/core/cpu_patches.cpp +++ b/src/core/cpu_patches.cpp @@ -163,7 +163,9 @@ static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operan mask = (1ULL << length) - 1; } - ASSERT_MSG(length + index <= 64, "length + index must be less than or equal to 64."); + if (length + index > 64) { + mask = 0xFFFF'FFFF'FFFF'FFFF; + } // Get lower qword from xmm register c.vmovq(scratch1, xmm_dst); @@ -177,8 +179,8 @@ static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operan c.mov(scratch2, mask); c.and_(scratch1, scratch2); - // Writeback to xmm register, extrq instruction says top 64-bits are undefined so we don't - // care to preserve them + // Writeback to xmm register, extrq instruction says top 64-bits are undefined but zeroed on + // AMD CPUs c.vmovq(xmm_dst, scratch1); c.pop(scratch2); @@ -287,7 +289,9 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper mask_value = (1ULL << length) - 1; } - ASSERT_MSG(length + index <= 64, "length + index must be less than or equal to 64."); + if (length + index > 64) { + mask_value = 0xFFFF'FFFF'FFFF'FFFF; + } c.vmovq(scratch1, xmm_src); c.vmovq(scratch2, xmm_dst); @@ -307,8 +311,9 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper // dst |= src c.or_(scratch2, scratch1); - // Insert scratch2 into low 64 bits of dst, upper 64 bits are unaffected - c.vpinsrq(xmm_dst, xmm_dst, scratch2, 0); + // Insert scratch2 into low 64 bits of dst, upper 64 bits are undefined but zeroed on AMD + // CPUs + c.vmovq(xmm_dst, scratch2); c.pop(mask); c.pop(scratch2); @@ -374,7 +379,7 @@ static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* oper c.and_(scratch2, mask); c.or_(scratch2, scratch1); - // Upper 64 bits are undefined in insertq + // Upper 64 bits are undefined in insertq but AMD CPUs zero them c.vmovq(xmm_dst, scratch2); c.pop(mask); @@ -635,6 +640,7 @@ static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) { lowQWordDst >>= index; lowQWordDst &= mask; + memset((u8*)dst + sizeof(u64), 0, sizeof(u64)); memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); Common::IncrementRip(ctx, 4); @@ -675,6 +681,7 @@ static bool TryExecuteIllegalInstruction(void* ctx, void* code_address) { lowQWordDst &= ~(mask << index); lowQWordDst |= lowQWordSrc << index; + memset((u8*)dst + sizeof(u64), 0, sizeof(u64)); memcpy(dst, &lowQWordDst, sizeof(lowQWordDst)); Common::IncrementRip(ctx, 4); diff --git a/src/core/libraries/ime/ime.cpp b/src/core/libraries/ime/ime.cpp index 1c61bc276..54e856e87 100644 --- a/src/core/libraries/ime/ime.cpp +++ b/src/core/libraries/ime/ime.cpp @@ -43,8 +43,8 @@ public: openEvent.param.rect.x = m_param.ime.posx; openEvent.param.rect.y = m_param.ime.posy; } else { - openEvent.param.resource_id_array.userId = 1; - openEvent.param.resource_id_array.resourceId[0] = 1; + openEvent.param.resource_id_array.user_id = 1; + openEvent.param.resource_id_array.resource_id[0] = 1; } // Are we supposed to call the event handler on init with @@ -59,10 +59,10 @@ public: } } - s32 Update(OrbisImeEventHandler handler) { + Error Update(OrbisImeEventHandler handler) { if (!m_ime_mode) { /* We don't handle any events for ImeKeyboard */ - return ORBIS_OK; + return Error::OK; } std::unique_lock lock{g_ime_state.queue_mutex}; @@ -73,7 +73,7 @@ public: Execute(handler, &event, false); } - return ORBIS_OK; + return Error::OK; } void Execute(OrbisImeEventHandler handler, OrbisImeEvent* event, bool use_param_handler) { @@ -94,14 +94,14 @@ public: } } - s32 SetText(const char16_t* text, u32 length) { + Error SetText(const char16_t* text, u32 length) { g_ime_state.SetText(text, length); - return ORBIS_OK; + return Error::OK; } - s32 SetCaret(const OrbisImeCaret* caret) { + Error SetCaret(const OrbisImeCaret* caret) { g_ime_state.SetCaret(caret->index); - return ORBIS_OK; + return Error::OK; } bool IsIme() { @@ -222,11 +222,11 @@ int PS4_SYSV_ABI sceImeGetPanelPositionAndForm() { return ORBIS_OK; } -s32 PS4_SYSV_ABI sceImeGetPanelSize(const OrbisImeParam* param, u32* width, u32* height) { +Error PS4_SYSV_ABI sceImeGetPanelSize(const OrbisImeParam* param, u32* width, u32* height) { LOG_INFO(Lib_Ime, "called"); if (!width || !height) { - return ORBIS_IME_ERROR_INVALID_ADDRESS; + return Error::INVALID_ADDRESS; } switch (param->type) { @@ -244,18 +244,18 @@ s32 PS4_SYSV_ABI sceImeGetPanelSize(const OrbisImeParam* param, u32* width, u32* break; } - return ORBIS_OK; + return Error::OK; } -s32 PS4_SYSV_ABI sceImeKeyboardClose(s32 userId) { +Error PS4_SYSV_ABI sceImeKeyboardClose(s32 userId) { LOG_INFO(Lib_Ime, "(STUBBED) called"); if (!g_keyboard_handler) { - return ORBIS_IME_ERROR_NOT_OPENED; + return Error::NOT_OPENED; } g_keyboard_handler.release(); - return ORBIS_OK; + return Error::OK; } int PS4_SYSV_ABI sceImeKeyboardGetInfo() { @@ -268,25 +268,25 @@ int PS4_SYSV_ABI sceImeKeyboardGetResourceId() { return ORBIS_OK; } -s32 PS4_SYSV_ABI sceImeKeyboardOpen(s32 userId, const OrbisImeKeyboardParam* param) { +Error PS4_SYSV_ABI sceImeKeyboardOpen(s32 userId, const OrbisImeKeyboardParam* param) { LOG_INFO(Lib_Ime, "called"); if (!param) { - return ORBIS_IME_ERROR_INVALID_ADDRESS; + return Error::INVALID_ADDRESS; } if (!param->arg) { - return ORBIS_IME_ERROR_INVALID_ARG; + return Error::INVALID_ARG; } if (!param->handler) { - return ORBIS_IME_ERROR_INVALID_HANDLER; + return Error::INVALID_HANDLER; } if (g_keyboard_handler) { - return ORBIS_IME_ERROR_BUSY; + return Error::BUSY; } g_keyboard_handler = std::make_unique(param); - return ORBIS_OK; + return Error::OK; } int PS4_SYSV_ABI sceImeKeyboardOpenInternal() { @@ -304,18 +304,18 @@ int PS4_SYSV_ABI sceImeKeyboardUpdate() { return ORBIS_OK; } -s32 PS4_SYSV_ABI sceImeOpen(const OrbisImeParam* param, const void* extended) { +Error PS4_SYSV_ABI sceImeOpen(const OrbisImeParam* param, const OrbisImeParamExtended* extended) { LOG_INFO(Lib_Ime, "called"); if (!param) { - return ORBIS_IME_ERROR_INVALID_ADDRESS; + return Error::INVALID_ADDRESS; } if (g_ime_handler) { - return ORBIS_IME_ERROR_BUSY; + return Error::BUSY; } g_ime_handler = std::make_unique(param); - return ORBIS_OK; + return Error::OK; } int PS4_SYSV_ABI sceImeOpenInternal() { @@ -339,27 +339,27 @@ int PS4_SYSV_ABI sceImeSetCandidateIndex() { return ORBIS_OK; } -int PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret) { +Error PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret) { LOG_TRACE(Lib_Ime, "called"); if (!g_ime_handler) { - return ORBIS_IME_ERROR_NOT_OPENED; + return Error::NOT_OPENED; } if (!caret) { - return ORBIS_IME_ERROR_INVALID_ADDRESS; + return Error::INVALID_ADDRESS; } return g_ime_handler->SetCaret(caret); } -s32 PS4_SYSV_ABI sceImeSetText(const char16_t* text, u32 length) { +Error PS4_SYSV_ABI sceImeSetText(const char16_t* text, u32 length) { LOG_TRACE(Lib_Ime, "called"); if (!g_ime_handler) { - return ORBIS_IME_ERROR_NOT_OPENED; + return Error::NOT_OPENED; } if (!text) { - return ORBIS_IME_ERROR_INVALID_ADDRESS; + return Error::INVALID_ADDRESS; } return g_ime_handler->SetText(text, length); @@ -370,7 +370,7 @@ int PS4_SYSV_ABI sceImeSetTextGeometry() { return ORBIS_OK; } -s32 PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler) { +Error PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler) { if (g_ime_handler) { g_ime_handler->Update(handler); } @@ -380,10 +380,10 @@ s32 PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler) { } if (!g_ime_handler || !g_keyboard_handler) { - return ORBIS_IME_ERROR_NOT_OPENED; + return Error::NOT_OPENED; } - return ORBIS_OK; + return Error::OK; } int PS4_SYSV_ABI sceImeVshClearPreedit() { diff --git a/src/core/libraries/ime/ime.h b/src/core/libraries/ime/ime.h index fcf381048..c2b80809c 100644 --- a/src/core/libraries/ime/ime.h +++ b/src/core/libraries/ime/ime.h @@ -13,72 +13,6 @@ class SymbolsResolver; namespace Libraries::Ime { -constexpr u32 ORBIS_IME_MAX_TEXT_LENGTH = 2048; - -enum class OrbisImeKeyboardOption : u32 { - Default = 0, - Repeat = 1, - RepeatEachKey = 2, - AddOsk = 4, - EffectiveWithIme = 8, - DisableResume = 16, - DisableCapslockWithoutShift = 32, -}; -DECLARE_ENUM_FLAG_OPERATORS(OrbisImeKeyboardOption) - -enum class OrbisImeOption : u32 { - DEFAULT = 0, - MULTILINE = 1, - NO_AUTO_CAPITALIZATION = 2, - PASSWORD = 4, - LANGUAGES_FORCED = 8, - EXT_KEYBOARD = 16, - NO_LEARNING = 32, - FIXED_POSITION = 64, - DISABLE_RESUME = 256, - DISABLE_AUTO_SPACE = 512, - DISABLE_POSITION_ADJUSTMENT = 2048, - EXPANDED_PREEDIT_BUFFER = 4096, - USE_JAPANESE_EISUU_KEY_AS_CAPSLOCK = 8192, - USE_2K_COORDINATES = 16384, -}; -DECLARE_ENUM_FLAG_OPERATORS(OrbisImeOption) - -struct OrbisImeKeyboardParam { - OrbisImeKeyboardOption option; - s8 reserved1[4]; - void* arg; - OrbisImeEventHandler handler; - s8 reserved2[8]; -}; - -struct OrbisImeParam { - s32 user_id; - OrbisImeType type; - u64 supported_languages; - OrbisImeEnterLabel enter_label; - OrbisImeInputMethod input_method; - OrbisImeTextFilter filter; - OrbisImeOption option; - u32 maxTextLength; - char16_t* inputTextBuffer; - float posx; - float posy; - OrbisImeHorizontalAlignment horizontal_alignment; - OrbisImeVerticalAlignment vertical_alignment; - void* work; - void* arg; - OrbisImeEventHandler handler; - s8 reserved[8]; -}; - -struct OrbisImeCaret { - f32 x; - f32 y; - u32 height; - u32 index; -}; - int PS4_SYSV_ABI FinalizeImeModule(); int PS4_SYSV_ABI InitializeImeModule(); int PS4_SYSV_ABI sceImeCheckFilterText(); @@ -98,22 +32,22 @@ int PS4_SYSV_ABI sceImeDisableController(); int PS4_SYSV_ABI sceImeFilterText(); int PS4_SYSV_ABI sceImeForTestFunction(); int PS4_SYSV_ABI sceImeGetPanelPositionAndForm(); -s32 PS4_SYSV_ABI sceImeGetPanelSize(const OrbisImeParam* param, u32* width, u32* height); -s32 PS4_SYSV_ABI sceImeKeyboardClose(s32 userId); +Error PS4_SYSV_ABI sceImeGetPanelSize(const OrbisImeParam* param, u32* width, u32* height); +Error PS4_SYSV_ABI sceImeKeyboardClose(s32 userId); int PS4_SYSV_ABI sceImeKeyboardGetInfo(); int PS4_SYSV_ABI sceImeKeyboardGetResourceId(); -s32 PS4_SYSV_ABI sceImeKeyboardOpen(s32 userId, const OrbisImeKeyboardParam* param); +Error PS4_SYSV_ABI sceImeKeyboardOpen(s32 userId, const OrbisImeKeyboardParam* param); int PS4_SYSV_ABI sceImeKeyboardOpenInternal(); int PS4_SYSV_ABI sceImeKeyboardSetMode(); int PS4_SYSV_ABI sceImeKeyboardUpdate(); -s32 PS4_SYSV_ABI sceImeOpen(const OrbisImeParam* param, const void* extended); +Error PS4_SYSV_ABI sceImeOpen(const OrbisImeParam* param, const OrbisImeParamExtended* extended); int PS4_SYSV_ABI sceImeOpenInternal(); void PS4_SYSV_ABI sceImeParamInit(OrbisImeParam* param); int PS4_SYSV_ABI sceImeSetCandidateIndex(); -s32 PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret); -s32 PS4_SYSV_ABI sceImeSetText(const char16_t* text, u32 length); +Error PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret); +Error PS4_SYSV_ABI sceImeSetText(const char16_t* text, u32 length); int PS4_SYSV_ABI sceImeSetTextGeometry(); -s32 PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler); +Error PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler); int PS4_SYSV_ABI sceImeVshClearPreedit(); int PS4_SYSV_ABI sceImeVshClose(); int PS4_SYSV_ABI sceImeVshConfirmPreedit(); diff --git a/src/core/libraries/ime/ime_common.h b/src/core/libraries/ime/ime_common.h index 96f073dc5..5c0030030 100644 --- a/src/core/libraries/ime/ime_common.h +++ b/src/core/libraries/ime/ime_common.h @@ -3,9 +3,108 @@ #pragma once +#include "common/enum.h" #include "common/types.h" #include "core/libraries/rtc/rtc.h" +constexpr u32 ORBIS_IME_MAX_TEXT_LENGTH = 2048; +constexpr u32 ORBIS_IME_DIALOG_MAX_TEXT_LENGTH = 2048; + +enum class Error : u32 { + OK = 0x0, + BUSY = 0x80bc0001, + NOT_OPENED = 0x80bc0002, + NO_MEMORY = 0x80bc0003, + CONNECTION_FAILED = 0x80bc0004, + TOO_MANY_REQUESTS = 0x80bc0005, + INVALID_TEXT = 0x80bc0006, + EVENT_OVERFLOW = 0x80bc0007, + NOT_ACTIVE = 0x80bc0008, + IME_SUSPENDING = 0x80bc0009, + DEVICE_IN_USE = 0x80bc000a, + INVALID_USER_ID = 0x80bc0010, + INVALID_TYPE = 0x80bc0011, + INVALID_SUPPORTED_LANGUAGES = 0x80bc0012, + INVALID_ENTER_LABEL = 0x80bc0013, + INVALID_INPUT_METHOD = 0x80bc0014, + INVALID_OPTION = 0x80bc0015, + INVALID_MAX_TEXT_LENGTH = 0x80bc0016, + INVALID_INPUT_TEXT_BUFFER = 0x80bc0017, + INVALID_POSX = 0x80bc0018, + INVALID_POSY = 0x80bc0019, + INVALID_HORIZONTALIGNMENT = 0x80bc001a, + INVALID_VERTICALALIGNMENT = 0x80bc001b, + INVALID_EXTENDED = 0x80bc001c, + INVALID_KEYBOARD_TYPE = 0x80bc001d, + INVALID_WORK = 0x80bc0020, + INVALID_ARG = 0x80bc0021, + INVALID_HANDLER = 0x80bc0022, + NO_RESOURCE_ID = 0x80bc0023, + INVALID_MODE = 0x80bc0024, + INVALID_PARAM = 0x80bc0030, + INVALID_ADDRESS = 0x80bc0031, + INVALID_RESERVED = 0x80bc0032, + INVALID_TIMING = 0x80bc0033, + INTERNAL = 0x80bc00ff, + DIALOG_INVALID_TITLE = 0x80bc0101, + DIALOG_NOT_RUNNING = 0x80bc0105, + DIALOG_NOT_FINISHED = 0x80bc0106, + DIALOG_NOT_IN_USE = 0x80bc0107 +}; + +enum class OrbisImeOption : u32 { + DEFAULT = 0, + MULTILINE = 1, + NO_AUTO_CAPITALIZATION = 2, + PASSWORD = 4, + LANGUAGES_FORCED = 8, + EXT_KEYBOARD = 16, + NO_LEARNING = 32, + FIXED_POSITION = 64, + DISABLE_COPY_PASTE = 128, + DISABLE_RESUME = 256, + DISABLE_AUTO_SPACE = 512, + DISABLE_POSITION_ADJUSTMENT = 2048, + EXPANDED_PREEDIT_BUFFER = 4096, + USE_JAPANESE_EISUU_KEY_AS_CAPSLOCK = 8192, + USE_2K_COORDINATES = 16384, +}; +DECLARE_ENUM_FLAG_OPERATORS(OrbisImeOption); + +enum class OrbisImeLanguage : u64 { + DANISH = 0x0000000000000001, + GERMAN = 0x0000000000000002, + ENGLISH_US = 0x0000000000000004, + SPANISH = 0x0000000000000008, + FRENCH = 0x0000000000000010, + ITALIAN = 0x0000000000000020, + DUTCH = 0x0000000000000040, + NORWEGIAN = 0x0000000000000080, + POLISH = 0x0000000000000100, + PORTUGUESE_PT = 0x0000000000000200, + RUSSIAN = 0x0000000000000400, + FINNISH = 0x0000000000000800, + SWEDISH = 0x0000000000001000, + JAPANESE = 0x0000000000002000, + KOREAN = 0x0000000000004000, + SIMPLIFIED_CHINESE = 0x0000000000008000, + TRADITIONAL_CHINESE = 0x0000000000010000, + PORTUGUESE_BR = 0x0000000000020000, + ENGLISH_GB = 0x0000000000040000, + TURKISH = 0x0000000000080000, + SPANISH_LA = 0x0000000000100000, + ARABIC = 0x0000000001000000, + FRENCH_CA = 0x0000000002000000, + THAI = 0x0000000004000000, + CZECH = 0x0000000008000000, + GREEK = 0x0000000010000000, + INDONESIAN = 0x0000000020000000, + VIETNAMESE = 0x0000000040000000, + ROMANIAN = 0x0000000080000000, + HUNGARIAN = 0x0000000100000000, +}; +DECLARE_ENUM_FLAG_OPERATORS(OrbisImeLanguage); + enum class OrbisImeType : u32 { Default = 0, BasicLatin = 1, @@ -41,6 +140,7 @@ enum class OrbisImeEventId : u32 { Open = 0, UpdateText = 1, UpdateCaret = 2, + ChangeSize = 3, PressClose = 4, PressEnter = 5, Abort = 6, @@ -51,6 +151,10 @@ enum class OrbisImeEventId : u32 { CandidateDone = 11, CandidateCancel = 12, ChangeDevice = 14, + JumpToNextObject = 15, + JumpToBeforeObject = 16, + ChangeWindowType = 17, + ChangeInputMethodState = 18, KeyboardOpen = 256, @@ -110,6 +214,13 @@ enum class OrbisImeDeviceType : u32 { RemoteOsk = 3, }; +enum class OrbisImePanelPriority : u32 { + Default = 0, + Alphabet = 1, + Symbol = 2, + Accent = 3, +}; + struct OrbisImeRect { f32 x; f32 y; @@ -117,8 +228,22 @@ struct OrbisImeRect { u32 height; }; +struct OrbisImeColor { + u8 r; + u8 g; + u8 b; + u8 a; +}; + +enum class OrbisImeTextAreaMode : u32 { + Disable = 0, + Edit = 1, + Preedit = 2, + Select = 3, +}; + struct OrbisImeTextAreaProperty { - u32 mode; // OrbisImeTextAreaMode + OrbisImeTextAreaMode mode; u32 index; s32 length; }; @@ -135,14 +260,14 @@ struct OrbisImeKeycode { char16_t character; u32 status; OrbisImeKeyboardType type; - s32 user_id; + s32 user_id; // Todo: switch to OrbisUserServiceUserId u32 resource_id; Libraries::Rtc::OrbisRtcTick timestamp; }; struct OrbisImeKeyboardResourceIdArray { - s32 userId; - u32 resourceId[5]; + s32 user_id; // Todo: switch to OrbisUserServiceUserId + u32 resource_id[5]; }; enum class OrbisImeCaretMovementDirection : u32 { @@ -159,6 +284,16 @@ enum class OrbisImeCaretMovementDirection : u32 { Bottom = 10, }; +enum class OrbisImePanelType : u32 { + Hide = 0, + Osk = 1, + Dialog = 2, + Candidate = 3, + Edit = 4, + EditAndCandidate = 5, + Accessibility = 6, +}; + union OrbisImeEventParam { OrbisImeRect rect; OrbisImeEditText text; @@ -168,6 +303,7 @@ union OrbisImeEventParam { char16_t* candidate_word; s32 candidate_index; OrbisImeDeviceType device_type; + OrbisImePanelType panel_type; u32 input_method_state; s8 reserved[64]; }; @@ -177,7 +313,95 @@ struct OrbisImeEvent { OrbisImeEventParam param; }; +using OrbisImeExtKeyboardFilter = PS4_SYSV_ABI int (*)(const OrbisImeKeycode* srcKeycode, + u16* outKeycode, u32* outStatus, + void* reserved); + using OrbisImeTextFilter = PS4_SYSV_ABI int (*)(char16_t* outText, u32* outTextLength, const char16_t* srcText, u32 srcTextLength); using OrbisImeEventHandler = PS4_SYSV_ABI void (*)(void* arg, const OrbisImeEvent* e); + +enum class OrbisImeKeyboardOption : u32 { + Default = 0, + Repeat = 1, + RepeatEachKey = 2, + AddOsk = 4, + EffectiveWithIme = 8, + DisableResume = 16, + DisableCapslockWithoutShift = 32, +}; +DECLARE_ENUM_FLAG_OPERATORS(OrbisImeKeyboardOption) + +struct OrbisImeKeyboardParam { + OrbisImeKeyboardOption option; + s8 reserved1[4]; + void* arg; + OrbisImeEventHandler handler; + s8 reserved2[8]; +}; + +struct OrbisImeParam { + s32 user_id; // Todo: switch to OrbisUserServiceUserId + OrbisImeType type; + u64 supported_languages; // OrbisImeLanguage flags + OrbisImeEnterLabel enter_label; + OrbisImeInputMethod input_method; + OrbisImeTextFilter filter; + OrbisImeOption option; + u32 maxTextLength; + char16_t* inputTextBuffer; + f32 posx; + f32 posy; + OrbisImeHorizontalAlignment horizontal_alignment; + OrbisImeVerticalAlignment vertical_alignment; + void* work; + void* arg; + OrbisImeEventHandler handler; + s8 reserved[8]; +}; + +struct OrbisImeCaret { + f32 x; + f32 y; + u32 height; + u32 index; +}; + +struct OrbisImeDialogParam { + s32 user_id; + OrbisImeType type; + u64 supported_languages; // OrbisImeLanguage flags + OrbisImeEnterLabel enter_label; + OrbisImeInputMethod input_method; + OrbisImeTextFilter filter; + OrbisImeOption option; + u32 max_text_length; + char16_t* input_text_buffer; + f32 posx; + f32 posy; + OrbisImeHorizontalAlignment horizontal_alignment; + OrbisImeVerticalAlignment vertical_alignment; + const char16_t* placeholder; + const char16_t* title; + s8 reserved[16]; +}; + +struct OrbisImeParamExtended { + u32 option; // OrbisImeExtOption flags + OrbisImeColor color_base; + OrbisImeColor color_line; + OrbisImeColor color_text_field; + OrbisImeColor color_preedit; + OrbisImeColor color_button_default; + OrbisImeColor color_button_function; + OrbisImeColor color_button_symbol; + OrbisImeColor color_text; + OrbisImeColor color_special; + OrbisImePanelPriority priority; + char* additional_dictionary_path; + OrbisImeExtKeyboardFilter ext_keyboard_filter; + u32 disable_device; + u32 ext_keyboard_mode; + s8 reserved[60]; +}; diff --git a/src/core/libraries/ime/ime_dialog.cpp b/src/core/libraries/ime/ime_dialog.cpp index bee185787..6f808636b 100644 --- a/src/core/libraries/ime/ime_dialog.cpp +++ b/src/core/libraries/ime/ime_dialog.cpp @@ -20,19 +20,19 @@ static OrbisImeDialogResult g_ime_dlg_result{}; static ImeDialogState g_ime_dlg_state{}; static ImeDialogUi g_ime_dlg_ui; -static bool IsValidOption(OrbisImeDialogOption option, OrbisImeType type) { - if (False(~option & - (OrbisImeDialogOption::Multiline | OrbisImeDialogOption::NoAutoCompletion))) { +static bool IsValidOption(OrbisImeOption option, OrbisImeType type) { + if (False(~option & (OrbisImeOption::MULTILINE | + OrbisImeOption::NO_AUTO_CAPITALIZATION /* NoAutoCompletion */))) { return false; } - if (True(option & OrbisImeDialogOption::Multiline) && type != OrbisImeType::Default && + if (True(option & OrbisImeOption::MULTILINE) && type != OrbisImeType::Default && type != OrbisImeType::BasicLatin) { return false; } - if (True(option & OrbisImeDialogOption::NoAutoCompletion) && type != OrbisImeType::Number && - type != OrbisImeType::BasicLatin) { + if (True(option & OrbisImeOption::NO_AUTO_CAPITALIZATION /* NoAutoCompletion */) && + type != OrbisImeType::Number && type != OrbisImeType::BasicLatin) { return false; } @@ -96,7 +96,7 @@ Error PS4_SYSV_ABI sceImeDialogGetPanelSize(const OrbisImeDialogParam* param, u3 case OrbisImeType::Url: case OrbisImeType::Mail: *width = 500; // original: 793 - if (True(param->option & OrbisImeDialogOption::Multiline)) { + if (True(param->option & OrbisImeOption::MULTILINE)) { *height = 300; // original: 576 } else { *height = 150; // original: 476 @@ -149,18 +149,20 @@ OrbisImeDialogStatus PS4_SYSV_ABI sceImeDialogGetStatus() { } Error PS4_SYSV_ABI sceImeDialogInit(OrbisImeDialogParam* param, OrbisImeParamExtended* extended) { + LOG_INFO(Lib_ImeDialog, ">> sceImeDialogInit: entering, param={}, extended={}", + static_cast(param), static_cast(extended)); if (g_ime_dlg_status != OrbisImeDialogStatus::None) { - LOG_INFO(Lib_ImeDialog, "IME dialog is already running"); + LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: busy (status=%u)", (u32)g_ime_dlg_status); return Error::BUSY; } if (param == nullptr) { - LOG_INFO(Lib_ImeDialog, "called with param (NULL)"); + LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: param is null"); return Error::INVALID_ADDRESS; } if (!magic_enum::enum_contains(param->type)) { - LOG_INFO(Lib_ImeDialog, "Invalid param->type"); + LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: invalid param->type=%u", (u32)param->type); return Error::INVALID_ADDRESS; } @@ -168,16 +170,14 @@ Error PS4_SYSV_ABI sceImeDialogInit(OrbisImeDialogParam* param, OrbisImeParamExt // TODO: do correct param->supportedLanguages validation if (param->posx < 0.0f || - param->posx >= - MAX_X_POSITIONS[False(param->option & OrbisImeDialogOption::LargeResolution)]) { - LOG_INFO(Lib_ImeDialog, "Invalid param->posx"); + param->posx >= MAX_X_POSITIONS[False(param->option & OrbisImeOption::USE_2K_COORDINATES)]) { + LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: invalid posx=%f", param->posx); return Error::INVALID_POSX; } if (param->posy < 0.0f || - param->posy >= - MAX_Y_POSITIONS[False(param->option & OrbisImeDialogOption::LargeResolution)]) { - LOG_INFO(Lib_ImeDialog, "Invalid param->posy"); + param->posy >= MAX_Y_POSITIONS[False(param->option & OrbisImeOption::USE_2K_COORDINATES)]) { + LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: invalid posy=%f", param->posy); return Error::INVALID_POSY; } @@ -192,12 +192,13 @@ Error PS4_SYSV_ABI sceImeDialogInit(OrbisImeDialogParam* param, OrbisImeParamExt } if (!IsValidOption(param->option, param->type)) { - LOG_INFO(Lib_ImeDialog, "Invalid param->option"); + LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: invalid option=0x%X for type=%u", + static_cast(param->option), (u32)param->type); return Error::INVALID_PARAM; } if (param->input_text_buffer == nullptr) { - LOG_INFO(Lib_ImeDialog, "Invalid param->inputTextBuffer"); + LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: input_text_buffer is null"); return Error::INVALID_INPUT_TEXT_BUFFER; } @@ -220,16 +221,24 @@ Error PS4_SYSV_ABI sceImeDialogInit(OrbisImeDialogParam* param, OrbisImeParamExt } } - if (param->max_text_length > ORBIS_IME_DIALOG_MAX_TEXT_LENGTH) { - LOG_INFO(Lib_ImeDialog, "Invalid param->maxTextLength"); + if (param->max_text_length == 0 || param->max_text_length > ORBIS_IME_MAX_TEXT_LENGTH) { + LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: invalid max_text_length=%u", + param->max_text_length); return Error::INVALID_MAX_TEXT_LENGTH; } + // Title string validation + if (param->title != nullptr && !std::char_traits::length(param->title)) { + LOG_ERROR(Lib_ImeDialog, "sceImeDialogInit: title is empty"); + return Error::INVALID_PARAM; + } + g_ime_dlg_result = {}; g_ime_dlg_state = ImeDialogState(param, extended); g_ime_dlg_status = OrbisImeDialogStatus::Running; g_ime_dlg_ui = ImeDialogUi(&g_ime_dlg_state, &g_ime_dlg_status, &g_ime_dlg_result); + LOG_INFO(Lib_ImeDialog, "<< sceImeDialogInit: successful, status now=Running"); return Error::OK; } diff --git a/src/core/libraries/ime/ime_dialog.h b/src/core/libraries/ime/ime_dialog.h index 526e5f022..a056fdd5e 100644 --- a/src/core/libraries/ime/ime_dialog.h +++ b/src/core/libraries/ime/ime_dialog.h @@ -13,50 +13,6 @@ class SymbolsResolver; namespace Libraries::ImeDialog { -constexpr u32 ORBIS_IME_DIALOG_MAX_TEXT_LENGTH = 2048; - -enum class Error : u32 { - OK = 0x0, - BUSY = 0x80bc0001, - NOT_OPENED = 0x80bc0002, - NO_MEMORY = 0x80bc0003, - CONNECTION_FAILED = 0x80bc0004, - TOO_MANY_REQUESTS = 0x80bc0005, - INVALID_TEXT = 0x80bc0006, - EVENT_OVERFLOW = 0x80bc0007, - NOT_ACTIVE = 0x80bc0008, - IME_SUSPENDING = 0x80bc0009, - DEVICE_IN_USE = 0x80bc000a, - INVALID_USER_ID = 0x80bc0010, - INVALID_TYPE = 0x80bc0011, - INVALID_SUPPORTED_LANGUAGES = 0x80bc0012, - INVALID_ENTER_LABEL = 0x80bc0013, - INVALID_INPUT_METHOD = 0x80bc0014, - INVALID_OPTION = 0x80bc0015, - INVALID_MAX_TEXT_LENGTH = 0x80bc0016, - INVALID_INPUT_TEXT_BUFFER = 0x80bc0017, - INVALID_POSX = 0x80bc0018, - INVALID_POSY = 0x80bc0019, - INVALID_HORIZONTALIGNMENT = 0x80bc001a, - INVALID_VERTICALALIGNMENT = 0x80bc001b, - INVALID_EXTENDED = 0x80bc001c, - INVALID_KEYBOARD_TYPE = 0x80bc001d, - INVALID_WORK = 0x80bc0020, - INVALID_ARG = 0x80bc0021, - INVALID_HANDLER = 0x80bc0022, - NO_RESOURCE_ID = 0x80bc0023, - INVALID_MODE = 0x80bc0024, - INVALID_PARAM = 0x80bc0030, - INVALID_ADDRESS = 0x80bc0031, - INVALID_RESERVED = 0x80bc0032, - INVALID_TIMING = 0x80bc0033, - INTERNAL = 0x80bc00ff, - DIALOG_INVALID_TITLE = 0x80bc0101, - DIALOG_NOT_RUNNING = 0x80bc0105, - DIALOG_NOT_FINISHED = 0x80bc0106, - DIALOG_NOT_IN_USE = 0x80bc0107, -}; - enum class OrbisImeDialogStatus : u32 { None = 0, Running = 1, @@ -69,87 +25,11 @@ enum class OrbisImeDialogEndStatus : u32 { Aborted = 2, }; -enum class OrbisImeDialogOption : u32 { - Default = 0, - Multiline = 1, - NoAutoCorrection = 2, - NoAutoCompletion = 4, - // TODO: Document missing options - LargeResolution = 1024, -}; -DECLARE_ENUM_FLAG_OPERATORS(OrbisImeDialogOption) - -enum class OrbisImePanelPriority : u32 { - Default = 0, - Alphabet = 1, - Symbol = 2, - Accent = 3, -}; - -struct OrbisImeColor { - u8 r; - u8 g; - u8 b; - u8 a; -}; - struct OrbisImeDialogResult { OrbisImeDialogEndStatus endstatus; s32 reserved[12]; }; -struct OrbisImeKeycode { - u16 keycode; - char16_t character; - u32 status; - OrbisImeKeyboardType type; - s32 user_id; - u32 resource_id; - u64 timestamp; -}; - -using OrbisImeExtKeyboardFilter = PS4_SYSV_ABI int (*)(const OrbisImeKeycode* srcKeycode, - u16* outKeycode, u32* outStatus, - void* reserved); - -struct OrbisImeDialogParam { - s32 user_id; - OrbisImeType type; - u64 supported_languages; - OrbisImeEnterLabel enter_label; - OrbisImeInputMethod input_method; - OrbisImeTextFilter filter; - OrbisImeDialogOption option; - u32 max_text_length; - char16_t* input_text_buffer; - float posx; - float posy; - OrbisImeHorizontalAlignment horizontal_alignment; - OrbisImeVerticalAlignment vertical_alignment; - const char16_t* placeholder; - const char16_t* title; - s8 reserved[16]; -}; - -struct OrbisImeParamExtended { - u32 option; // OrbisImeDialogOptionExtended - OrbisImeColor color_base; - OrbisImeColor color_line; - OrbisImeColor color_text_field; - OrbisImeColor color_preedit; - OrbisImeColor color_button_default; - OrbisImeColor color_button_function; - OrbisImeColor color_button_symbol; - OrbisImeColor color_text; - OrbisImeColor color_special; - OrbisImePanelPriority priority; - char* additional_dictionary_path; - OrbisImeExtKeyboardFilter ext_keyboard_filter; - uint32_t disable_device; - uint32_t ext_keyboard_mode; - int8_t reserved[60]; -}; - Error PS4_SYSV_ABI sceImeDialogAbort(); Error PS4_SYSV_ABI sceImeDialogForceClose(); Error PS4_SYSV_ABI sceImeDialogForTestFunction(); diff --git a/src/core/libraries/ime/ime_dialog_ui.cpp b/src/core/libraries/ime/ime_dialog_ui.cpp index 51183c79b..800ba1124 100644 --- a/src/core/libraries/ime/ime_dialog_ui.cpp +++ b/src/core/libraries/ime/ime_dialog_ui.cpp @@ -21,12 +21,16 @@ namespace Libraries::ImeDialog { ImeDialogState::ImeDialogState(const OrbisImeDialogParam* param, const OrbisImeParamExtended* extended) { + LOG_INFO(Lib_ImeDialog, ">> ImeDialogState::Ctor: param={}, text_buffer={}", + static_cast(param), + static_cast(param ? param->input_text_buffer : nullptr)); if (!param) { + LOG_ERROR(Lib_ImeDialog, " param==nullptr, returning without init"); return; } user_id = param->user_id; - is_multi_line = True(param->option & OrbisImeDialogOption::Multiline); + is_multi_line = True(param->option & OrbisImeOption::MULTILINE); is_numeric = param->type == OrbisImeType::Number; type = param->type; enter_label = param->enter_label; @@ -220,6 +224,7 @@ void ImeDialogUi::Free() { void ImeDialogUi::Draw() { std::unique_lock lock{draw_mutex}; + LOG_INFO(Lib_ImeDialog, ">> ImeDialogUi::Draw: first_render=%d", first_render); if (!state) { return; @@ -259,9 +264,13 @@ void ImeDialogUi::Draw() { } if (state->is_multi_line) { + LOG_INFO(Lib_ImeDialog, " Drawing multi-line widget…"); DrawMultiLineInputText(); + LOG_INFO(Lib_ImeDialog, " Done DrawMultiLineInputText"); } else { + LOG_INFO(Lib_ImeDialog, " Drawing input text widget…"); DrawInputText(); + LOG_INFO(Lib_ImeDialog, " Done DrawInputText"); } SetCursorPosY(GetCursorPosY() + 10.0f); @@ -306,6 +315,7 @@ void ImeDialogUi::Draw() { End(); first_render = false; + LOG_INFO(Lib_ImeDialog, "<< ImeDialogUi::Draw complete"); } void ImeDialogUi::DrawInputText() { @@ -316,7 +326,7 @@ void ImeDialogUi::DrawInputText() { } const char* placeholder = state->placeholder.empty() ? nullptr : state->placeholder.data(); if (InputTextEx("##ImeDialogInput", placeholder, state->current_text.begin(), - state->max_text_length, input_size, ImGuiInputTextFlags_CallbackCharFilter, + state->max_text_length + 1, input_size, ImGuiInputTextFlags_CallbackCharFilter, InputTextCallback, this)) { state->input_changed = true; } @@ -332,7 +342,7 @@ void ImeDialogUi::DrawMultiLineInputText() { } const char* placeholder = state->placeholder.empty() ? nullptr : state->placeholder.data(); if (InputTextEx("##ImeDialogInput", placeholder, state->current_text.begin(), - state->max_text_length, input_size, flags, InputTextCallback, this)) { + state->max_text_length + 1, input_size, flags, InputTextCallback, this)) { state->input_changed = true; } } @@ -341,13 +351,19 @@ int ImeDialogUi::InputTextCallback(ImGuiInputTextCallbackData* data) { ImeDialogUi* ui = static_cast(data->UserData); ASSERT(ui); + LOG_DEBUG(Lib_ImeDialog, ">> InputTextCallback: EventFlag={}, EventChar={}", data->EventFlag, + data->EventChar); + // Should we filter punctuation? if (ui->state->is_numeric && (data->EventChar < '0' || data->EventChar > '9') && data->EventChar != '\b' && data->EventChar != ',' && data->EventChar != '.') { + LOG_INFO(Lib_ImeDialog, "InputTextCallback: rejecting non-digit char '{}'", + static_cast(data->EventChar)); return 1; } if (!ui->state->keyboard_filter) { + LOG_DEBUG(Lib_ImeDialog, "InputTextCallback: no keyboard_filter, accepting char"); return 0; } @@ -363,20 +379,24 @@ int ImeDialogUi::InputTextCallback(ImGuiInputTextCallbackData* data) { // the current language?) .user_id = ui->state->user_id, .resource_id = 0, - .timestamp = 0, + .timestamp = {0}, }; if (!ui->state->ConvertUTF8ToOrbis(event_char, 4, &src_keycode.character, 1)) { - LOG_ERROR(Lib_ImeDialog, "Failed to convert orbis char to utf8"); + LOG_ERROR(Lib_ImeDialog, "InputTextCallback: ConvertUTF8ToOrbis failed"); return 0; } + LOG_DEBUG(Lib_ImeDialog, "InputTextCallback: converted to Orbis char={:#X}", + static_cast(src_keycode.character)); src_keycode.keycode = src_keycode.character; // TODO set this to the correct value u16 out_keycode; u32 out_status; - ui->state->CallKeyboardFilter(&src_keycode, &out_keycode, &out_status); - + bool keep = ui->state->CallKeyboardFilter(&src_keycode, &out_keycode, &out_status); + LOG_DEBUG(Lib_ImeDialog, + "InputTextCallback: CallKeyboardFilter returned %s (keycode=0x%X, status=0x%X)", + keep ? "true" : "false", out_keycode, out_status); // TODO. set the keycode return 0; diff --git a/src/core/libraries/ime/ime_ui.cpp b/src/core/libraries/ime/ime_ui.cpp index 37f25e200..c49c70ede 100644 --- a/src/core/libraries/ime/ime_ui.cpp +++ b/src/core/libraries/ime/ime_ui.cpp @@ -199,7 +199,7 @@ int ImeUi::InputTextCallback(ImGuiInputTextCallbackData* data) { eventParam.caret_index = data->CursorPos; eventParam.area_num = 1; - eventParam.text_area[0].mode = 1; // Edit mode + eventParam.text_area[0].mode = OrbisImeTextAreaMode::Edit; eventParam.text_area[0].index = data->CursorPos; eventParam.text_area[0].length = data->BufTextLen; diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index 114a096ca..e0c359f2c 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -573,11 +573,12 @@ void* PS4_SYSV_ABI posix_mmap(void* addr, u64 len, s32 prot, s32 flags, s32 fd, auto* memory = Core::Memory::Instance(); const auto mem_prot = static_cast(prot); const auto mem_flags = static_cast(flags); + const auto is_exec = True(mem_prot & Core::MemoryProt::CpuExec); s32 result = ORBIS_OK; if (fd == -1) { result = memory->MapMemory(&addr_out, std::bit_cast(addr), len, mem_prot, mem_flags, - Core::VMAType::Flexible); + Core::VMAType::Flexible, "anon", is_exec); } else { result = memory->MapFile(&addr_out, std::bit_cast(addr), len, mem_prot, mem_flags, fd, phys_addr); @@ -711,6 +712,7 @@ void RegisterMemory(Core::Loader::SymbolsResolver* sym) { sceKernelConfiguredFlexibleMemorySize); LIB_FUNCTION("vSMAm3cxYTY", "libkernel", 1, "libkernel", 1, 1, sceKernelMprotect); + LIB_FUNCTION("YQOfxL4QfeU", "libkernel", 1, "libkernel", 1, 1, posix_mprotect); LIB_FUNCTION("YQOfxL4QfeU", "libScePosix", 1, "libkernel", 1, 1, posix_mprotect); LIB_FUNCTION("9bfdLIyuwCY", "libkernel", 1, "libkernel", 1, 1, sceKernelMtypeprotect); diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index da715b3bf..0f961923a 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -445,7 +445,8 @@ s32 PS4_SYSV_ABI sceVideoOutConfigureOutputMode_(s32 handle, u32 reserved, const } void RegisterLib(Core::Loader::SymbolsResolver* sym) { - driver = std::make_unique(Config::getScreenWidth(), Config::getScreenHeight()); + driver = std::make_unique(Config::getInternalScreenWidth(), + Config::getInternalScreenHeight()); LIB_FUNCTION("SbU3dwp80lQ", "libSceVideoOut", 1, "libSceVideoOut", 0, 0, sceVideoOutGetFlipStatus); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 2372fafb5..3d9bf58a7 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -342,7 +342,7 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo } } - // Limit the minumum address to SystemManagedVirtualBase to prevent hardware-specific issues. + // Limit the minimum address to SystemManagedVirtualBase to prevent hardware-specific issues. VAddr mapped_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr; // Fixed mapping means the virtual address must exactly match the provided one. @@ -631,6 +631,9 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea vma_base, u64 size if (True(prot & MemoryProt::CpuReadWrite)) { perms |= Core::MemoryPermission::ReadWrite; } + if (True(prot & MemoryProt::CpuExec)) { + perms |= Core::MemoryPermission::Execute; + } if (True(prot & MemoryProt::GpuRead)) { perms |= Core::MemoryPermission::Read; } @@ -650,9 +653,9 @@ s32 MemoryManager::Protect(VAddr addr, u64 size, MemoryProt prot) { std::scoped_lock lk{mutex}; // Validate protection flags - constexpr static MemoryProt valid_flags = MemoryProt::NoAccess | MemoryProt::CpuRead | - MemoryProt::CpuReadWrite | MemoryProt::GpuRead | - MemoryProt::GpuWrite | MemoryProt::GpuReadWrite; + constexpr static MemoryProt valid_flags = + MemoryProt::NoAccess | MemoryProt::CpuRead | MemoryProt::CpuReadWrite | + MemoryProt::CpuExec | MemoryProt::GpuRead | MemoryProt::GpuWrite | MemoryProt::GpuReadWrite; MemoryProt invalid_flags = prot & ~valid_flags; if (invalid_flags != MemoryProt::NoAccess) { diff --git a/src/core/memory.h b/src/core/memory.h index c800ef763..285d7dbed 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -31,6 +31,7 @@ enum class MemoryProt : u32 { NoAccess = 0, CpuRead = 1, CpuReadWrite = 2, + CpuExec = 4, GpuRead = 16, GpuWrite = 32, GpuReadWrite = 48, diff --git a/src/emulator.cpp b/src/emulator.cpp index fbab5929b..480ceee0b 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -133,6 +133,7 @@ void Emulator::Run(std::filesystem::path file, const std::vector ar LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole()); LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu()); LOG_INFO(Config, "GPU readbacks: {}", Config::readbacks()); + LOG_INFO(Config, "GPU readbackLinearImages: {}", Config::readbackLinearImages()); LOG_INFO(Config, "GPU directMemoryAccess: {}", Config::directMemoryAccess()); LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders()); LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv()); @@ -222,7 +223,7 @@ void Emulator::Run(std::filesystem::path file, const std::vector ar } } window = std::make_unique( - Config::getScreenWidth(), Config::getScreenHeight(), controller, window_title); + Config::getWindowWidth(), Config::getWindowHeight(), controller, window_title); g_window = window.get(); diff --git a/src/qt_gui/settings_dialog.cpp b/src/qt_gui/settings_dialog.cpp index c9d264587..ed2a17e25 100644 --- a/src/qt_gui/settings_dialog.cpp +++ b/src/qt_gui/settings_dialog.cpp @@ -762,8 +762,8 @@ void SettingsDialog::UpdateSettings() { m_gui_settings->SetValue(gui::gl_backgroundMusicVolume, ui->BGMVolumeSlider->value()); Config::setLanguage(languageIndexes[ui->consoleLanguageComboBox->currentIndex()]); Config::setEnableDiscordRPC(ui->discordRPCCheckbox->isChecked()); - Config::setScreenWidth(ui->widthSpinBox->value()); - Config::setScreenHeight(ui->heightSpinBox->value()); + Config::setWindowWidth(ui->widthSpinBox->value()); + Config::setWindowHeight(ui->heightSpinBox->value()); Config::setVblankDiv(ui->vblankSpinBox->value()); Config::setDumpShaders(ui->dumpShadersCheckBox->isChecked()); Config::setNullGpu(ui->nullGpuCheckBox->isChecked()); diff --git a/src/qt_gui/translations/ca_ES.ts b/src/qt_gui/translations/ca_ES.ts index c508a7146..9b7f47c75 100644 --- a/src/qt_gui/translations/ca_ES.ts +++ b/src/qt_gui/translations/ca_ES.ts @@ -527,71 +527,71 @@ unmapped - unmapped + sense assignar L1 - L1 + L1 R1 - R1 + R1 L2 - L2 + L2 Options - Options + Opcions R2 - R2 + R2 Touchpad Left - Touchpad Left + Touchpad esquerra Touchpad Center - Touchpad Center + Touchpad centre Touchpad Right - Touchpad Right + Touchpad dreta Triangle - Triangle + Triangle Square - Square + Quadrat Circle - Circle + Cercle Cross - Cross + Creu Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: %1 - Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: + No es pot assignar una entrada més d'una vegada. S'han assignat de manera duplicada pels següents botons: %1 Press a button - Press a button + Clica un botó Move analog stick - Move analog stick + Mou la palanca diff --git a/src/qt_gui/translations/es_ES.ts b/src/qt_gui/translations/es_ES.ts index a31279d37..72b18437c 100644 --- a/src/qt_gui/translations/es_ES.ts +++ b/src/qt_gui/translations/es_ES.ts @@ -527,71 +527,69 @@ unmapped - unmapped + sin vincular L1 - L1 + L1 R1 - R1 + R1 L2 - L2 + L2 Options - Options + Opciones R2 - R2 + R2 Touchpad Left - Touchpad Left + Izquierda del Touchpad Touchpad Center - Touchpad Center + Centro del Touchpad Touchpad Right - Touchpad Right + Derecha del Touchpad Triangle - Triangle + Triángulo Square - Square + Cuadrado Circle - Circle + Círculo Cross - Cross + Equis Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: %1 - Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: - -%1 + No se puede asignar un control único más de una vez. Controles duplicados asignados a los siguientes botones Press a button - Press a button + Presiona un botón Move analog stick - Move analog stick + Mueve el stick analógico @@ -778,7 +776,7 @@ Favorite - Favorite + Favorito @@ -984,11 +982,11 @@ Remove from Favorites - Remove from Favorites + Eliminar de Favoritos Add to Favorites - Add to Favorites + Añadir a favoritos @@ -1220,21 +1218,19 @@ Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: %1 - Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: - -%1 + No se puede asignar un control único más de una vez. Controles duplicados asignados a los siguientes botones: Touchpad Left - Touchpad Left + Izquierda del Touchpad Touchpad Center - Touchpad Center + Centro del Touchpad Touchpad Right - Touchpad Right + Derecha del Touchpad diff --git a/src/qt_gui/translations/it_IT.ts b/src/qt_gui/translations/it_IT.ts index c636d48e4..5cd1a01f8 100644 --- a/src/qt_gui/translations/it_IT.ts +++ b/src/qt_gui/translations/it_IT.ts @@ -527,71 +527,71 @@ unmapped - unmapped + non mappato L1 - L1 + L1 R1 - R1 + R1 L2 - L2 + L2 Options - Options + Opzioni R2 - R2 + R2 Touchpad Left - Touchpad Left + Touchpad Sinistra Touchpad Center - Touchpad Center + Touchpad Centrale Touchpad Right - Touchpad Right + Touchpad Destra Triangle - Triangle + Triangolo Square - Square + Quadrato Circle - Circle + Cerchio Cross - Cross + Croce Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: %1 - Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: + Non è possibile associare più di una volta qualsiasi input univoco. Sono presenti input duplicati mappati ai seguenti pulsanti: %1 Press a button - Press a button + Premi un pulsante Move analog stick - Move analog stick + Muovi levetta analogica @@ -778,7 +778,7 @@ Favorite - Favorite + Preferiti @@ -984,11 +984,11 @@ Remove from Favorites - Remove from Favorites + Rimuovi dai Preferiti Add to Favorites - Add to Favorites + Aggiungi ai preferiti @@ -1226,15 +1226,15 @@ Touchpad Left - Touchpad Left + Touchpad Sinistra Touchpad Center - Touchpad Center + Touchpad Centrale Touchpad Right - Touchpad Right + Touchpad Destra diff --git a/src/qt_gui/translations/nb_NO.ts b/src/qt_gui/translations/nb_NO.ts index 302229933..eea0f6eb7 100644 --- a/src/qt_gui/translations/nb_NO.ts +++ b/src/qt_gui/translations/nb_NO.ts @@ -495,7 +495,7 @@ Override Lightbar Color - Overstyr farge på lyslinja + Overstyr farge på lyslisten Override Color @@ -527,71 +527,71 @@ unmapped - unmapped + Ikke tildelt L1 - L1 + L1 R1 - R1 + R1 L2 - L2 + L2 Options - Options + Options R2 - R2 + R2 Touchpad Left - Touchpad Left + Venstre berøringsplate Touchpad Center - Touchpad Center + Midt berøringsplate Touchpad Right - Touchpad Right + Høyre berøringsplate Triangle - Triangle + Triangel Square - Square + Firkant Circle - Circle + Sirkel Cross - Cross + Kryss Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: %1 - Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: + Kan ikke tildele samme inndata mer enn én gang. Dupliserte inndata tildeles følgende taster: %1 Press a button - Press a button + Trykk på en knapp Move analog stick - Move analog stick + Flytt på analog stikke @@ -1226,15 +1226,15 @@ Touchpad Left - Berøringsplate venstre + Venstre berøringsplate Touchpad Center - Berøringsplate midten + Midt berøringsplate Touchpad Right - Berøringsplate høyre + Høyre berøringsplate diff --git a/src/qt_gui/translations/pt_BR.ts b/src/qt_gui/translations/pt_BR.ts index c2058bf09..7fa0aea67 100644 --- a/src/qt_gui/translations/pt_BR.ts +++ b/src/qt_gui/translations/pt_BR.ts @@ -555,7 +555,7 @@ Touchpad Center - Touchpad Center + Centro do Touchpad Touchpad Right @@ -571,11 +571,11 @@ Circle - Circle + Círculo Cross - Cross + Cruz Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: @@ -591,7 +591,7 @@ Move analog stick - Move analog stick + Mover analógico diff --git a/src/qt_gui/translations/ru_RU.ts b/src/qt_gui/translations/ru_RU.ts index 7293a956e..1bbc2b3b8 100644 --- a/src/qt_gui/translations/ru_RU.ts +++ b/src/qt_gui/translations/ru_RU.ts @@ -591,7 +591,7 @@ Move analog stick - Move analog stick + Двиньте аналоговый стик diff --git a/src/qt_gui/translations/sq_AL.ts b/src/qt_gui/translations/sq_AL.ts index 71308a99f..9908dc564 100644 --- a/src/qt_gui/translations/sq_AL.ts +++ b/src/qt_gui/translations/sq_AL.ts @@ -527,71 +527,71 @@ unmapped - unmapped + pacaktuar L1 - L1 + L1 R1 - R1 + R1 L2 - L2 + L2 Options - Options + Options R2 - R2 + R2 Touchpad Left - Touchpad Left + Paneli me Prekje Majtas Touchpad Center - Touchpad Center + Paneli me Prekje në Qendër Touchpad Right - Touchpad Right + Paneli me Prekje Djathtas Triangle - Triangle + Trekëndësh Square - Square + Katror Circle - Circle + Rreth Cross - Cross + Kryq Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: %1 - Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: + Nuk mund të caktohet e njëjta hyrje unike më shumë se një herë. Hyrjet e dyfishta janë caktuar në butonët e mëposhtëm: %1 Press a button - Press a button + Shtyp një buton Move analog stick - Move analog stick + Lëviz levën diff --git a/src/qt_gui/translations/sv_SE.ts b/src/qt_gui/translations/sv_SE.ts index 83081bf1a..65762f974 100644 --- a/src/qt_gui/translations/sv_SE.ts +++ b/src/qt_gui/translations/sv_SE.ts @@ -527,71 +527,71 @@ unmapped - unmapped + omappad L1 - L1 + L1 R1 - R1 + R1 L2 - L2 + L2 Options - Options + Options R2 - R2 + R2 Touchpad Left - Touchpad Left + Pekplatta vänster Touchpad Center - Touchpad Center + Pekplatta i mitten Touchpad Right - Touchpad Right + Pekplatta höger Triangle - Triangle + Triangel Square - Square + Fyrkant Circle - Circle + Cirkel Cross - Cross + Kors Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: %1 - Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: + Det går inte att binda samma unika inmatning mer än en gång. Dubbletta inmatningar är mappade till följande knappar: %1 Press a button - Press a button + Tryck på en knapp Move analog stick - Move analog stick + Rör analog spak diff --git a/src/qt_gui/translations/zh_CN.ts b/src/qt_gui/translations/zh_CN.ts index 2956d9621..4504a2c11 100644 --- a/src/qt_gui/translations/zh_CN.ts +++ b/src/qt_gui/translations/zh_CN.ts @@ -527,71 +527,71 @@ unmapped - unmapped + 未映射 L1 - L1 + L1 R1 - R1 + R1 L2 - L2 + L2 Options - Options + 选项 R2 - R2 + R2 Touchpad Left - Touchpad Left + 触摸板左侧 Touchpad Center - Touchpad Center + 触控板中间 Touchpad Right - Touchpad Right + 触摸板右侧 Triangle - Triangle + 三角 Square - Square + 方框 Circle - Circle + Cross - Cross + Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: %1 - Cannot bind any unique input more than once. Duplicate inputs mapped to the following buttons: + 不能多次绑定任何同一输入。请重新映射以下按键的输入: %1 Press a button - Press a button + 请按一个按键 Move analog stick - Move analog stick + 移动模拟摇杆 diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index e37acb2e4..80c8b836b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -54,17 +54,23 @@ Id SharedAtomicU64(EmitContext& ctx, Id offset, Id value, }); } +Id SharedAtomicU64IncDec(EmitContext& ctx, Id offset, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) { + const Id shift_id{ctx.ConstU32(3U)}; + const Id index{ctx.OpShiftRightLogical(ctx.U32[1], offset, shift_id)}; + const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)}; + const Id pointer{ctx.EmitSharedMemoryAccess(ctx.shared_u64, ctx.shared_memory_u64, index)}; + const auto [scope, semantics]{AtomicArgs(ctx)}; + return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] { + return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics); + }); +} + template Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value, Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { const auto& buffer = ctx.buffers[handle]; - const auto type = [&] { - if constexpr (is_float) { - return ctx.F32[1]; - } else { - return ctx.U32[1]; - } - }(); + const Id type = is_float ? ctx.F32[1] : ctx.U32[1]; if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) { address = ctx.OpIAdd(ctx.U32[1], address, offset); } @@ -148,42 +154,82 @@ Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) { return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax); } +Id EmitSharedAtomicUMax64(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicUMax); +} + Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) { return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax); } +Id EmitSharedAtomicSMax64(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicSMax); +} + Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) { return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin); } +Id EmitSharedAtomicUMin64(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicUMin); +} + Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) { return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin); } +Id EmitSharedAtomicSMin64(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicSMin); +} + Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) { return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd); } +Id EmitSharedAtomicAnd64(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicAnd); +} + Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) { return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr); } +Id EmitSharedAtomicOr64(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicOr); +} + Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) { return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor); } +Id EmitSharedAtomicXor64(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicXor); +} + Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value) { return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicISub); } +Id EmitSharedAtomicISub64(EmitContext& ctx, Id offset, Id value) { + return SharedAtomicU64(ctx, offset, value, &Sirit::Module::OpAtomicISub); +} + Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset) { return SharedAtomicU32IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement); } +Id EmitSharedAtomicInc64(EmitContext& ctx, Id offset) { + return SharedAtomicU64IncDec(ctx, offset, &Sirit::Module::OpAtomicIIncrement); +} + Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset) { return SharedAtomicU32IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement); } +Id EmitSharedAtomicDec64(EmitContext& ctx, Id offset) { + return SharedAtomicU64IncDec(ctx, offset, &Sirit::Module::OpAtomicIDecrement); +} + Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicIAdd); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 1ac2266bd..8a0c586e9 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -139,15 +139,25 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicIAdd64(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicUMax64(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicSMax64(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicUMin64(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicSMin64(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicAnd64(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicOr64(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicXor64(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset); +Id EmitSharedAtomicInc64(EmitContext& ctx, Id offset); Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset); +Id EmitSharedAtomicDec64(EmitContext& ctx, Id offset); Id EmitSharedAtomicISub32(EmitContext& ctx, Id offset, Id value); +Id EmitSharedAtomicISub64(EmitContext& ctx, Id offset, Id value); Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3); @@ -353,7 +363,7 @@ Id EmitFPIsInf32(EmitContext& ctx, Id value); Id EmitFPIsInf64(EmitContext& ctx, Id value); Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitIAdd64(EmitContext& ctx, Id a, Id b); -Id EmitIAddCary32(EmitContext& ctx, Id a, Id b); +Id EmitIAddCarry32(EmitContext& ctx, Id a, Id b); Id EmitISub32(EmitContext& ctx, Id a, Id b); Id EmitISub64(EmitContext& ctx, Id a, Id b); Id EmitSMulHi(EmitContext& ctx, Id a, Id b); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index ddc1e7574..01652c1cf 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -60,7 +60,7 @@ Id EmitIAdd64(EmitContext& ctx, Id a, Id b) { return ctx.OpIAdd(ctx.U64, a, b); } -Id EmitIAddCary32(EmitContext& ctx, Id a, Id b) { +Id EmitIAddCarry32(EmitContext& ctx, Id a, Id b) { return ctx.OpIAddCarry(ctx.full_result_u32x2, a, b); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 77336c9ec..6a731d05c 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -76,6 +76,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf } else { SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450); } + String(fmt::format("{:#x}", info.pgm_hash)); AddCapability(spv::Capability::Shader); DefineArithmeticTypes(); @@ -700,7 +701,7 @@ void EmitContext::DefineOutputs() { void EmitContext::DefinePushDataBlock() { // Create push constants block for instance steps rates const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4], - U32[4], U32[4], U32[4], U32[4], U32[4]), + U32[4], U32[4], U32[4], U32[4], U32[4], U32[2]), "AuxData")}; Decorate(struct_type, spv::Decoration::Block); MemberName(struct_type, PushData::Step0Index, "sr0"); @@ -715,6 +716,7 @@ void EmitContext::DefinePushDataBlock() { MemberName(struct_type, PushData::UdRegsIndex + 3, "ud_regs3"); MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0"); MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1"); + MemberName(struct_type, PushData::BufOffsetIndex + 2, "buf_offsets2"); MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U); MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U); MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U); @@ -727,6 +729,7 @@ void EmitContext::DefinePushDataBlock() { MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U); MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U); MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U); + MemberDecorate(struct_type, PushData::BufOffsetIndex + 2, spv::Decoration::Offset, 120U); push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant); Name(push_data_block, "push_data"); interfaces.push_back(push_data_block); diff --git a/src/shader_recompiler/frontend/control_flow_graph.cpp b/src/shader_recompiler/frontend/control_flow_graph.cpp index b53db9e94..805fdb108 100644 --- a/src/shader_recompiler/frontend/control_flow_graph.cpp +++ b/src/shader_recompiler/frontend/control_flow_graph.cpp @@ -188,14 +188,15 @@ void CFG::SplitDivergenceScopes() { const bool is_close = is_close_scope(inst); if ((is_close || index == blk->end_index) && curr_begin != -1) { // If there are no instructions inside scope don't do anything. - if (index - curr_begin == 1) { + if (index - curr_begin == 1 && is_close) { curr_begin = -1; continue; } // If all instructions in the scope ignore exec masking, we shouldn't insert a // scope. const auto start = inst_list.begin() + curr_begin + 1; - if (!std::ranges::all_of(start, inst_list.begin() + index, IgnoresExecMask)) { + if (!std::ranges::all_of(start, inst_list.begin() + index + !is_close, + IgnoresExecMask)) { // Determine the first instruction affected by the exec mask. do { ++curr_begin; diff --git a/src/shader_recompiler/frontend/format.cpp b/src/shader_recompiler/frontend/format.cpp index 52c8c733e..6c4427e5f 100644 --- a/src/shader_recompiler/frontend/format.cpp +++ b/src/shader_recompiler/frontend/format.cpp @@ -397,7 +397,7 @@ constexpr std::array InstructionFormatSOPP = {{ // 17 = S_SENDMSGHALT {InstClass::ScalarProgFlow, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, // 18 = S_TRAP - {InstClass::Undefined, InstCategory::Undefined, 0, 1, ScalarType::Any, ScalarType::Any}, + {InstClass::Undefined, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, // 19 = S_ICACHE_INV {InstClass::ScalarCache, InstCategory::FlowControl, 0, 1, ScalarType::Any, ScalarType::Any}, // 20 = S_INCPERFLEVEL diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index 8ead93f78..634486fc4 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -3,7 +3,6 @@ #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/ir/reg.h" -#include "shader_recompiler/profile.h" #include "shader_recompiler/runtime_info.h" namespace Shader::Gcn { @@ -12,29 +11,29 @@ void Translator::EmitDataShare(const GcnInst& inst) { switch (inst.opcode) { // DS case Opcode::DS_ADD_U32: - return DS_ADD_U32(inst, false); + return DS_OP(inst, AtomicOp::Add, false); case Opcode::DS_ADD_U64: - return DS_ADD_U64(inst, false); + return DS_OP(inst, AtomicOp::Add, false); case Opcode::DS_SUB_U32: - return DS_SUB_U32(inst, false); + return DS_OP(inst, AtomicOp::Sub, false); case Opcode::DS_INC_U32: - return DS_INC_U32(inst, false); + return DS_OP(inst, AtomicOp::Inc, false); case Opcode::DS_DEC_U32: - return DS_DEC_U32(inst, false); + return DS_OP(inst, AtomicOp::Dec, false); case Opcode::DS_MIN_I32: - return DS_MIN_U32(inst, true, false); + return DS_OP(inst, AtomicOp::Smin, false); case Opcode::DS_MAX_I32: - return DS_MAX_U32(inst, true, false); + return DS_OP(inst, AtomicOp::Smax, false); case Opcode::DS_MIN_U32: - return DS_MIN_U32(inst, false, false); + return DS_OP(inst, AtomicOp::Umin, false); case Opcode::DS_MAX_U32: - return DS_MAX_U32(inst, false, false); + return DS_OP(inst, AtomicOp::Umax, false); case Opcode::DS_AND_B32: - return DS_AND_B32(inst, false); + return DS_OP(inst, AtomicOp::And, false); case Opcode::DS_OR_B32: - return DS_OR_B32(inst, false); + return DS_OP(inst, AtomicOp::Or, false); case Opcode::DS_XOR_B32: - return DS_XOR_B32(inst, false); + return DS_OP(inst, AtomicOp::Xor, false); case Opcode::DS_WRITE_B32: return DS_WRITE(32, false, false, false, inst); case Opcode::DS_WRITE2_B32: @@ -42,19 +41,19 @@ void Translator::EmitDataShare(const GcnInst& inst) { case Opcode::DS_WRITE2ST64_B32: return DS_WRITE(32, false, true, true, inst); case Opcode::DS_ADD_RTN_U32: - return DS_ADD_U32(inst, true); + return DS_OP(inst, AtomicOp::Add, true); case Opcode::DS_SUB_RTN_U32: - return DS_SUB_U32(inst, true); + return DS_OP(inst, AtomicOp::Sub, true); case Opcode::DS_MIN_RTN_U32: - return DS_MIN_U32(inst, false, true); + return DS_OP(inst, AtomicOp::Umin, true); case Opcode::DS_MAX_RTN_U32: - return DS_MAX_U32(inst, false, true); + return DS_OP(inst, AtomicOp::Umax, true); case Opcode::DS_AND_RTN_B32: - return DS_AND_B32(inst, true); + return DS_OP(inst, AtomicOp::And, true); case Opcode::DS_OR_RTN_B32: - return DS_OR_B32(inst, true); + return DS_OP(inst, AtomicOp::Or, true); case Opcode::DS_XOR_RTN_B32: - return DS_XOR_B32(inst, true); + return DS_OP(inst, AtomicOp::Xor, true); case Opcode::DS_SWIZZLE_B32: return DS_SWIZZLE_B32(inst); case Opcode::DS_READ_B32: @@ -117,92 +116,63 @@ void Translator::V_WRITELANE_B32(const GcnInst& inst) { // DS -void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) { +template +void Translator::DS_OP(const GcnInst& inst, AtomicOp op, bool rtn) { + const bool is_gds = inst.control.ds.gds; const IR::U32 addr{GetSrc(inst.src[0])}; - const IR::U32 data{GetSrc(inst.src[1])}; + const T data = [&] { + if (op == AtomicOp::Inc || op == AtomicOp::Dec) { + return T{}; + } + if constexpr (std::is_same_v) { + return GetSrc(inst.src[1]); + } else { + return GetSrc64(inst.src[1]); + } + }(); const IR::U32 offset = ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data); + const T original_val = [&] -> T { + switch (op) { + case AtomicOp::Add: + return ir.SharedAtomicIAdd(addr_offset, data, is_gds); + case AtomicOp::Umin: + return ir.SharedAtomicIMin(addr_offset, data, false, is_gds); + case AtomicOp::Smin: + return ir.SharedAtomicIMin(addr_offset, data, true, is_gds); + case AtomicOp::Umax: + return ir.SharedAtomicIMax(addr_offset, data, false, is_gds); + case AtomicOp::Smax: + return ir.SharedAtomicIMax(addr_offset, data, true, is_gds); + case AtomicOp::And: + return ir.SharedAtomicAnd(addr_offset, data, is_gds); + case AtomicOp::Or: + return ir.SharedAtomicOr(addr_offset, data, is_gds); + case AtomicOp::Xor: + return ir.SharedAtomicXor(addr_offset, data, is_gds); + case AtomicOp::Sub: + return ir.SharedAtomicISub(addr_offset, data, is_gds); + case AtomicOp::Inc: + return ir.SharedAtomicInc(addr_offset, is_gds); + case AtomicOp::Dec: + return ir.SharedAtomicDec(addr_offset, is_gds); + default: + UNREACHABLE(); + } + }(); if (rtn) { - SetDst(inst.dst[0], IR::U32{original_val}); - } -} - -void Translator::DS_ADD_U64(const GcnInst& inst, bool rtn) { - const IR::U32 addr{GetSrc(inst.src[0])}; - const IR::U64 data{GetSrc64(inst.src[1])}; - const IR::U32 offset = - ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); - const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data); - if (rtn) { - SetDst64(inst.dst[0], IR::U64{original_val}); - } -} - -void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) { - const IR::U32 addr{GetSrc(inst.src[0])}; - const IR::U32 data{GetSrc(inst.src[1])}; - const IR::U32 offset = - ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); - const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, is_signed); - if (rtn) { - SetDst(inst.dst[0], IR::U32{original_val}); - } -} - -void Translator::DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn) { - const IR::U32 addr{GetSrc(inst.src[0])}; - const IR::U32 data{GetSrc(inst.src[1])}; - const IR::U32 offset = - ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); - const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, is_signed); - if (rtn) { - SetDst(inst.dst[0], IR::U32{original_val}); - } -} - -void Translator::DS_AND_B32(const GcnInst& inst, bool rtn) { - const IR::U32 addr{GetSrc(inst.src[0])}; - const IR::U32 data{GetSrc(inst.src[1])}; - const IR::U32 offset = - ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); - const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicAnd(addr_offset, data); - if (rtn) { - SetDst(inst.dst[0], IR::U32{original_val}); - } -} - -void Translator::DS_OR_B32(const GcnInst& inst, bool rtn) { - const IR::U32 addr{GetSrc(inst.src[0])}; - const IR::U32 data{GetSrc(inst.src[1])}; - const IR::U32 offset = - ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); - const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicOr(addr_offset, data); - if (rtn) { - SetDst(inst.dst[0], IR::U32{original_val}); - } -} - -void Translator::DS_XOR_B32(const GcnInst& inst, bool rtn) { - const IR::U32 addr{GetSrc(inst.src[0])}; - const IR::U32 data{GetSrc(inst.src[1])}; - const IR::U32 offset = - ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); - const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicXor(addr_offset, data); - if (rtn) { - SetDst(inst.dst[0], IR::U32{original_val}); + if constexpr (std::is_same_v) { + SetDst(inst.dst[0], original_val); + } else { + SetDst64(inst.dst[0], original_val); + } } } void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst) { + const bool is_gds = inst.control.ds.gds; const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; const IR::VectorReg data0{inst.src[1].code}; const IR::VectorReg data1{inst.src[2].code}; @@ -220,33 +190,85 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid ir.WriteShared(64, ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1))), - addr0); + addr0, is_gds); } else if (bit_size == 32) { - ir.WriteShared(32, ir.GetVectorReg(data0), addr0); + ir.WriteShared(32, ir.GetVectorReg(data0), addr0, is_gds); } else if (bit_size == 16) { - ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data0)), addr0); + ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data0)), addr0, is_gds); } const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj))); if (bit_size == 64) { ir.WriteShared(64, ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1))), - addr1); + addr1, is_gds); } else if (bit_size == 32) { - ir.WriteShared(32, ir.GetVectorReg(data1), addr1); + ir.WriteShared(32, ir.GetVectorReg(data1), addr1, is_gds); } else if (bit_size == 16) { - ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data1)), addr1); + ir.WriteShared(16, ir.UConvert(16, ir.GetVectorReg(data1)), addr1, is_gds); } } else { const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset)); if (bit_size == 64) { const IR::Value data = ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)); - ir.WriteShared(bit_size, ir.PackUint2x32(data), addr0); + ir.WriteShared(bit_size, ir.PackUint2x32(data), addr0, is_gds); } else if (bit_size == 32) { - ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0); + ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0, is_gds); } else if (bit_size == 16) { - ir.WriteShared(bit_size, ir.UConvert(16, ir.GetVectorReg(data0)), addr0); + ir.WriteShared(bit_size, ir.UConvert(16, ir.GetVectorReg(data0)), addr0, is_gds); + } + } +} + +void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, + const GcnInst& inst) { + const bool is_gds = inst.control.ds.gds; + const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; + IR::VectorReg dst_reg{inst.dst[0].code}; + const u32 offset = (inst.control.ds.offset1 << 8u) + inst.control.ds.offset0; + if (info.stage == Stage::Fragment) { + ASSERT_MSG(!is_pair && bit_size == 32 && offset % 256 == 0, + "Unexpected shared memory offset alignment: {}", offset); + ir.SetVectorReg(dst_reg, ir.GetVectorReg(GetScratchVgpr(offset))); + return; + } + if (is_pair) { + // Pair loads are either 32 or 64-bit + const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1); + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj))); + const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0, is_gds); + if (bit_size == 64) { + const auto vector = ir.UnpackUint2x32(IR::U64{data0}); + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)}); + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)}); + } else if (bit_size == 32) { + ir.SetVectorReg(dst_reg++, IR::U32{data0}); + } else if (bit_size == 16) { + ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data0})}); + } + const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj))); + const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1, is_gds); + if (bit_size == 64) { + const auto vector = ir.UnpackUint2x32(IR::U64{data1}); + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)}); + ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)}); + } else if (bit_size == 32) { + ir.SetVectorReg(dst_reg++, IR::U32{data1}); + } else if (bit_size == 16) { + ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data1})}); + } + } else { + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset)); + const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0, is_gds); + if (bit_size == 64) { + const auto vector = ir.UnpackUint2x32(IR::U64{data}); + ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(vector, 0)}); + ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(vector, 1)}); + } else if (bit_size == 32) { + ir.SetVectorReg(dst_reg, IR::U32{data}); + } else if (bit_size == 16) { + ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data})}); } } } @@ -263,91 +285,6 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { SetDst(inst.dst[0], ir.QuadShuffle(src, index)); } -void Translator::DS_INC_U32(const GcnInst& inst, bool rtn) { - const IR::U32 addr{GetSrc(inst.src[0])}; - const IR::U32 offset = - ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); - const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicInc(addr_offset); - if (rtn) { - SetDst(inst.dst[0], IR::U32{original_val}); - } -} - -void Translator::DS_DEC_U32(const GcnInst& inst, bool rtn) { - const IR::U32 addr{GetSrc(inst.src[0])}; - const IR::U32 offset = - ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); - const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicDec(addr_offset); - if (rtn) { - SetDst(inst.dst[0], IR::U32{original_val}); - } -} - -void Translator::DS_SUB_U32(const GcnInst& inst, bool rtn) { - const IR::U32 addr{GetSrc(inst.src[0])}; - const IR::U32 data{GetSrc(inst.src[1])}; - const IR::U32 offset = - ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)); - const IR::U32 addr_offset = ir.IAdd(addr, offset); - const IR::Value original_val = ir.SharedAtomicISub(addr_offset, data); - if (rtn) { - SetDst(inst.dst[0], IR::U32{original_val}); - } -} - -void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, - const GcnInst& inst) { - const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; - IR::VectorReg dst_reg{inst.dst[0].code}; - const u32 offset = (inst.control.ds.offset1 << 8u) + inst.control.ds.offset0; - if (info.stage == Stage::Fragment) { - ASSERT_MSG(!is_pair && bit_size == 32 && offset % 256 == 0, - "Unexpected shared memory offset alignment: {}", offset); - ir.SetVectorReg(dst_reg, ir.GetVectorReg(GetScratchVgpr(offset))); - return; - } - if (is_pair) { - // Pair loads are either 32 or 64-bit - const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1); - const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj))); - const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0); - if (bit_size == 64) { - const auto vector = ir.UnpackUint2x32(IR::U64{data0}); - ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)}); - ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)}); - } else if (bit_size == 32) { - ir.SetVectorReg(dst_reg++, IR::U32{data0}); - } else if (bit_size == 16) { - ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data0})}); - } - const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj))); - const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1); - if (bit_size == 64) { - const auto vector = ir.UnpackUint2x32(IR::U64{data1}); - ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 0)}); - ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(vector, 1)}); - } else if (bit_size == 32) { - ir.SetVectorReg(dst_reg++, IR::U32{data1}); - } else if (bit_size == 16) { - ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data1})}); - } - } else { - const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset)); - const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0); - if (bit_size == 64) { - const auto vector = ir.UnpackUint2x32(IR::U64{data}); - ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(vector, 0)}); - ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(vector, 1)}); - } else if (bit_size == 32) { - ir.SetVectorReg(dst_reg, IR::U32{data}); - } else if (bit_size == 16) { - ir.SetVectorReg(dst_reg++, IR::U32{ir.UConvert(32, IR::U16{data})}); - } - } -} - void Translator::DS_APPEND(const GcnInst& inst) { const u32 inst_offset = (u32(inst.control.ds.offset1) << 8u) + inst.control.ds.offset0; const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset)); diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index 48f977f49..276b55567 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -586,6 +586,15 @@ void Translator::S_MOV(const GcnInst& inst) { } void Translator::S_MOV_B64(const GcnInst& inst) { + // Moving SGPR to SGPR is used for thread masks, like most operations, but it can also be used + // for moving sharps. + if (inst.dst[0].field == OperandField::ScalarGPR && + inst.src[0].field == OperandField::ScalarGPR) { + ir.SetScalarReg(IR::ScalarReg(inst.dst[0].code), + ir.GetScalarReg(IR::ScalarReg(inst.src[0].code))); + ir.SetScalarReg(IR::ScalarReg(inst.dst[0].code + 1), + ir.GetScalarReg(IR::ScalarReg(inst.src[0].code + 1))); + } const IR::U1 src = [&] { switch (inst.src[0].field) { case OperandField::VccLo: diff --git a/src/shader_recompiler/frontend/translate/scalar_flow.cpp b/src/shader_recompiler/frontend/translate/scalar_flow.cpp index cd1cf51f0..7b57d89ca 100644 --- a/src/shader_recompiler/frontend/translate/scalar_flow.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_flow.cpp @@ -16,6 +16,9 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) { case Opcode::S_SETPRIO: LOG_WARNING(Render_Vulkan, "S_SETPRIO instruction!"); return; + case Opcode::S_TRAP: + LOG_WARNING(Render_Vulkan, "S_TRAP instruction!"); + return; case Opcode::S_GETPC_B64: return S_GETPC_B64(pc, inst); case Opcode::S_SETPC_B64: diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index b5bfec344..4b5ff827b 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -270,21 +270,13 @@ public: // Data share // DS - void DS_ADD_U32(const GcnInst& inst, bool rtn); - void DS_ADD_U64(const GcnInst& inst, bool rtn); - void DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn); - void DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn); + template + void DS_OP(const GcnInst& inst, AtomicOp op, bool rtn); void DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst); - void DS_SWIZZLE_B32(const GcnInst& inst); - void DS_AND_B32(const GcnInst& inst, bool rtn); - void DS_OR_B32(const GcnInst& inst, bool rtn); - void DS_XOR_B32(const GcnInst& inst, bool rtn); void DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, const GcnInst& inst); + void DS_SWIZZLE_B32(const GcnInst& inst); void DS_APPEND(const GcnInst& inst); void DS_CONSUME(const GcnInst& inst); - void DS_SUB_U32(const GcnInst& inst, bool rtn); - void DS_INC_U32(const GcnInst& inst, bool rtn); - void DS_DEC_U32(const GcnInst& inst, bool rtn); // Buffer Memory // MUBUF / MTBUF diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 54f1088f2..017c77fb0 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -565,7 +565,8 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) { } // v_mbcnt_hi_u32_b32 vX, exec_hi, 0/vZ if ((inst.src[0].field == OperandField::ExecHi || - inst.src[0].field == OperandField::VccHi) && + inst.src[0].field == OperandField::VccHi || + inst.src[0].field == OperandField::ScalarGPR) && (inst.src[1].field == OperandField::ConstZero || inst.src[1].field == OperandField::VectorGPR)) { return SetDst(inst.dst[0], GetSrc(inst.src[1])); @@ -579,7 +580,8 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) { } // v_mbcnt_lo_u32_b32 vY, exec_lo, vX // used combined with above for append buffer indexing. - if (inst.src[0].field == OperandField::ExecLo || inst.src[0].field == OperandField::VccLo) { + if (inst.src[0].field == OperandField::ExecLo || inst.src[0].field == OperandField::VccLo || + inst.src[0].field == OperandField::ScalarGPR) { return SetDst(inst.dst[0], GetSrc(inst.src[1])); } UNREACHABLE(); @@ -623,12 +625,15 @@ void Translator::V_ADDC_U32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; const IR::U32 carry{GetCarryIn(inst)}; - const IR::U32 result{ir.IAdd(ir.IAdd(src0, src1), carry)}; - SetDst(inst.dst[0], result); + const IR::Value tmp1{ir.IAddCarry(src0, src1)}; + const IR::U32 result1{ir.CompositeExtract(tmp1, 0)}; + const IR::U32 carry_out1{ir.CompositeExtract(tmp1, 1)}; + const IR::Value tmp2{ir.IAddCarry(result1, carry)}; + const IR::U32 result2{ir.CompositeExtract(tmp2, 0)}; + const IR::U32 carry_out2{ir.CompositeExtract(tmp2, 1)}; + SetDst(inst.dst[0], result2); - const IR::U1 less_src0{ir.ILessThan(result, src0, false)}; - const IR::U1 less_src1{ir.ILessThan(result, src1, false)}; - const IR::U1 did_overflow{ir.LogicalOr(less_src0, less_src1)}; + const IR::U1 did_overflow{ir.INotEqual(ir.BitwiseOr(carry_out1, carry_out2), ir.Imm32(0))}; SetCarryOut(inst, did_overflow); } diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 91f545cfd..df20f7f73 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -192,9 +192,10 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_ const IR::VectorReg vaddr{inst.src[0].code}; const IR::ScalarReg sharp{inst.src[2].code * 4}; const IR::Value soffset{GetSrc(inst.src[3])}; + const bool has_soffset = !soffset.IsImmediate() || soffset.U32() != 0; if (info.stage != Stage::Geometry) { - ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, - "Non immediate offset not supported"); + ASSERT_MSG(!has_soffset || !mubuf.offen, + "Having both scalar and vector offsets is not supported"); } const IR::Value address = [&] -> IR::Value { @@ -204,15 +205,21 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_inst_typed, bool is_buffer_ if (mubuf.idxen && mubuf.offen) { return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1)); } + if (mubuf.idxen && has_soffset) { + return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset); + } if (mubuf.idxen || mubuf.offen) { return ir.GetVectorReg(vaddr); } + if (has_soffset) { + return soffset; + } return {}; }(); IR::BufferInstInfo buffer_info{}; buffer_info.index_enable.Assign(mubuf.idxen); - buffer_info.offset_enable.Assign(mubuf.offen); + buffer_info.offset_enable.Assign(mubuf.offen || has_soffset); buffer_info.inst_offset.Assign(mubuf.offset); buffer_info.globally_coherent.Assign(mubuf.glc); buffer_info.system_coherent.Assign(mubuf.slc); diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 72977b711..9703643e8 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -25,7 +25,7 @@ namespace Shader { static constexpr size_t NumUserDataRegs = 16; static constexpr size_t NumImages = 64; -static constexpr size_t NumBuffers = 32; +static constexpr size_t NumBuffers = 40; static constexpr size_t NumSamplers = 16; static constexpr size_t NumFMasks = 8; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 3d64cc5da..b88e1a17d 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -291,78 +291,137 @@ void IREmitter::SetPatch(Patch patch, const F32& value) { Inst(Opcode::SetPatch, patch, value); } -Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { +Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset, bool is_gds) { switch (bit_size) { case 16: - return Inst(Opcode::LoadSharedU16, offset); + return Inst(Opcode::LoadSharedU16, Flags{is_gds}, offset); case 32: - return Inst(Opcode::LoadSharedU32, offset); + return Inst(Opcode::LoadSharedU32, Flags{is_gds}, offset); case 64: - return Inst(Opcode::LoadSharedU64, offset); + return Inst(Opcode::LoadSharedU64, Flags{is_gds}, offset); default: UNREACHABLE_MSG("Invalid bit size {}", bit_size); } } -void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) { +void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset, bool is_gds) { switch (bit_size) { case 16: - Inst(Opcode::WriteSharedU16, offset, value); + Inst(Opcode::WriteSharedU16, Flags{is_gds}, offset, value); break; case 32: - Inst(Opcode::WriteSharedU32, offset, value); + Inst(Opcode::WriteSharedU32, Flags{is_gds}, offset, value); break; case 64: - Inst(Opcode::WriteSharedU64, offset, value); + Inst(Opcode::WriteSharedU64, Flags{is_gds}, offset, value); break; default: UNREACHABLE_MSG("Invalid bit size {}", bit_size); } } -U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data) { +U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data, bool is_gds) { switch (data.Type()) { case Type::U32: - return Inst(Opcode::SharedAtomicIAdd32, address, data); + return Inst(Opcode::SharedAtomicIAdd32, Flags{is_gds}, address, data); case Type::U64: - return Inst(Opcode::SharedAtomicIAdd64, address, data); + return Inst(Opcode::SharedAtomicIAdd64, Flags{is_gds}, address, data); default: ThrowInvalidType(data.Type()); } } -U32 IREmitter::SharedAtomicIMin(const U32& address, const U32& data, bool is_signed) { - return is_signed ? Inst(Opcode::SharedAtomicSMin32, address, data) - : Inst(Opcode::SharedAtomicUMin32, address, data); +U32U64 IREmitter::SharedAtomicIMin(const U32& address, const U32U64& data, bool is_signed, + bool is_gds) { + switch (data.Type()) { + case Type::U32: + return Inst(is_signed ? Opcode::SharedAtomicSMin32 : Opcode::SharedAtomicUMin32, + Flags{is_gds}, address, data); + case Type::U64: + return Inst(is_signed ? Opcode::SharedAtomicSMin64 : Opcode::SharedAtomicUMin64, + Flags{is_gds}, address, data); + default: + ThrowInvalidType(data.Type()); + } } -U32 IREmitter::SharedAtomicIMax(const U32& address, const U32& data, bool is_signed) { - return is_signed ? Inst(Opcode::SharedAtomicSMax32, address, data) - : Inst(Opcode::SharedAtomicUMax32, address, data); +U32U64 IREmitter::SharedAtomicIMax(const U32& address, const U32U64& data, bool is_signed, + bool is_gds) { + switch (data.Type()) { + case Type::U32: + return Inst(is_signed ? Opcode::SharedAtomicSMax32 : Opcode::SharedAtomicUMax32, + Flags{is_gds}, address, data); + case Type::U64: + return Inst(is_signed ? Opcode::SharedAtomicSMax64 : Opcode::SharedAtomicUMax64, + Flags{is_gds}, address, data); + default: + ThrowInvalidType(data.Type()); + } } -U32 IREmitter::SharedAtomicAnd(const U32& address, const U32& data) { - return Inst(Opcode::SharedAtomicAnd32, address, data); +U32U64 IREmitter::SharedAtomicAnd(const U32& address, const U32U64& data, bool is_gds) { + switch (data.Type()) { + case Type::U32: + return Inst(Opcode::SharedAtomicAnd32, Flags{is_gds}, address, data); + case Type::U64: + return Inst(Opcode::SharedAtomicAnd64, Flags{is_gds}, address, data); + default: + ThrowInvalidType(data.Type()); + } } -U32 IREmitter::SharedAtomicOr(const U32& address, const U32& data) { +U32U64 IREmitter::SharedAtomicOr(const U32& address, const U32U64& data, bool is_gds) { + switch (data.Type()) { + case Type::U32: + return Inst(Opcode::SharedAtomicAnd32, Flags{is_gds}, address, data); + case Type::U64: + return Inst(Opcode::SharedAtomicAnd64, Flags{is_gds}, address, data); + default: + ThrowInvalidType(data.Type()); + } return Inst(Opcode::SharedAtomicOr32, address, data); } -U32 IREmitter::SharedAtomicXor(const U32& address, const U32& data) { - return Inst(Opcode::SharedAtomicXor32, address, data); +U32U64 IREmitter::SharedAtomicXor(const U32& address, const U32U64& data, bool is_gds) { + switch (data.Type()) { + case Type::U32: + return Inst(Opcode::SharedAtomicXor32, Flags{is_gds}, address, data); + case Type::U64: + return Inst(Opcode::SharedAtomicXor64, Flags{is_gds}, address, data); + default: + ThrowInvalidType(data.Type()); + } } -U32 IREmitter::SharedAtomicInc(const U32& address) { - return Inst(Opcode::SharedAtomicInc32, address); +U32U64 IREmitter::SharedAtomicISub(const U32& address, const U32U64& data, bool is_gds) { + switch (data.Type()) { + case Type::U32: + return Inst(Opcode::SharedAtomicISub32, Flags{is_gds}, address, data); + case Type::U64: + return Inst(Opcode::SharedAtomicISub64, Flags{is_gds}, address, data); + default: + ThrowInvalidType(data.Type()); + } } -U32 IREmitter::SharedAtomicDec(const U32& address) { - return Inst(Opcode::SharedAtomicDec32, address); +template <> +U32 IREmitter::SharedAtomicInc(const U32& address, bool is_gds) { + return Inst(Opcode::SharedAtomicInc32, Flags{is_gds}, address); } -U32 IREmitter::SharedAtomicISub(const U32& address, const U32& data) { - return Inst(Opcode::SharedAtomicISub32, address, data); +template <> +U64 IREmitter::SharedAtomicInc(const U32& address, bool is_gds) { + return Inst(Opcode::SharedAtomicInc64, Flags{is_gds}, address); +} + +template <> +U32 IREmitter::SharedAtomicDec(const U32& address, bool is_gds) { + return Inst(Opcode::SharedAtomicDec32, Flags{is_gds}, address); +} + +template <> +U64 IREmitter::SharedAtomicDec(const U32& address, bool is_gds) { + return Inst(Opcode::SharedAtomicDec64, Flags{is_gds}, address); } U32 IREmitter::ReadConst(const Value& base, const U32& offset) { @@ -1424,13 +1483,13 @@ U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { } } -Value IREmitter::IAddCary(const U32& a, const U32& b) { +Value IREmitter::IAddCarry(const U32& a, const U32& b) { if (a.Type() != b.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type()); } switch (a.Type()) { case Type::U32: - return Inst(Opcode::IAddCary32, a, b); + return Inst(Opcode::IAddCarry32, a, b); default: ThrowInvalidType(a.Type()); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 119e3752e..d9e5aab7a 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -96,18 +96,24 @@ public: [[nodiscard]] F32 GetPatch(Patch patch); void SetPatch(Patch patch, const F32& value); - [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); - void WriteShared(int bit_size, const Value& value, const U32& offset); + [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset, + bool is_gds = false); + void WriteShared(int bit_size, const Value& value, const U32& offset, bool is_gds = false); - [[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data); - [[nodiscard]] U32 SharedAtomicISub(const U32& address, const U32& data); - [[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed); - [[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed); - [[nodiscard]] U32 SharedAtomicInc(const U32& address); - [[nodiscard]] U32 SharedAtomicDec(const U32& address); - [[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data); - [[nodiscard]] U32 SharedAtomicOr(const U32& address, const U32& data); - [[nodiscard]] U32 SharedAtomicXor(const U32& address, const U32& data); + [[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data, bool is_gds); + [[nodiscard]] U32U64 SharedAtomicISub(const U32& address, const U32U64& data, bool is_gds); + [[nodiscard]] U32U64 SharedAtomicIMin(const U32& address, const U32U64& data, bool is_signed, + bool is_gds); + [[nodiscard]] U32U64 SharedAtomicIMax(const U32& address, const U32U64& data, bool is_signed, + bool is_gds); + [[nodiscard]] U32U64 SharedAtomicAnd(const U32& address, const U32U64& data, bool is_gds); + [[nodiscard]] U32U64 SharedAtomicOr(const U32& address, const U32U64& data, bool is_gds); + [[nodiscard]] U32U64 SharedAtomicXor(const U32& address, const U32U64& data, bool is_gds); + + template + [[nodiscard]] T SharedAtomicInc(const U32& address, bool is_gds); + template + [[nodiscard]] T SharedAtomicDec(const U32& address, bool is_gds); [[nodiscard]] U32 ReadConst(const Value& base, const U32& offset); [[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index); @@ -254,7 +260,7 @@ public: [[nodiscard]] F32F64 FPMedTri(const F32F64& a, const F32F64& b, const F32F64& c); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); - [[nodiscard]] Value IAddCary(const U32& a, const U32& b); + [[nodiscard]] Value IAddCarry(const U32& a, const U32& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32 IMulHi(const U32& a, const U32& b, bool is_signed = false); [[nodiscard]] U32U64 IMul(const U32U64& a, const U32U64& b); diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp index 84bdb5739..eaab05cb7 100644 --- a/src/shader_recompiler/ir/microinstruction.cpp +++ b/src/shader_recompiler/ir/microinstruction.cpp @@ -92,7 +92,6 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::WriteSharedU32: case Opcode::WriteSharedU64: case Opcode::SharedAtomicIAdd32: - case Opcode::SharedAtomicIAdd64: case Opcode::SharedAtomicISub32: case Opcode::SharedAtomicSMin32: case Opcode::SharedAtomicUMin32: @@ -103,6 +102,17 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::SharedAtomicAnd32: case Opcode::SharedAtomicOr32: case Opcode::SharedAtomicXor32: + case Opcode::SharedAtomicIAdd64: + case Opcode::SharedAtomicISub64: + case Opcode::SharedAtomicSMin64: + case Opcode::SharedAtomicUMin64: + case Opcode::SharedAtomicSMax64: + case Opcode::SharedAtomicUMax64: + case Opcode::SharedAtomicInc64: + case Opcode::SharedAtomicDec64: + case Opcode::SharedAtomicAnd64: + case Opcode::SharedAtomicOr64: + case Opcode::SharedAtomicXor64: case Opcode::ImageWrite: case Opcode::ImageAtomicIAdd32: case Opcode::ImageAtomicSMin32: diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 008f44659..08dcec458 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -41,15 +41,25 @@ OPCODE(WriteSharedU64, Void, U32, OPCODE(SharedAtomicIAdd32, U32, U32, U32, ) OPCODE(SharedAtomicIAdd64, U64, U32, U64, ) OPCODE(SharedAtomicISub32, U32, U32, U32, ) +OPCODE(SharedAtomicISub64, U64, U32, U64, ) OPCODE(SharedAtomicSMin32, U32, U32, U32, ) +OPCODE(SharedAtomicSMin64, U64, U32, U64, ) OPCODE(SharedAtomicUMin32, U32, U32, U32, ) +OPCODE(SharedAtomicUMin64, U64, U32, U64, ) OPCODE(SharedAtomicSMax32, U32, U32, U32, ) +OPCODE(SharedAtomicSMax64, U64, U32, U64, ) OPCODE(SharedAtomicUMax32, U32, U32, U32, ) +OPCODE(SharedAtomicUMax64, U64, U32, U64, ) OPCODE(SharedAtomicInc32, U32, U32, ) +OPCODE(SharedAtomicInc64, U64, U32, ) OPCODE(SharedAtomicDec32, U32, U32, ) +OPCODE(SharedAtomicDec64, U64, U32, ) OPCODE(SharedAtomicAnd32, U32, U32, U32, ) +OPCODE(SharedAtomicAnd64, U64, U32, U64, ) OPCODE(SharedAtomicOr32, U32, U32, U32, ) +OPCODE(SharedAtomicOr64, U64, U32, U64, ) OPCODE(SharedAtomicXor32, U32, U32, U32, ) +OPCODE(SharedAtomicXor64, U64, U32, U64, ) // Context getters/setters OPCODE(GetUserData, U32, ScalarReg, ) @@ -328,7 +338,7 @@ OPCODE(FPCmpClass32, U1, F32, // Integer operations OPCODE(IAdd32, U32, U32, U32, ) OPCODE(IAdd64, U64, U64, U64, ) -OPCODE(IAddCary32, U32x2, U32, U32, ) +OPCODE(IAddCarry32, U32x2, U32, U32, ) OPCODE(ISub32, U32, U32, U32, ) OPCODE(ISub64, U64, U64, U64, ) OPCODE(IMul32, U32, U32, U32, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index f3972769c..e5a4beb8b 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -84,8 +84,42 @@ bool IsBufferInstruction(const IR::Inst& inst) { } bool IsDataRingInstruction(const IR::Inst& inst) { - return inst.GetOpcode() == IR::Opcode::DataAppend || - inst.GetOpcode() == IR::Opcode::DataConsume; + switch (inst.GetOpcode()) { + case IR::Opcode::DataAppend: + case IR::Opcode::DataConsume: + return true; + case IR::Opcode::LoadSharedU16: + case IR::Opcode::LoadSharedU32: + case IR::Opcode::LoadSharedU64: + case IR::Opcode::WriteSharedU16: + case IR::Opcode::WriteSharedU32: + case IR::Opcode::WriteSharedU64: + case IR::Opcode::SharedAtomicIAdd32: + case IR::Opcode::SharedAtomicIAdd64: + case IR::Opcode::SharedAtomicUMin32: + case IR::Opcode::SharedAtomicUMin64: + case IR::Opcode::SharedAtomicSMin32: + case IR::Opcode::SharedAtomicSMin64: + case IR::Opcode::SharedAtomicUMax32: + case IR::Opcode::SharedAtomicUMax64: + case IR::Opcode::SharedAtomicSMax32: + case IR::Opcode::SharedAtomicSMax64: + case IR::Opcode::SharedAtomicAnd32: + case IR::Opcode::SharedAtomicAnd64: + case IR::Opcode::SharedAtomicOr32: + case IR::Opcode::SharedAtomicOr64: + case IR::Opcode::SharedAtomicXor32: + case IR::Opcode::SharedAtomicXor64: + case IR::Opcode::SharedAtomicISub32: + case IR::Opcode::SharedAtomicISub64: + case IR::Opcode::SharedAtomicInc32: + case IR::Opcode::SharedAtomicInc64: + case IR::Opcode::SharedAtomicDec32: + case IR::Opcode::SharedAtomicDec64: + return inst.Flags(); // is_gds + default: + return false; + } } IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) { @@ -507,7 +541,8 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& } } -void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { +void PatchGlobalDataShareAccess(IR::Block& block, IR::Inst& inst, Info& info, + Descriptors& descriptors) { const u32 binding = descriptors.Add(BufferResource{ .used_types = IR::Type::U32, .inline_cbuf = AmdGpu::Buffer::Null(), @@ -515,37 +550,111 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto .is_written = true, }); - const auto pred = [](const IR::Inst* inst) -> std::optional { - if (inst->GetOpcode() == IR::Opcode::GetUserData) { - return inst; - } - return std::nullopt; - }; - - // Attempt to deduce the GDS address of counter at compile time. - u32 gds_addr = 0; - const IR::Value& gds_offset = inst.Arg(0); - if (gds_offset.IsImmediate()) { - // Nothing to do, offset is known. - gds_addr = gds_offset.U32() & 0xFFFF; - } else { - const auto result = IR::BreadthFirstSearch(&inst, pred); - ASSERT_MSG(result, "Unable to track M0 source"); - - // M0 must be set by some user data register. - const IR::Inst* prod = gds_offset.InstRecursive(); - const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg()); - u32 m0_val = info.user_data[ud_reg] >> 16; - if (prod->GetOpcode() == IR::Opcode::IAdd32) { - m0_val += prod->Arg(1).U32(); - } - gds_addr = m0_val & 0xFFFF; - } - - // Patch instruction. IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; - inst.SetArg(0, ir.Imm32(gds_addr >> 2)); - inst.SetArg(1, ir.Imm32(binding)); + + // For data append/consume operations attempt to deduce the GDS address. + if (inst.GetOpcode() == IR::Opcode::DataAppend || inst.GetOpcode() == IR::Opcode::DataConsume) { + const auto pred = [](const IR::Inst* inst) -> std::optional { + if (inst->GetOpcode() == IR::Opcode::GetUserData) { + return inst; + } + return std::nullopt; + }; + + u32 gds_addr = 0; + const IR::Value& gds_offset = inst.Arg(0); + if (gds_offset.IsImmediate()) { + // Nothing to do, offset is known. + gds_addr = gds_offset.U32() & 0xFFFF; + } else { + const auto result = IR::BreadthFirstSearch(&inst, pred); + ASSERT_MSG(result, "Unable to track M0 source"); + + // M0 must be set by some user data register. + const IR::Inst* prod = gds_offset.InstRecursive(); + const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg()); + u32 m0_val = info.user_data[ud_reg] >> 16; + if (prod->GetOpcode() == IR::Opcode::IAdd32) { + m0_val += prod->Arg(1).U32(); + } + gds_addr = m0_val & 0xFFFF; + } + + // Patch instruction. + inst.SetArg(0, ir.Imm32(gds_addr >> 2)); + inst.SetArg(1, ir.Imm32(binding)); + } else { + // Convert shared memory opcode to storage buffer atomic to GDS buffer. + const IR::U32 offset = IR::U32{inst.Arg(0)}; + const IR::U32 address_words = ir.ShiftRightLogical(offset, ir.Imm32(1)); + const IR::U32 address_dwords = ir.ShiftRightLogical(offset, ir.Imm32(2)); + const IR::U32 address_qwords = ir.ShiftRightLogical(offset, ir.Imm32(3)); + const IR::U32 handle = ir.Imm32(binding); + switch (inst.GetOpcode()) { + case IR::Opcode::SharedAtomicIAdd32: + inst.ReplaceUsesWith(ir.BufferAtomicIAdd(handle, address_dwords, inst.Arg(1), {})); + break; + case IR::Opcode::SharedAtomicIAdd64: + inst.ReplaceUsesWith( + ir.BufferAtomicIAdd(handle, address_qwords, IR::U64{inst.Arg(1)}, {})); + break; + case IR::Opcode::SharedAtomicISub32: + inst.ReplaceUsesWith(ir.BufferAtomicISub(handle, address_dwords, inst.Arg(1), {})); + break; + case IR::Opcode::SharedAtomicSMin32: + case IR::Opcode::SharedAtomicUMin32: { + const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32; + inst.ReplaceUsesWith( + ir.BufferAtomicIMin(handle, address_dwords, inst.Arg(1), is_signed, {})); + break; + } + case IR::Opcode::SharedAtomicSMax32: + case IR::Opcode::SharedAtomicUMax32: { + const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32; + inst.ReplaceUsesWith( + ir.BufferAtomicIMax(handle, address_dwords, inst.Arg(1), is_signed, {})); + break; + } + case IR::Opcode::SharedAtomicInc32: + inst.ReplaceUsesWith(ir.BufferAtomicInc(handle, address_dwords, {})); + break; + case IR::Opcode::SharedAtomicDec32: + inst.ReplaceUsesWith(ir.BufferAtomicDec(handle, address_dwords, {})); + break; + case IR::Opcode::SharedAtomicAnd32: + inst.ReplaceUsesWith(ir.BufferAtomicAnd(handle, address_dwords, inst.Arg(1), {})); + break; + case IR::Opcode::SharedAtomicOr32: + inst.ReplaceUsesWith(ir.BufferAtomicOr(handle, address_dwords, inst.Arg(1), {})); + break; + case IR::Opcode::SharedAtomicXor32: + inst.ReplaceUsesWith(ir.BufferAtomicXor(handle, address_dwords, inst.Arg(1), {})); + break; + case IR::Opcode::LoadSharedU16: + inst.ReplaceUsesWith(ir.LoadBufferU16(handle, address_words, {})); + break; + case IR::Opcode::LoadSharedU32: + inst.ReplaceUsesWith(ir.LoadBufferU32(1, handle, address_dwords, {})); + break; + case IR::Opcode::LoadSharedU64: + inst.ReplaceUsesWith(ir.LoadBufferU64(handle, address_qwords, {})); + break; + case IR::Opcode::WriteSharedU16: + ir.StoreBufferU16(handle, address_words, IR::U16{inst.Arg(1)}, {}); + inst.Invalidate(); + break; + case IR::Opcode::WriteSharedU32: + ir.StoreBufferU32(1, handle, address_dwords, inst.Arg(1), {}); + inst.Invalidate(); + break; + case IR::Opcode::WriteSharedU64: + ir.StoreBufferU64(handle, address_qwords, IR::U64{inst.Arg(1)}, {}); + inst.Invalidate(); + break; + default: + UNREACHABLE(); + } + } } IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info, @@ -916,8 +1025,6 @@ void ResourceTrackingPass(IR::Program& program) { PatchBufferSharp(*block, inst, info, descriptors); } else if (IsImageInstruction(inst)) { PatchImageSharp(*block, inst, info, descriptors); - } else if (IsDataRingInstruction(inst)) { - PatchDataRingAccess(*block, inst, info, descriptors); } } } @@ -929,6 +1036,8 @@ void ResourceTrackingPass(IR::Program& program) { PatchBufferArgs(*block, inst, info); } else if (IsImageInstruction(inst)) { PatchImageArgs(*block, inst, info); + } else if (IsDataRingInstruction(inst)) { + PatchGlobalDataShareAccess(*block, inst, info, descriptors); } } } diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index a87dceb0a..079827866 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -55,6 +55,16 @@ void Visit(Info& info, const IR::Inst& inst) { info.shared_types |= IR::Type::U32; break; case IR::Opcode::SharedAtomicIAdd64: + case IR::Opcode::SharedAtomicISub64: + case IR::Opcode::SharedAtomicSMin64: + case IR::Opcode::SharedAtomicUMin64: + case IR::Opcode::SharedAtomicSMax64: + case IR::Opcode::SharedAtomicUMax64: + case IR::Opcode::SharedAtomicInc64: + case IR::Opcode::SharedAtomicDec64: + case IR::Opcode::SharedAtomicAnd64: + case IR::Opcode::SharedAtomicOr64: + case IR::Opcode::SharedAtomicXor64: info.uses_shared_int64_atomics = true; [[fallthrough]]; case IR::Opcode::LoadSharedU64: diff --git a/src/shader_recompiler/ir/passes/shared_memory_simplify_pass.cpp b/src/shader_recompiler/ir/passes/shared_memory_simplify_pass.cpp index 0f80a3b28..555fd505b 100644 --- a/src/shader_recompiler/ir/passes/shared_memory_simplify_pass.cpp +++ b/src/shader_recompiler/ir/passes/shared_memory_simplify_pass.cpp @@ -15,6 +15,16 @@ static bool Requires16BitSharedAtomic(const IR::Inst& inst) { static bool Requires64BitSharedAtomic(const IR::Inst& inst) { switch (inst.GetOpcode()) { case IR::Opcode::SharedAtomicIAdd64: + case IR::Opcode::SharedAtomicISub64: + case IR::Opcode::SharedAtomicSMin64: + case IR::Opcode::SharedAtomicUMin64: + case IR::Opcode::SharedAtomicSMax64: + case IR::Opcode::SharedAtomicUMax64: + case IR::Opcode::SharedAtomicInc64: + case IR::Opcode::SharedAtomicDec64: + case IR::Opcode::SharedAtomicAnd64: + case IR::Opcode::SharedAtomicOr64: + case IR::Opcode::SharedAtomicXor64: return true; default: return false; diff --git a/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp b/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp index a6900e180..b84011acc 100644 --- a/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp +++ b/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp @@ -17,7 +17,6 @@ static bool IsSharedAccess(const IR::Inst& inst) { case IR::Opcode::WriteSharedU32: case IR::Opcode::WriteSharedU64: case IR::Opcode::SharedAtomicIAdd32: - case IR::Opcode::SharedAtomicIAdd64: case IR::Opcode::SharedAtomicISub32: case IR::Opcode::SharedAtomicSMin32: case IR::Opcode::SharedAtomicUMin32: @@ -28,6 +27,17 @@ static bool IsSharedAccess(const IR::Inst& inst) { case IR::Opcode::SharedAtomicAnd32: case IR::Opcode::SharedAtomicOr32: case IR::Opcode::SharedAtomicXor32: + case IR::Opcode::SharedAtomicIAdd64: + case IR::Opcode::SharedAtomicISub64: + case IR::Opcode::SharedAtomicSMin64: + case IR::Opcode::SharedAtomicUMin64: + case IR::Opcode::SharedAtomicSMax64: + case IR::Opcode::SharedAtomicUMax64: + case IR::Opcode::SharedAtomicInc64: + case IR::Opcode::SharedAtomicDec64: + case IR::Opcode::SharedAtomicAnd64: + case IR::Opcode::SharedAtomicOr64: + case IR::Opcode::SharedAtomicXor64: return true; default: return false; @@ -64,6 +74,16 @@ IR::Type CalculateSharedMemoryTypes(IR::Program& program) { case IR::Opcode::LoadSharedU64: case IR::Opcode::WriteSharedU64: case IR::Opcode::SharedAtomicIAdd64: + case IR::Opcode::SharedAtomicISub64: + case IR::Opcode::SharedAtomicSMin64: + case IR::Opcode::SharedAtomicUMin64: + case IR::Opcode::SharedAtomicSMax64: + case IR::Opcode::SharedAtomicUMax64: + case IR::Opcode::SharedAtomicInc64: + case IR::Opcode::SharedAtomicDec64: + case IR::Opcode::SharedAtomicAnd64: + case IR::Opcode::SharedAtomicOr64: + case IR::Opcode::SharedAtomicXor64: used_types |= IR::Type::U64; break; default: @@ -119,19 +139,26 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_ ir.BufferAtomicIAdd(handle, address, inst.Arg(1), {})); continue; case IR::Opcode::SharedAtomicISub32: + case IR::Opcode::SharedAtomicISub64: inst.ReplaceUsesWithAndRemove( ir.BufferAtomicISub(handle, address, inst.Arg(1), {})); continue; case IR::Opcode::SharedAtomicSMin32: - case IR::Opcode::SharedAtomicUMin32: { - const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32; + case IR::Opcode::SharedAtomicUMin32: + case IR::Opcode::SharedAtomicSMin64: + case IR::Opcode::SharedAtomicUMin64: { + const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32 || + inst.GetOpcode() == IR::Opcode::SharedAtomicSMin64; inst.ReplaceUsesWithAndRemove( ir.BufferAtomicIMin(handle, address, inst.Arg(1), is_signed, {})); continue; } case IR::Opcode::SharedAtomicSMax32: - case IR::Opcode::SharedAtomicUMax32: { - const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32; + case IR::Opcode::SharedAtomicUMax32: + case IR::Opcode::SharedAtomicSMax64: + case IR::Opcode::SharedAtomicUMax64: { + const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32 || + inst.GetOpcode() == IR::Opcode::SharedAtomicSMax64; inst.ReplaceUsesWithAndRemove( ir.BufferAtomicIMax(handle, address, inst.Arg(1), is_signed, {})); continue; @@ -143,12 +170,15 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_ inst.ReplaceUsesWithAndRemove(ir.BufferAtomicDec(handle, address, {})); continue; case IR::Opcode::SharedAtomicAnd32: + case IR::Opcode::SharedAtomicAnd64: inst.ReplaceUsesWithAndRemove(ir.BufferAtomicAnd(handle, address, inst.Arg(1), {})); continue; case IR::Opcode::SharedAtomicOr32: + case IR::Opcode::SharedAtomicOr64: inst.ReplaceUsesWithAndRemove(ir.BufferAtomicOr(handle, address, inst.Arg(1), {})); continue; case IR::Opcode::SharedAtomicXor32: + case IR::Opcode::SharedAtomicXor64: inst.ReplaceUsesWithAndRemove(ir.BufferAtomicXor(handle, address, inst.Arg(1), {})); continue; case IR::Opcode::LoadSharedU16: @@ -173,7 +203,7 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_ inst.Invalidate(); break; default: - break; + UNREACHABLE(); } } } diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 9b8c28b66..3e66fba6a 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -135,9 +135,8 @@ void Liverpool::Process(std::stop_token stoken) { if (submit_done) { VideoCore::EndCapture(); - if (rasterizer) { - rasterizer->ProcessFaults(); + rasterizer->EndCommandList(); rasterizer->Flush(); } submit_done = false; @@ -604,6 +603,8 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanevent_index.Value() == EventIndex::ZpassDone) { + LOG_WARNING(Render, "Unimplemented occlusion query"); } break; } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 0613823ab..c07e9f63a 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -88,7 +88,7 @@ struct Liverpool { } }; - static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x1000) { + static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x2000) { constexpr u32 token_mov_vcchi = 0xBEEB03FF; if (code[0] == token_mov_vcchi) { diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp index 15bf0d81e..e85a6eb18 100644 --- a/src/video_core/buffer_cache/buffer.cpp +++ b/src/video_core/buffer_cache/buffer.cpp @@ -137,12 +137,15 @@ StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& size_bytes); } -std::pair StreamBuffer::Map(u64 size, u64 alignment) { +std::pair StreamBuffer::Map(u64 size, u64 alignment, bool allow_wait) { if (!is_coherent && usage == MemoryUsage::Stream) { size = Common::AlignUp(size, instance->NonCoherentAtomSize()); } - ASSERT(size <= this->size_bytes); + if (size > this->size_bytes) { + return {nullptr, 0}; + } + mapped_size = size; if (alignment > 0) { @@ -162,8 +165,11 @@ std::pair StreamBuffer::Map(u64 size, u64 alignment) { } const u64 mapped_upper_bound = offset + size; - WaitPendingOperations(mapped_upper_bound); - return std::make_pair(mapped_data.data() + offset, offset); + if (!WaitPendingOperations(mapped_upper_bound, allow_wait)) { + return {nullptr, 0}; + } + + return {mapped_data.data() + offset, offset}; } void StreamBuffer::Commit() { @@ -177,6 +183,12 @@ void StreamBuffer::Commit() { } offset += mapped_size; + if (current_watch_cursor != 0 && + current_watches[current_watch_cursor].tick == scheduler->CurrentTick()) { + current_watches[current_watch_cursor].upper_bound = offset; + return; + } + if (current_watch_cursor + 1 >= current_watches.size()) { // Ensure that there are enough watches. ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK); @@ -191,16 +203,20 @@ void StreamBuffer::ReserveWatches(std::vector& watches, std::size_t grow_ watches.resize(watches.size() + grow_size); } -void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { +bool StreamBuffer::WaitPendingOperations(u64 requested_upper_bound, bool allow_wait) { if (!invalidation_mark) { - return; + return true; } while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) { auto& watch = previous_watches[wait_cursor]; - wait_bound = watch.upper_bound; + if (!scheduler->IsFree(watch.tick) && !allow_wait) { + return false; + } scheduler->Wait(watch.tick); + wait_bound = watch.upper_bound; ++wait_cursor; } + return true; } } // namespace VideoCore diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h index 530968787..a7a0ce84f 100644 --- a/src/video_core/buffer_cache/buffer.h +++ b/src/video_core/buffer_cache/buffer.h @@ -168,7 +168,7 @@ public: MemoryUsage usage, u64 size_bytes_); /// Reserves a region of memory from the stream buffer. - std::pair Map(u64 size, u64 alignment = 0); + std::pair Map(u64 size, u64 alignment = 0, bool allow_wait = true); /// Ensures that reserved bytes of memory are available to the GPU. void Commit(); @@ -181,10 +181,6 @@ public: return offset; } - u64 GetFreeSize() const { - return size_bytes - offset - mapped_size; - } - private: struct Watch { u64 tick{}; @@ -195,7 +191,7 @@ private: void ReserveWatches(std::vector& watches, std::size_t grow_size); /// Waits pending watches until requested upper bound. - void WaitPendingOperations(u64 requested_upper_bound); + bool WaitPendingOperations(u64 requested_upper_bound, bool allow_wait); private: u64 offset{}; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index d55e05d1e..28444ac60 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -48,6 +48,8 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s memory_tracker = std::make_unique(tracker); + std::memset(gds_buffer.mapped_data.data(), 0, DataShareBufferSize); + // Ensure the first slot is used for the null buffer const auto null_id = slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, 16); @@ -137,8 +139,7 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { return; } memory_tracker->InvalidateRegion( - device_addr, size, Config::readbacks(), - [this, device_addr, size] { ReadMemory(device_addr, size, true); }); + device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); }); } void BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) { @@ -313,7 +314,10 @@ void BufferCache::BindIndexBuffer(u32 index_offset) { void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) { ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned"); if (!is_gds) { - ASSERT(memory->TryWriteBacking(std::bit_cast(address), value, num_bytes)); + if (!memory->TryWriteBacking(std::bit_cast(address), value, num_bytes)) { + std::memcpy(std::bit_cast(address), value, num_bytes); + return; + } if (!IsRegionRegistered(address, num_bytes)) { return; } @@ -817,22 +821,22 @@ void BufferCache::ChangeRegister(BufferId buffer_id) { void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_written, bool is_texel_buffer) { boost::container::small_vector copies; + size_t total_size_bytes = 0; VAddr buffer_start = buffer.CpuAddr(); + vk::Buffer src_buffer = VK_NULL_HANDLE; memory_tracker->ForEachUploadRange( - device_addr, size, is_written, [&](u64 device_addr_out, u64 range_size) { - const u64 offset = staging_buffer.Copy(device_addr_out, range_size); - copies.push_back(vk::BufferCopy{ - .srcOffset = offset, - .dstOffset = device_addr_out - buffer_start, - .size = range_size, - }); - }); + device_addr, size, is_written, + [&](u64 device_addr_out, u64 range_size) { + copies.emplace_back(total_size_bytes, device_addr_out - buffer_start, range_size); + total_size_bytes += range_size; + }, + [&] { src_buffer = UploadCopies(buffer, copies, total_size_bytes); }); SCOPE_EXIT { if (is_texel_buffer) { SynchronizeBufferFromImage(buffer, device_addr, size); } }; - if (copies.empty()) { + if (!src_buffer) { return; } scheduler.EndRendering(); @@ -861,7 +865,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, .bufferMemoryBarrierCount = 1, .pBufferMemoryBarriers = &pre_barrier, }); - cmdbuf.copyBuffer(staging_buffer.Handle(), buffer.buffer, copies); + cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies); cmdbuf.pipelineBarrier2(vk::DependencyInfo{ .dependencyFlags = vk::DependencyFlagBits::eByRegion, .bufferMemoryBarrierCount = 1, @@ -869,6 +873,39 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, }); } +vk::Buffer BufferCache::UploadCopies(Buffer& buffer, std::span copies, + size_t total_size_bytes) { + if (copies.empty()) { + return VK_NULL_HANDLE; + } + const auto [staging, offset] = staging_buffer.Map(total_size_bytes); + if (staging) { + for (auto& copy : copies) { + u8* const src_pointer = staging + copy.srcOffset; + const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset; + std::memcpy(src_pointer, std::bit_cast(device_addr), copy.size); + // Apply the staging offset + copy.srcOffset += offset; + } + staging_buffer.Commit(); + return staging_buffer.Handle(); + } else { + // For large one time transfers use a temporary host buffer. + auto temp_buffer = + std::make_unique(instance, scheduler, MemoryUsage::Upload, 0, + vk::BufferUsageFlagBits::eTransferSrc, total_size_bytes); + const vk::Buffer src_buffer = temp_buffer->Handle(); + u8* const staging = temp_buffer->mapped_data.data(); + for (const auto& copy : copies) { + u8* const src_pointer = staging + copy.srcOffset; + const VAddr device_addr = buffer.CpuAddr() + copy.dstOffset; + std::memcpy(src_pointer, std::bit_cast(device_addr), copy.size); + } + scheduler.DeferOperation([buffer = std::move(temp_buffer)]() mutable { buffer.reset(); }); + return src_buffer; + } +} + bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) { boost::container::small_vector image_ids; texture_cache.ForEachImageInRegion(device_addr, size, [&](ImageId image_id, Image& image) { diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 900a27aee..b509ce2d0 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -112,7 +112,7 @@ public: /// Invalidates any buffer in the logical page range. void InvalidateMemory(VAddr device_addr, u64 size); - /// Waits on pending downloads in the logical page range. + /// Flushes any GPU modified buffer in the logical page range back to CPU memory. void ReadMemory(VAddr device_addr, u64 size, bool is_write = false); /// Binds host vertex buffers for the current draw. @@ -194,6 +194,9 @@ private: void SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool is_written, bool is_texel_buffer); + vk::Buffer UploadCopies(Buffer& buffer, std::span copies, + size_t total_size_bytes); + bool SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size); void InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes); diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h index ca87c7df0..ec0878c3b 100644 --- a/src/video_core/buffer_cache/memory_tracker.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -62,17 +62,17 @@ public: } /// Removes all protection from a page and ensures GPU data has been flushed if requested - void InvalidateRegion(VAddr cpu_addr, u64 size, bool try_flush, auto&& on_flush) noexcept { + void InvalidateRegion(VAddr cpu_addr, u64 size, auto&& on_flush) noexcept { IteratePages( - cpu_addr, size, - [try_flush, &on_flush](RegionManager* manager, u64 offset, size_t size) { + cpu_addr, size, [&on_flush](RegionManager* manager, u64 offset, size_t size) { const bool should_flush = [&] { // Perform both the GPU modification check and CPU state change with the lock // in case we are racing with GPU thread trying to mark the page as GPU // modified. If we need to flush the flush function is going to perform CPU // state change. std::scoped_lock lk{manager->lock}; - if (try_flush && manager->template IsRegionModified(offset, size)) { + if (Config::readbacks() && + manager->template IsRegionModified(offset, size)) { return true; } manager->template ChangeRegionState( @@ -86,17 +86,27 @@ public: } /// Call 'func' for each CPU modified range and unmark those pages as CPU modified - void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func) { + void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func, + auto&& on_upload) { IteratePages(query_cpu_range, query_size, [&func, is_written](RegionManager* manager, u64 offset, size_t size) { - std::scoped_lock lk{manager->lock}; + manager->lock.lock(); manager->template ForEachModifiedRange( manager->GetCpuAddr() + offset, size, func); - if (is_written) { - manager->template ChangeRegionState( - manager->GetCpuAddr() + offset, size); + if (!is_written) { + manager->lock.unlock(); } }); + on_upload(); + if (!is_written) { + return; + } + IteratePages(query_cpu_range, query_size, + [&func, is_written](RegionManager* manager, u64 offset, size_t size) { + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + manager->lock.unlock(); + }); } /// Call 'func' for each GPU modified range and unmark those pages as GPU modified diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e4e026485..cca193831 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -272,6 +272,8 @@ void Rasterizer::EliminateFastClear() { void Rasterizer::Draw(bool is_indexed, u32 index_offset) { RENDERER_TRACE; + scheduler.PopPendingOperations(); + if (!FilterDraw()) { return; } @@ -317,6 +319,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 u32 max_count, VAddr count_address) { RENDERER_TRACE; + scheduler.PopPendingOperations(); + if (!FilterDraw()) { return; } @@ -380,6 +384,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 void Rasterizer::DispatchDirect() { RENDERER_TRACE; + scheduler.PopPendingOperations(); + const auto& cs_program = liverpool->GetCsRegs(); const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); if (!pipeline) { @@ -407,6 +413,8 @@ void Rasterizer::DispatchDirect() { void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) { RENDERER_TRACE; + scheduler.PopPendingOperations(); + const auto& cs_program = liverpool->GetCsRegs(); const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline(); if (!pipeline) { @@ -439,11 +447,12 @@ void Rasterizer::Finish() { scheduler.Finish(); } -void Rasterizer::ProcessFaults() { +void Rasterizer::EndCommandList() { if (fault_process_pending) { fault_process_pending = false; buffer_cache.ProcessFaultBuffer(); } + texture_cache.ProcessDownloadImages(); } bool Rasterizer::BindResources(const Pipeline* pipeline) { @@ -649,8 +658,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin if (instance.IsNullDescriptorSupported()) { image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); } else { - auto& null_image_view = - texture_cache.FindTexture(VideoCore::NULL_IMAGE_ID, desc.view_info); + auto& null_image_view = texture_cache.FindTexture(VideoCore::NULL_IMAGE_ID, desc); image_infos.emplace_back(VK_NULL_HANDLE, *null_image_view.image_view, vk::ImageLayout::eGeneral); } @@ -664,7 +672,7 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin bound_images.emplace_back(image_id); auto& image = texture_cache.GetImage(image_id); - auto& image_view = texture_cache.FindTexture(image_id, desc.view_info); + auto& image_view = texture_cache.FindTexture(image_id, desc); if (image.binding.force_general || image.binding.is_target) { image.Transit(vk::ImageLayout::eGeneral, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 4a978746c..1e1680258 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -68,7 +68,7 @@ public: void CpSync(); u64 Flush(); void Finish(); - void ProcessFaults(); + void EndCommandList(); PipelineCache& GetPipelineCache() { return pipeline_cache; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index e75a69924..4c4e17fe4 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -101,6 +101,14 @@ void Scheduler::Wait(u64 tick) { } } +void Scheduler::PopPendingOperations() { + master_semaphore.Refresh(); + while (!pending_ops.empty() && master_semaphore.IsFree(pending_ops.front().gpu_tick)) { + pending_ops.front().callback(); + pending_ops.pop(); + } +} + void Scheduler::AllocateWorkerCommandBuffers() { const vk::CommandBufferBeginInfo begin_info = { .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit, @@ -175,10 +183,7 @@ void Scheduler::SubmitExecution(SubmitInfo& info) { AllocateWorkerCommandBuffers(); // Apply pending operations - while (!pending_ops.empty() && IsFree(pending_ops.front().gpu_tick)) { - pending_ops.front().callback(); - pending_ops.pop(); - } + PopPendingOperations(); } void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmdbuf) { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 8ddf00f6a..bd6fb549a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -317,6 +317,9 @@ public: /// Waits for the given tick to trigger on the GPU. void Wait(u64 tick); + /// Attempts to execute operations whose tick the GPU has caught up with. + void PopPendingOperations(); + /// Starts a new rendering scope with provided state. void BeginRendering(const RenderState& new_state); @@ -344,7 +347,11 @@ public: } /// Returns true when a tick has been triggered by the GPU. - [[nodiscard]] bool IsFree(u64 tick) const noexcept { + [[nodiscard]] bool IsFree(u64 tick) noexcept { + if (master_semaphore.IsFree(tick)) { + return true; + } + master_semaphore.Refresh(); return master_semaphore.IsFree(tick); } diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index a50601af6..723b95892 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -5,7 +5,9 @@ #include #include "common/assert.h" +#include "common/config.h" #include "common/debug.h" +#include "core/memory.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/page_manager.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -58,6 +60,50 @@ ImageId TextureCache::GetNullImage(const vk::Format format) { return null_id; } +void TextureCache::ProcessDownloadImages() { + for (const ImageId image_id : download_images) { + DownloadImageMemory(image_id); + } + download_images.clear(); +} + +void TextureCache::DownloadImageMemory(ImageId image_id) { + Image& image = slot_images[image_id]; + if (False(image.flags & ImageFlagBits::GpuModified)) { + return; + } + auto& download_buffer = buffer_cache.GetUtilityBuffer(MemoryUsage::Download); + const u32 download_size = image.info.pitch * image.info.size.height * + image.info.resources.layers * (image.info.num_bits / 8); + ASSERT(download_size <= image.info.guest_size); + const auto [download, offset] = download_buffer.Map(download_size); + download_buffer.Commit(); + const vk::BufferImageCopy image_download = { + .bufferOffset = offset, + .bufferRowLength = image.info.pitch, + .bufferImageHeight = image.info.size.height, + .imageSubresource = + { + .aspectMask = image.info.IsDepthStencil() ? vk::ImageAspectFlagBits::eDepth + : vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = image.info.resources.layers, + }, + .imageOffset = {0, 0, 0}, + .imageExtent = {image.info.size.width, image.info.size.height, 1}, + }; + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); + cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, + download_buffer.Handle(), image_download); + scheduler.DeferOperation([device_addr = image.info.guest_address, download, download_size] { + auto* memory = Core::Memory::Instance(); + memory->TryWriteBacking(std::bit_cast(device_addr), download, download_size); + }); +} + void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) { if (image.hash == 0) { // Initialize hash @@ -169,7 +215,7 @@ ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, Bindi if (recreate) { auto new_info = requested_info; - new_info.resources = std::min(requested_info.resources, cache_image.info.resources); + new_info.resources = std::max(requested_info.resources, cache_image.info.resources); const auto new_image_id = slot_images.insert(instance, scheduler, new_info); RegisterImage(new_image_id); @@ -437,16 +483,27 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo return slot_image_views[view_id]; } -ImageView& TextureCache::FindTexture(ImageId image_id, const ImageViewInfo& view_info) { +ImageView& TextureCache::FindTexture(ImageId image_id, const BaseDesc& desc) { Image& image = slot_images[image_id]; + if (desc.type == BindingType::Storage) { + image.flags |= ImageFlagBits::GpuModified; + if (Config::readbackLinearImages() && + image.info.tiling_mode == AmdGpu::TilingMode::Display_Linear) { + download_images.emplace(image_id); + } + } UpdateImage(image_id); - return RegisterImageView(image_id, view_info); + return RegisterImageView(image_id, desc.view_info); } ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) { const ImageId image_id = FindImage(desc); Image& image = slot_images[image_id]; image.flags |= ImageFlagBits::GpuModified; + if (Config::readbackLinearImages() && + image.info.tiling_mode == AmdGpu::TilingMode::Display_Linear) { + download_images.emplace(image_id); + } image.usage.render_target = 1u; UpdateImage(image_id); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 87228b84f..ff8ffb61c 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include @@ -105,11 +106,14 @@ public: /// Evicts any images that overlap the unmapped range. void UnmapMemory(VAddr cpu_addr, size_t size); + /// Schedules a copy of pending images for download back to CPU memory. + void ProcessDownloadImages(); + /// Retrieves the image handle of the image with the provided attributes. [[nodiscard]] ImageId FindImage(BaseDesc& desc, FindFlags flags = {}); /// Retrieves an image view with the properties of the specified image id. - [[nodiscard]] ImageView& FindTexture(ImageId image_id, const ImageViewInfo& view_info); + [[nodiscard]] ImageView& FindTexture(ImageId image_id, const BaseDesc& desc); /// Retrieves the render target with specified properties [[nodiscard]] ImageView& FindRenderTarget(BaseDesc& desc); @@ -252,6 +256,9 @@ private: /// Gets or creates a null image for a particular format. ImageId GetNullImage(vk::Format format); + /// Copies image memory back to CPU. + void DownloadImageMemory(ImageId image_id); + /// Create an image from the given parameters [[nodiscard]] ImageId InsertImage(const ImageInfo& info, VAddr cpu_addr); @@ -293,6 +300,7 @@ private: Common::SlotVector slot_image_views; tsl::robin_map samplers; tsl::robin_map null_images; + std::unordered_set download_images; PageTable page_table; std::mutex mutex;