Merge remote-tracking branch 'origin/main' into fullscreen

This commit is contained in:
kalaposfos13 2025-01-07 14:02:44 +01:00
commit 391e529d7a
114 changed files with 13739 additions and 20155 deletions

View File

@ -89,7 +89,7 @@ body:
- type: textarea - type: textarea
id: logs id: logs
attributes: attributes:
label: "Logs" label: "Log File"
description: Attach any logs here. Log can be found by right clicking on a game name -> Open Folder... -> Open Log Folder. Make sure that the log type is set to `sync`. description: Drag and drop the log file here. It can be found by right clicking on a game name -> Open Folder... -> Open Log Folder. Make sure that the log type is set to `sync`.
validations: validations:
required: false required: true

View File

@ -14,14 +14,14 @@ env:
jobs: jobs:
reuse: reuse:
runs-on: ubuntu-latest runs-on: ubuntu-24.04
continue-on-error: true continue-on-error: true
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: fsfe/reuse-action@v5 - uses: fsfe/reuse-action@v5
clang-format: clang-format:
runs-on: ubuntu-latest runs-on: ubuntu-24.04
continue-on-error: true continue-on-error: true
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@ -39,7 +39,7 @@ jobs:
run: ./.ci/clang-format.sh run: ./.ci/clang-format.sh
get-info: get-info:
runs-on: ubuntu-latest runs-on: ubuntu-24.04
outputs: outputs:
date: ${{ steps.vars.outputs.date }} date: ${{ steps.vars.outputs.date }}
shorthash: ${{ steps.vars.outputs.shorthash }} shorthash: ${{ steps.vars.outputs.shorthash }}
@ -57,7 +57,7 @@ jobs:
echo "fullhash=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT echo "fullhash=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
windows-sdl: windows-sdl:
runs-on: windows-latest runs-on: windows-2025
needs: get-info needs: get-info
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@ -101,7 +101,7 @@ jobs:
path: ${{github.workspace}}/build/shadPS4.exe path: ${{github.workspace}}/build/shadPS4.exe
windows-qt: windows-qt:
runs-on: windows-latest runs-on: windows-2025
needs: get-info needs: get-info
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4

View File

@ -209,6 +209,7 @@ set(AUDIO_LIB src/core/libraries/audio/audioin.cpp
set(GNM_LIB src/core/libraries/gnmdriver/gnmdriver.cpp set(GNM_LIB src/core/libraries/gnmdriver/gnmdriver.cpp
src/core/libraries/gnmdriver/gnmdriver.h src/core/libraries/gnmdriver/gnmdriver.h
src/core/libraries/gnmdriver/gnmdriver_init.h
src/core/libraries/gnmdriver/gnm_error.h src/core/libraries/gnmdriver/gnm_error.h
) )
@ -335,6 +336,8 @@ set(SYSTEM_LIBS src/core/libraries/system/commondialog.cpp
src/core/libraries/share_play/shareplay.h src/core/libraries/share_play/shareplay.h
src/core/libraries/razor_cpu/razor_cpu.cpp src/core/libraries/razor_cpu/razor_cpu.cpp
src/core/libraries/razor_cpu/razor_cpu.h src/core/libraries/razor_cpu/razor_cpu.h
src/core/libraries/mouse/mouse.cpp
src/core/libraries/mouse/mouse.h
) )
set(VIDEOOUT_LIB src/core/libraries/videoout/buffer.h set(VIDEOOUT_LIB src/core/libraries/videoout/buffer.h

View File

@ -48,6 +48,7 @@ static std::string updateChannel;
static std::string backButtonBehavior = "left"; static std::string backButtonBehavior = "left";
static bool useSpecialPad = false; static bool useSpecialPad = false;
static int specialPadClass = 1; static int specialPadClass = 1;
static bool isMotionControlsEnabled = true;
static bool isDebugDump = false; static bool isDebugDump = false;
static bool isShaderDebug = false; static bool isShaderDebug = false;
static bool isShowSplash = false; static bool isShowSplash = false;
@ -101,7 +102,7 @@ void setTrophyKey(std::string key) {
trophyKey = key; trophyKey = key;
} }
bool isNeoMode() { bool isNeoModeConsole() {
return isNeo; return isNeo;
} }
@ -177,6 +178,10 @@ int getSpecialPadClass() {
return specialPadClass; return specialPadClass;
} }
bool getIsMotionControlsEnabled() {
return isMotionControlsEnabled;
}
bool debugDump() { bool debugDump() {
return isDebugDump; return isDebugDump;
} }
@ -377,6 +382,10 @@ void setSpecialPadClass(int type) {
specialPadClass = type; specialPadClass = type;
} }
void setIsMotionControlsEnabled(bool use) {
isMotionControlsEnabled = use;
}
void setSeparateUpdateEnabled(bool use) { void setSeparateUpdateEnabled(bool use) {
separateupdatefolder = use; separateupdatefolder = use;
} }
@ -604,6 +613,7 @@ void load(const std::filesystem::path& path) {
backButtonBehavior = toml::find_or<std::string>(input, "backButtonBehavior", "left"); backButtonBehavior = toml::find_or<std::string>(input, "backButtonBehavior", "left");
useSpecialPad = toml::find_or<bool>(input, "useSpecialPad", false); useSpecialPad = toml::find_or<bool>(input, "useSpecialPad", false);
specialPadClass = toml::find_or<int>(input, "specialPadClass", 1); specialPadClass = toml::find_or<int>(input, "specialPadClass", 1);
isMotionControlsEnabled = toml::find_or<bool>(input, "isMotionControlsEnabled", true);
} }
if (data.contains("GPU")) { if (data.contains("GPU")) {
@ -720,6 +730,7 @@ void save(const std::filesystem::path& path) {
data["Input"]["backButtonBehavior"] = backButtonBehavior; data["Input"]["backButtonBehavior"] = backButtonBehavior;
data["Input"]["useSpecialPad"] = useSpecialPad; data["Input"]["useSpecialPad"] = useSpecialPad;
data["Input"]["specialPadClass"] = specialPadClass; data["Input"]["specialPadClass"] = specialPadClass;
data["Input"]["isMotionControlsEnabled"] = isMotionControlsEnabled;
data["GPU"]["screenWidth"] = screenWidth; data["GPU"]["screenWidth"] = screenWidth;
data["GPU"]["screenHeight"] = screenHeight; data["GPU"]["screenHeight"] = screenHeight;
data["GPU"]["nullGpu"] = isNullGpu; data["GPU"]["nullGpu"] = isNullGpu;

View File

@ -17,10 +17,9 @@ void saveMainWindow(const std::filesystem::path& path);
std::string getTrophyKey(); std::string getTrophyKey();
void setTrophyKey(std::string key); void setTrophyKey(std::string key);
bool isNeoMode();
bool getIsFullscreen(); bool getIsFullscreen();
std::string getFullscreenMode(); std::string getFullscreenMode();
bool isNeoModeConsole();
bool getPlayBGM(); bool getPlayBGM();
int getBGMvolume(); int getBGMvolume();
bool getisTrophyPopupDisabled(); bool getisTrophyPopupDisabled();
@ -39,6 +38,7 @@ int getCursorHideTimeout();
std::string getBackButtonBehavior(); std::string getBackButtonBehavior();
bool getUseSpecialPad(); bool getUseSpecialPad();
int getSpecialPadClass(); int getSpecialPadClass();
bool getIsMotionControlsEnabled();
u32 getScreenWidth(); u32 getScreenWidth();
u32 getScreenHeight(); u32 getScreenHeight();
@ -86,6 +86,7 @@ void setCursorHideTimeout(int newcursorHideTimeout);
void setBackButtonBehavior(const std::string& type); void setBackButtonBehavior(const std::string& type);
void setUseSpecialPad(bool use); void setUseSpecialPad(bool use);
void setSpecialPadClass(int type); void setSpecialPadClass(int type);
void setIsMotionControlsEnabled(bool use);
void setLogType(const std::string& type); void setLogType(const std::string& type);
void setLogFilter(const std::string& type); void setLogFilter(const std::string& type);

View File

@ -7,6 +7,7 @@
#include <string_view> #include <string_view>
#include "assert.h" #include "assert.h"
#include "bit_field.h"
#include "singleton.h" #include "singleton.h"
#include "types.h" #include "types.h"
@ -16,6 +17,46 @@ class Emulator;
namespace Common { namespace Common {
union PSFAttributes {
/// Supports initial user's logout
BitField<0, 1, u32> support_initial_user_logout;
/// Enter button for the common dialog is cross.
BitField<1, 1, u32> enter_button_cross;
/// Warning dialog for PS Move is displayed in the options menu.
BitField<2, 1, u32> ps_move_warning;
/// Supports stereoscopic 3D.
BitField<3, 1, u32> support_stereoscopic_3d;
/// Suspends when PS button is pressed.
BitField<4, 1, u32> ps_button_suspend;
/// Enter button for the common dialog is assigned by the system software.
BitField<5, 1, u32> enter_button_system;
/// Overrides share menu behavior.
BitField<6, 1, u32> override_share_menu;
/// Suspends when PS button is pressed and special output resolution is set.
BitField<8, 1, u32> special_res_ps_button_suspend;
/// Enable HDCP.
BitField<9, 1, u32> enable_hdcp;
/// Disable HDCP for non-game.
BitField<10, 1, u32> disable_hdcp_non_game;
/// Supports PS VR.
BitField<14, 1, u32> support_ps_vr;
/// CPU mode (6 CPU)
BitField<15, 1, u32> six_cpu_mode;
/// CPU mode (7 CPU)
BitField<16, 1, u32> seven_cpu_mode;
/// Supports PS4 Pro (Neo) mode.
BitField<23, 1, u32> support_neo_mode;
/// Requires PS VR.
BitField<26, 1, u32> require_ps_vr;
/// Supports HDR.
BitField<29, 1, u32> support_hdr;
/// Display location.
BitField<31, 1, u32> display_location;
u32 raw{};
};
static_assert(sizeof(PSFAttributes) == 4);
class ElfInfo { class ElfInfo {
friend class Core::Emulator; friend class Core::Emulator;
@ -26,6 +67,7 @@ class ElfInfo {
std::string app_ver{}; std::string app_ver{};
u32 firmware_ver = 0; u32 firmware_ver = 0;
u32 raw_firmware_ver = 0; u32 raw_firmware_ver = 0;
PSFAttributes psf_attributes{};
public: public:
static constexpr u32 FW_15 = 0x1500000; static constexpr u32 FW_15 = 0x1500000;
@ -68,6 +110,11 @@ public:
ASSERT(initialized); ASSERT(initialized);
return raw_firmware_ver; return raw_firmware_ver;
} }
[[nodiscard]] const PSFAttributes& PSFAttributes() const {
ASSERT(initialized);
return psf_attributes;
}
}; };
} // namespace Common } // namespace Common

View File

@ -126,6 +126,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
SUB(Lib, Vdec2) \ SUB(Lib, Vdec2) \
SUB(Lib, Videodec) \ SUB(Lib, Videodec) \
SUB(Lib, RazorCpu) \ SUB(Lib, RazorCpu) \
SUB(Lib, Mouse) \
CLS(Frontend) \ CLS(Frontend) \
CLS(Render) \ CLS(Render) \
SUB(Render, Vulkan) \ SUB(Render, Vulkan) \

View File

@ -93,6 +93,7 @@ enum class Class : u8 {
Lib_Vdec2, ///< The LibSceVideodec2 implementation. Lib_Vdec2, ///< The LibSceVideodec2 implementation.
Lib_Videodec, ///< The LibSceVideodec implementation. Lib_Videodec, ///< The LibSceVideodec implementation.
Lib_RazorCpu, ///< The LibRazorCpu implementation. Lib_RazorCpu, ///< The LibRazorCpu implementation.
Lib_Mouse, ///< The LibSceMouse implementation
Frontend, ///< Emulator UI Frontend, ///< Emulator UI
Render, ///< Video Core Render, ///< Video Core
Render_Vulkan, ///< Vulkan backend Render_Vulkan, ///< Vulkan backend

View File

@ -66,7 +66,7 @@ void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) {
"GetColorSliceSize()", buffer.GetColorSliceSize(), "GetColorSliceSize()", buffer.GetColorSliceSize(),
"GetTilingMode()", buffer.GetTilingMode(), "GetTilingMode()", buffer.GetTilingMode(),
"IsTiled()", buffer.IsTiled(), "IsTiled()", buffer.IsTiled(),
"NumFormat()", buffer.NumFormat() "NumFormat()", buffer.GetNumberFmt()
); );
// clang-format on // clang-format on

View File

@ -3,13 +3,13 @@
#include <memory> #include <memory>
#include <mutex> #include <mutex>
#include <shared_mutex> #include <stop_token>
#include <thread>
#include <magic_enum/magic_enum.hpp> #include <magic_enum/magic_enum.hpp>
#include "common/assert.h" #include "common/assert.h"
#include "common/config.h" #include "common/config.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/polyfill_thread.h"
#include "common/thread.h" #include "common/thread.h"
#include "core/libraries/audio/audioout.h" #include "core/libraries/audio/audioout.h"
#include "core/libraries/audio/audioout_backend.h" #include "core/libraries/audio/audioout_backend.h"
@ -18,7 +18,7 @@
namespace Libraries::AudioOut { namespace Libraries::AudioOut {
std::shared_mutex ports_mutex; std::mutex port_open_mutex{};
std::array<PortOut, SCE_AUDIO_OUT_NUM_PORTS> ports_out{}; std::array<PortOut, SCE_AUDIO_OUT_NUM_PORTS> ports_out{};
static std::unique_ptr<AudioOutBackend> audio; static std::unique_ptr<AudioOutBackend> audio;
@ -93,17 +93,20 @@ int PS4_SYSV_ABI sceAudioOutClose(s32 handle) {
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
} }
std::scoped_lock lock(ports_mutex); std::unique_lock open_lock{port_open_mutex};
auto& port = ports_out.at(handle - 1); auto& port = ports_out.at(handle - 1);
if (!port.impl) { {
std::unique_lock lock{port.mutex};
if (!port.IsOpen()) {
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
} }
port.output_thread.Stop();
std::free(port.output_buffer); std::free(port.output_buffer);
port.output_buffer = nullptr; port.output_buffer = nullptr;
port.output_ready = false; port.output_ready = false;
port.impl = nullptr; port.impl = nullptr;
}
// Stop outside of port lock scope to prevent deadlocks.
port.output_thread.Stop();
return ORBIS_OK; return ORBIS_OK;
} }
@ -172,15 +175,12 @@ int PS4_SYSV_ABI sceAudioOutGetPortState(s32 handle, OrbisAudioOutPortState* sta
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
} }
std::scoped_lock lock(ports_mutex); auto& port = ports_out.at(handle - 1);
const auto& port = ports_out.at(handle - 1); {
if (!port.impl) { std::unique_lock lock{port.mutex};
if (!port.IsOpen()) {
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
} }
state->rerouteCounter = 0;
state->volume = 127;
switch (port.type) { switch (port.type) {
case OrbisAudioOutPort::Main: case OrbisAudioOutPort::Main:
case OrbisAudioOutPort::Bgm: case OrbisAudioOutPort::Bgm:
@ -200,7 +200,9 @@ int PS4_SYSV_ABI sceAudioOutGetPortState(s32 handle, OrbisAudioOutPortState* sta
default: default:
UNREACHABLE(); UNREACHABLE();
} }
state->rerouteCounter = 0;
state->volume = 127;
}
return ORBIS_OK; return ORBIS_OK;
} }
@ -279,15 +281,16 @@ static void AudioOutputThread(PortOut* port, const std::stop_token& stop) {
while (true) { while (true) {
timer.Start(); timer.Start();
{ {
std::unique_lock lock{port->output_mutex}; std::unique_lock lock{port->mutex};
Common::CondvarWait(port->output_cv, lock, stop, [&] { return port->output_ready; }); if (port->output_cv.wait(lock, stop, [&] { return port->output_ready; })) {
if (stop.stop_requested()) {
break;
}
port->impl->Output(port->output_buffer); port->impl->Output(port->output_buffer);
port->output_ready = false; port->output_ready = false;
} }
}
port->output_cv.notify_one(); port->output_cv.notify_one();
if (stop.stop_requested()) {
break;
}
timer.End(); timer.End();
} }
} }
@ -332,14 +335,17 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id,
return ORBIS_AUDIO_OUT_ERROR_INVALID_FORMAT; return ORBIS_AUDIO_OUT_ERROR_INVALID_FORMAT;
} }
std::scoped_lock lock{ports_mutex}; std::unique_lock open_lock{port_open_mutex};
const auto port = const auto port =
std::ranges::find_if(ports_out, [&](const PortOut& p) { return p.impl == nullptr; }); std::ranges::find_if(ports_out, [&](const PortOut& p) { return !p.IsOpen(); });
if (port == ports_out.end()) { if (port == ports_out.end()) {
LOG_ERROR(Lib_AudioOut, "Audio ports are full"); LOG_ERROR(Lib_AudioOut, "Audio ports are full");
return ORBIS_AUDIO_OUT_ERROR_PORT_FULL; return ORBIS_AUDIO_OUT_ERROR_PORT_FULL;
} }
{
std::unique_lock port_lock(port->mutex);
port->type = port_type; port->type = port_type;
port->format_info = GetFormatInfo(format); port->format_info = GetFormatInfo(format);
port->sample_rate = sample_rate; port->sample_rate = sample_rate;
@ -352,7 +358,7 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id,
port->output_ready = false; port->output_ready = false;
port->output_thread.Run( port->output_thread.Run(
[port](const std::stop_token& stop) { AudioOutputThread(&*port, stop); }); [port](const std::stop_token& stop) { AudioOutputThread(&*port, stop); });
}
return std::distance(ports_out.begin(), port) + 1; return std::distance(ports_out.begin(), port) + 1;
} }
@ -367,14 +373,13 @@ s32 PS4_SYSV_ABI sceAudioOutOutput(s32 handle, void* ptr) {
} }
auto& port = ports_out.at(handle - 1); auto& port = ports_out.at(handle - 1);
if (!port.impl) { {
std::unique_lock lock{port.mutex};
if (!port.IsOpen()) {
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
} }
{
std::unique_lock lock{port.output_mutex};
port.output_cv.wait(lock, [&] { return !port.output_ready; }); port.output_cv.wait(lock, [&] { return !port.output_ready; });
if (ptr != nullptr) { if (ptr != nullptr && port.IsOpen()) {
std::memcpy(port.output_buffer, ptr, port.BufferSize()); std::memcpy(port.output_buffer, ptr, port.BufferSize());
port.output_ready = true; port.output_ready = true;
} }
@ -488,19 +493,19 @@ s32 PS4_SYSV_ABI sceAudioOutSetVolume(s32 handle, s32 flag, s32* vol) {
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
} }
std::scoped_lock lock(ports_mutex);
auto& port = ports_out.at(handle - 1); auto& port = ports_out.at(handle - 1);
if (!port.impl) { {
std::unique_lock lock{port.mutex};
if (!port.IsOpen()) {
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT; return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
} }
for (int i = 0; i < port.format_info.num_channels; i++, flag >>= 1u) { for (int i = 0; i < port.format_info.num_channels; i++, flag >>= 1u) {
if (flag & 0x1u) { if (flag & 0x1u) {
port.volume[i] = vol[i]; port.volume[i] = vol[i];
} }
} }
port.impl->SetVolume(port.volume); port.impl->SetVolume(port.volume);
}
return ORBIS_OK; return ORBIS_OK;
} }

View File

@ -3,7 +3,9 @@
#pragma once #pragma once
#include <condition_variable>
#include <memory> #include <memory>
#include <mutex>
#include "common/bit_field.h" #include "common/bit_field.h"
#include "core/libraries/kernel/threads.h" #include "core/libraries/kernel/threads.h"
@ -74,10 +76,10 @@ struct AudioFormatInfo {
}; };
struct PortOut { struct PortOut {
std::mutex mutex;
std::unique_ptr<PortBackend> impl{}; std::unique_ptr<PortBackend> impl{};
void* output_buffer; void* output_buffer;
std::mutex output_mutex;
std::condition_variable_any output_cv; std::condition_variable_any output_cv;
bool output_ready; bool output_ready;
Kernel::Thread output_thread{}; Kernel::Thread output_thread{};
@ -88,6 +90,10 @@ struct PortOut {
u32 buffer_frames; u32 buffer_frames;
std::array<s32, 8> volume; std::array<s32, 8> volume;
[[nodiscard]] bool IsOpen() const {
return impl != nullptr;
}
[[nodiscard]] u32 BufferSize() const { [[nodiscard]] u32 BufferSize() const {
return buffer_frames * format_info.FrameSize(); return buffer_frames * format_info.FrameSize();
} }

View File

@ -14,14 +14,7 @@ namespace Libraries::AudioOut {
class SDLPortBackend : public PortBackend { class SDLPortBackend : public PortBackend {
public: public:
explicit SDLPortBackend(const PortOut& port) explicit SDLPortBackend(const PortOut& port)
: frame_size(port.format_info.FrameSize()), buffer_size(port.BufferSize()) { : frame_size(port.format_info.FrameSize()), guest_buffer_size(port.BufferSize()) {
// We want the latency for delivering frames out to be as small as possible,
// so set the sample frames hint to the number of frames per buffer.
const auto samples_num_str = std::to_string(port.buffer_frames);
if (!SDL_SetHint(SDL_HINT_AUDIO_DEVICE_SAMPLE_FRAMES, samples_num_str.c_str())) {
LOG_WARNING(Lib_AudioOut, "Failed to set SDL audio sample frames hint to {}: {}",
samples_num_str, SDL_GetError());
}
const SDL_AudioSpec fmt = { const SDL_AudioSpec fmt = {
.format = port.format_info.is_float ? SDL_AUDIO_F32LE : SDL_AUDIO_S16LE, .format = port.format_info.is_float ? SDL_AUDIO_F32LE : SDL_AUDIO_S16LE,
.channels = port.format_info.num_channels, .channels = port.format_info.num_channels,
@ -33,7 +26,7 @@ public:
LOG_ERROR(Lib_AudioOut, "Failed to create SDL audio stream: {}", SDL_GetError()); LOG_ERROR(Lib_AudioOut, "Failed to create SDL audio stream: {}", SDL_GetError());
return; return;
} }
queue_threshold = CalculateQueueThreshold(); CalculateQueueThreshold();
if (!SDL_SetAudioStreamInputChannelMap(stream, port.format_info.channel_layout.data(), if (!SDL_SetAudioStreamInputChannelMap(stream, port.format_info.channel_layout.data(),
port.format_info.num_channels)) { port.format_info.num_channels)) {
LOG_ERROR(Lib_AudioOut, "Failed to configure SDL audio stream channel map: {}", LOG_ERROR(Lib_AudioOut, "Failed to configure SDL audio stream channel map: {}",
@ -71,9 +64,9 @@ public:
queue_threshold); queue_threshold);
SDL_ClearAudioStream(stream); SDL_ClearAudioStream(stream);
// Recalculate the threshold in case this happened because of a device change. // Recalculate the threshold in case this happened because of a device change.
queue_threshold = CalculateQueueThreshold(); CalculateQueueThreshold();
} }
if (!SDL_PutAudioStreamData(stream, ptr, static_cast<int>(buffer_size))) { if (!SDL_PutAudioStreamData(stream, ptr, static_cast<int>(guest_buffer_size))) {
LOG_ERROR(Lib_AudioOut, "Failed to output to SDL audio stream: {}", SDL_GetError()); LOG_ERROR(Lib_AudioOut, "Failed to output to SDL audio stream: {}", SDL_GetError());
} }
} }
@ -91,7 +84,7 @@ public:
} }
private: private:
[[nodiscard]] u32 CalculateQueueThreshold() const { void CalculateQueueThreshold() {
SDL_AudioSpec discard; SDL_AudioSpec discard;
int sdl_buffer_frames; int sdl_buffer_frames;
if (!SDL_GetAudioDeviceFormat(SDL_GetAudioStreamDevice(stream), &discard, if (!SDL_GetAudioDeviceFormat(SDL_GetAudioStreamDevice(stream), &discard,
@ -100,13 +93,22 @@ private:
SDL_GetError()); SDL_GetError());
sdl_buffer_frames = 0; sdl_buffer_frames = 0;
} }
return std::max<u32>(buffer_size, sdl_buffer_frames * frame_size) * 4; const auto sdl_buffer_size = sdl_buffer_frames * frame_size;
const auto new_threshold = std::max(guest_buffer_size, sdl_buffer_size) * 4;
if (host_buffer_size != sdl_buffer_size || queue_threshold != new_threshold) {
host_buffer_size = sdl_buffer_size;
queue_threshold = new_threshold;
LOG_INFO(Lib_AudioOut,
"SDL audio buffers: guest = {} bytes, host = {} bytes, threshold = {} bytes",
guest_buffer_size, host_buffer_size, queue_threshold);
}
} }
u32 frame_size; u32 frame_size;
u32 buffer_size; u32 guest_buffer_size;
u32 queue_threshold; u32 host_buffer_size{};
SDL_AudioStream* stream; u32 queue_threshold{};
SDL_AudioStream* stream{};
}; };
std::unique_ptr<PortBackend> SDLAudioOut::Open(PortOut& port) { std::unique_ptr<PortBackend> SDLAudioOut::Open(PortOut& port) {

View File

@ -12,6 +12,7 @@
#include "core/address_space.h" #include "core/address_space.h"
#include "core/debug_state.h" #include "core/debug_state.h"
#include "core/libraries/gnmdriver/gnm_error.h" #include "core/libraries/gnmdriver/gnm_error.h"
#include "core/libraries/gnmdriver/gnmdriver_init.h"
#include "core/libraries/kernel/orbis_error.h" #include "core/libraries/kernel/orbis_error.h"
#include "core/libraries/kernel/process.h" #include "core/libraries/kernel/process.h"
#include "core/libraries/libs.h" #include "core/libraries/libs.h"
@ -54,244 +55,11 @@ enum ShaderStages : u32 {
static constexpr std::array indirect_sgpr_offsets{0u, 0u, 0x4cu, 0u, 0xccu, 0u, 0x14cu}; static constexpr std::array indirect_sgpr_offsets{0u, 0u, 0x4cu, 0u, 0xccu, 0u, 0x14cu};
static constexpr auto HwInitPacketSize = 0x100u; // Gates use of what appear to be the neo-mode init sequences but with the older
// IA_MULTI_VGT_PARAM register address. No idea what this is for as the ioctl
// clang-format off // that controls it is still a mystery, but leaving the sequences in gated behind
static constexpr std::array InitSequence{ // this flag in case we need it in the future.
// A fake preamble to mimic context reset sent by FW static constexpr bool UseNeoCompatSequences = false;
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1ffu,
0xc0017600u, 0x46u, 0x1ffu,
0xc0017600u, 0x87u, 0x1ffu,
0xc0017600u, 0xc7u, 0x1ffu,
0xc0017600u, 0x107u, 0u,
0xc0017600u, 0x147u, 0x1ffu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6000000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence.size() == 0x73 + 2);
static constexpr std::array InitSequence175{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1ffu,
0xc0017600u, 0x46u, 0x1ffu,
0xc0017600u, 0x87u, 0x1ffu,
0xc0017600u, 0xc7u, 0x1ffu,
0xc0017600u, 0x107u, 0u,
0xc0017600u, 0x147u, 0x1ffu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence175.size() == 0x73 + 2);
static constexpr std::array InitSequence200{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence200.size() == 0x76 + 2);
static constexpr std::array InitSequence350{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
0xc0016900u, 0x2aau, 0xffu,
};
static_assert(InitSequence350.size() == 0x7c + 2);
static constexpr std::array CtxInitSequence{
0xc0012800u, 0x80000000u, 0x80000000u,
0xc0001200u, 0u,
0xc0002f00u, 1u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0111000u, 0u
};
static_assert(CtxInitSequence.size() == 0x0f);
static constexpr std::array CtxInitSequence400{
0xc0012800u, 0x80000000u, 0x80000000u,
0xc0001200u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0016900u, 0x2aau, 0xffu,
0xc09e1000u,
};
static_assert(CtxInitSequence400.size() == 0x61);
// clang-format on
// In case if `submitDone` is issued we need to block submissions until GPU idle // In case if `submitDone` is issued we need to block submissions until GPU idle
static u32 submission_lock{}; static u32 submission_lock{};
@ -317,6 +85,14 @@ static void WaitGpuIdle() {
cv_lock.wait(lock, [] { return submission_lock == 0; }); cv_lock.wait(lock, [] { return submission_lock == 0; });
} }
// Write a special ending NOP packet with N DWs data block
static inline u32* WriteTrailingNop(u32* cmdbuf, u32 data_block_size) {
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
nop->header = PM4Type3Header{PM4ItOpcode::Nop, data_block_size - 1};
nop->data_block[0] = 0u; // only one out of `data_block_size` is initialized
return cmdbuf + data_block_size + 1 /* header */;
}
// Write a special ending NOP packet with N DWs data block // Write a special ending NOP packet with N DWs data block
template <u32 data_block_size> template <u32 data_block_size>
static inline u32* WriteTrailingNop(u32* cmdbuf) { static inline u32* WriteTrailingNop(u32* cmdbuf) {
@ -607,9 +383,16 @@ s32 PS4_SYSV_ABI sceGnmDispatchIndirect(u32* cmdbuf, u32 size, u32 data_offset,
return -1; return -1;
} }
int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec() { s32 PS4_SYSV_ABI sceGnmDispatchIndirectOnMec(u32* cmdbuf, u32 size, VAddr args, u32 modifier) {
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); if (cmdbuf != nullptr && size == 8 && args != 0 && ((args & 3u) == 0)) {
cmdbuf[0] = 0xc0021602 | (modifier & 1u);
*(VAddr*)(&cmdbuf[1]) = args;
cmdbuf[3] = (modifier & 0x18) | 1u;
cmdbuf[4] = 0xc0021000;
cmdbuf[5] = 0;
return ORBIS_OK; return ORBIS_OK;
}
return ORBIS_FAIL;
} }
u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) { u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) {
@ -619,17 +402,30 @@ u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) {
return 0; return 0;
} }
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x216u, cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
cmdbuf, 0x216u,
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE0 0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE0
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x217u, cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
cmdbuf, 0x217u,
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1 0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS
if (sceKernelIsNeoMode()) {
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
cmdbuf, 0x219u,
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE2
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
cmdbuf, 0x21au,
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE3
}
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS
cmdbuf = WriteHeader<PM4ItOpcode::AcquireMem>(cmdbuf, 6); cmdbuf = WriteHeader<PM4ItOpcode::AcquireMem>(cmdbuf, 6);
cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u, 0u); cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u, 0xau);
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xef); cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, sceKernelIsNeoMode() ? 0xe9 : 0xef);
cmdbuf = WriteBody(cmdbuf, 0xau, 0u); cmdbuf = WriteBody(cmdbuf, 0u);
return HwInitPacketSize; return HwInitPacketSize;
} }
@ -646,7 +442,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr
draw_index->index_base_lo = u32(index_addr); draw_index->index_base_lo = u32(index_addr);
draw_index->index_base_hi = u32(index_addr >> 32); draw_index->index_base_hi = u32(index_addr >> 32);
draw_index->index_count = index_count; draw_index->index_count = index_count;
draw_index->draw_initiator = 0; draw_index->draw_initiator = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0;
WriteTrailingNop<3>(cmdbuf + 6); WriteTrailingNop<3>(cmdbuf + 6);
return ORBIS_OK; return ORBIS_OK;
@ -659,8 +455,9 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32
if (cmdbuf && (size == 7) && if (cmdbuf && (size == 7) &&
(flags & 0x1ffffffe) == 0) { // no predication will be set in the packet (flags & 0x1ffffffe) == 0) { // no predication will be set in the packet
cmdbuf = WritePacket<PM4ItOpcode::DrawIndexAuto>(cmdbuf, PM4ShaderType::ShaderGraphics, cmdbuf = WritePacket<PM4ItOpcode::DrawIndexAuto>(
index_count, 2u); cmdbuf, PM4ShaderType::ShaderGraphics, index_count,
sceKernelIsNeoMode() ? flags & 0xe0000000u | 2u : 2u);
WriteTrailingNop<3>(cmdbuf); WriteTrailingNop<3>(cmdbuf);
return ORBIS_OK; return ORBIS_OK;
} }
@ -684,7 +481,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirect(u32* cmdbuf, u32 size, u32 data_offset,
cmdbuf[0] = data_offset; cmdbuf[0] = data_offset;
cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset; cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset;
cmdbuf[2] = instance_sgpr_offset == 0 ? 0 : (instance_sgpr_offset & 0xffffu) + sgpr_offset; cmdbuf[2] = instance_sgpr_offset == 0 ? 0 : (instance_sgpr_offset & 0xffffu) + sgpr_offset;
cmdbuf[3] = 0; cmdbuf[3] = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0u;
cmdbuf += 4; cmdbuf += 4;
WriteTrailingNop<3>(cmdbuf); WriteTrailingNop<3>(cmdbuf);
@ -699,8 +496,9 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da
u32 flags) { u32 flags) {
LOG_TRACE(Lib_GnmDriver, "called"); LOG_TRACE(Lib_GnmDriver, "called");
if (cmdbuf && (size == 16) && (shader_stage < ShaderStages::Max) && if ((!sceKernelIsNeoMode() || !UseNeoCompatSequences) && !cmdbuf && (size == 16) &&
(vertex_sgpr_offset < 0x10u) && (instance_sgpr_offset < 0x10u)) { (shader_stage < ShaderStages::Max) && (vertex_sgpr_offset < 0x10u) &&
(instance_sgpr_offset < 0x10u)) {
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 2); cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 2);
cmdbuf = WriteBody(cmdbuf, 0u); cmdbuf = WriteBody(cmdbuf, 0u);
@ -719,7 +517,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da
cmdbuf[4] = max_count; cmdbuf[4] = max_count;
*(u64*)(&cmdbuf[5]) = count_addr; *(u64*)(&cmdbuf[5]) = count_addr;
cmdbuf[7] = sizeof(DrawIndexedIndirectArgs); cmdbuf[7] = sizeof(DrawIndexedIndirectArgs);
cmdbuf[8] = 0; cmdbuf[8] = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0;
cmdbuf += 9; cmdbuf += 9;
WriteTrailingNop<2>(cmdbuf); WriteTrailingNop<2>(cmdbuf);
@ -748,7 +546,8 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset,
const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable; const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable;
cmdbuf = WriteHeader<PM4ItOpcode::DrawIndexOffset2>( cmdbuf = WriteHeader<PM4ItOpcode::DrawIndexOffset2>(
cmdbuf, 4, PM4ShaderType::ShaderGraphics, predicate); cmdbuf, 4, PM4ShaderType::ShaderGraphics, predicate);
cmdbuf = WriteBody(cmdbuf, index_count, index_offset, index_count, 0u); cmdbuf = WriteBody(cmdbuf, index_count, index_offset, index_count,
sceKernelIsNeoMode() ? flags & 0xe0000000u : 0u);
WriteTrailingNop<3>(cmdbuf); WriteTrailingNop<3>(cmdbuf);
return ORBIS_OK; return ORBIS_OK;
@ -772,7 +571,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32
cmdbuf[0] = data_offset; cmdbuf[0] = data_offset;
cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset; cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset;
cmdbuf[2] = instance_sgpr_offset == 0 ? 0 : (instance_sgpr_offset & 0xffffu) + sgpr_offset; cmdbuf[2] = instance_sgpr_offset == 0 ? 0 : (instance_sgpr_offset & 0xffffu) + sgpr_offset;
cmdbuf[3] = 2; // auto index cmdbuf[3] = sceKernelIsNeoMode() ? flags & 0xe0000000u | 2u : 2u; // auto index
cmdbuf += 4; cmdbuf += 4;
WriteTrailingNop<3>(cmdbuf); WriteTrailingNop<3>(cmdbuf);
@ -801,6 +600,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(u32* cmdbuf, u32 size) {
} }
const auto& SetupContext = [](u32* cmdbuf, u32 size, bool clear_state) { const auto& SetupContext = [](u32* cmdbuf, u32 size, bool clear_state) {
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
if (clear_state) { if (clear_state) {
cmdbuf = ClearContextState(cmdbuf); cmdbuf = ClearContextState(cmdbuf);
} }
@ -808,10 +608,8 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(u32* cmdbuf, u32 size) {
std::memcpy(cmdbuf, &InitSequence[2], (InitSequence.size() - 2) * 4); std::memcpy(cmdbuf, &InitSequence[2], (InitSequence.size() - 2) * 4);
cmdbuf += InitSequence.size() - 2; cmdbuf += InitSequence.size() - 2;
const auto cmdbuf_left = const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
HwInitPacketSize - (InitSequence.size() - 2) - (clear_state ? 0xc : 0) - 1; WriteTrailingNop(cmdbuf, cmdbuf_left);
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
cmdbuf = WriteBody(cmdbuf, 0u);
return HwInitPacketSize; return HwInitPacketSize;
}; };
@ -826,12 +624,13 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(u32* cmdbuf, u32 size) {
return 0; return 0;
} }
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
cmdbuf = ClearContextState(cmdbuf); cmdbuf = ClearContextState(cmdbuf);
std::memcpy(cmdbuf, &InitSequence175[2], (InitSequence175.size() - 2) * 4); std::memcpy(cmdbuf, &InitSequence175[2], (InitSequence175.size() - 2) * 4);
cmdbuf += InitSequence175.size() - 2; cmdbuf += InitSequence175.size() - 2;
constexpr auto cmdbuf_left = HwInitPacketSize - (InitSequence175.size() - 2) - 0xc - 1; const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
WriteTrailingNop<cmdbuf_left>(cmdbuf); WriteTrailingNop(cmdbuf, cmdbuf_left);
return HwInitPacketSize; return HwInitPacketSize;
} }
@ -844,17 +643,27 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size) {
} }
const auto& SetupContext200 = [](u32* cmdbuf, u32 size, bool clear_state) { const auto& SetupContext200 = [](u32* cmdbuf, u32 size, bool clear_state) {
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
if (clear_state) { if (clear_state) {
cmdbuf = ClearContextState(cmdbuf); cmdbuf = ClearContextState(cmdbuf);
} }
if (sceKernelIsNeoMode()) {
if (!UseNeoCompatSequences) {
std::memcpy(cmdbuf, &InitSequence200Neo[2], (InitSequence200Neo.size() - 2) * 4);
cmdbuf += InitSequence200Neo.size() - 2;
} else {
std::memcpy(cmdbuf, &InitSequence200NeoCompat[2],
(InitSequence200NeoCompat.size() - 2) * 4);
cmdbuf += InitSequence200NeoCompat.size() - 2;
}
} else {
std::memcpy(cmdbuf, &InitSequence200[2], (InitSequence200.size() - 2) * 4); std::memcpy(cmdbuf, &InitSequence200[2], (InitSequence200.size() - 2) * 4);
cmdbuf += InitSequence200.size() - 2; cmdbuf += InitSequence200.size() - 2;
}
const auto cmdbuf_left = const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
HwInitPacketSize - (InitSequence200.size() - 2) - (clear_state ? 0xc : 0) - 1; WriteTrailingNop(cmdbuf, cmdbuf_left);
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
cmdbuf = WriteBody(cmdbuf, 0u);
return HwInitPacketSize; return HwInitPacketSize;
}; };
@ -870,17 +679,27 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) {
} }
const auto& SetupContext350 = [](u32* cmdbuf, u32 size, bool clear_state) { const auto& SetupContext350 = [](u32* cmdbuf, u32 size, bool clear_state) {
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
if (clear_state) { if (clear_state) {
cmdbuf = ClearContextState(cmdbuf); cmdbuf = ClearContextState(cmdbuf);
} }
if (sceKernelIsNeoMode()) {
if (!UseNeoCompatSequences) {
std::memcpy(cmdbuf, &InitSequence350Neo[2], (InitSequence350Neo.size() - 2) * 4);
cmdbuf += InitSequence350Neo.size() - 2;
} else {
std::memcpy(cmdbuf, &InitSequence350NeoCompat[2],
(InitSequence350NeoCompat.size() - 2) * 4);
cmdbuf += InitSequence350NeoCompat.size() - 2;
}
} else {
std::memcpy(cmdbuf, &InitSequence350[2], (InitSequence350.size() - 2) * 4); std::memcpy(cmdbuf, &InitSequence350[2], (InitSequence350.size() - 2) * 4);
cmdbuf += InitSequence350.size() - 2; cmdbuf += InitSequence350.size() - 2;
}
const auto cmdbuf_left = const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
HwInitPacketSize - (InitSequence350.size() - 2) - (clear_state ? 0xc : 0) - 1; WriteTrailingNop(cmdbuf, cmdbuf_left);
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
cmdbuf = WriteBody(cmdbuf, 0u);
return HwInitPacketSize; return HwInitPacketSize;
}; };
@ -896,7 +715,11 @@ u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(u32* cmdbuf, u32 size) {
return 0; return 0;
} }
if (sceKernelIsNeoMode()) {
std::memcpy(cmdbuf, CtxInitSequenceNeo.data(), CtxInitSequenceNeo.size() * 4);
} else {
std::memcpy(cmdbuf, CtxInitSequence.data(), CtxInitSequence.size() * 4); std::memcpy(cmdbuf, CtxInitSequence.data(), CtxInitSequence.size() * 4);
}
return CtxInitPacketSize; return CtxInitPacketSize;
} }
@ -908,7 +731,16 @@ u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(u32* cmdbuf, u32 size) {
return 0; return 0;
} }
if (sceKernelIsNeoMode()) {
if (!UseNeoCompatSequences) {
std::memcpy(cmdbuf, CtxInitSequence400Neo.data(), CtxInitSequence400Neo.size() * 4);
} else {
std::memcpy(cmdbuf, CtxInitSequence400NeoCompat.data(),
CtxInitSequence400NeoCompat.size() * 4);
}
} else {
std::memcpy(cmdbuf, CtxInitSequence400.data(), CtxInitSequence400.size() * 4); std::memcpy(cmdbuf, CtxInitSequence400.data(), CtxInitSequence400.size() * 4);
}
return CtxInitPacketSize; return CtxInitPacketSize;
} }
@ -1030,7 +862,8 @@ int PS4_SYSV_ABI sceGnmGetGpuBlockStatus() {
u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() { u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() {
LOG_TRACE(Lib_GnmDriver, "called"); LOG_TRACE(Lib_GnmDriver, "called");
return Config::isNeoMode() ? 911'000'000 : 800'000'000; // On console this uses an ioctl check, but we assume it is equal to just checking for neo mode.
return sceKernelIsNeoMode() ? 911'000'000 : 800'000'000;
} }
int PS4_SYSV_ABI sceGnmGetGpuInfoStatus() { int PS4_SYSV_ABI sceGnmGetGpuInfoStatus() {
@ -1369,7 +1202,15 @@ s32 PS4_SYSV_ABI sceGnmResetVgtControl(u32* cmdbuf, u32 size) {
if (cmdbuf == nullptr || size != 3) { if (cmdbuf == nullptr || size != 3) {
return -1; return -1;
} }
if (sceKernelIsNeoMode()) {
if (!UseNeoCompatSequences) {
PM4CmdSetData::SetUconfigReg(cmdbuf, 0x40000258u, 0x6d007fu); // IA_MULTI_VGT_PARAM
} else {
PM4CmdSetData::SetContextReg(cmdbuf, 0x100002aau, 0xd00ffu); // IA_MULTI_VGT_PARAM
}
} else {
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, 0xffu); // IA_MULTI_VGT_PARAM PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, 0xffu); // IA_MULTI_VGT_PARAM
}
return ORBIS_OK; return ORBIS_OK;
} }
@ -1830,9 +1671,25 @@ s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_mi
return -1; return -1;
} }
if (sceKernelIsNeoMode()) {
const u32 wd_switch_on_eop = u32(wd_switch_only_on_eop_mode != 0) << 0x14;
const u32 switch_on_eoi = u32(wd_switch_only_on_eop_mode == 0) << 0x13;
const u32 reg_value =
wd_switch_only_on_eop_mode != 0
? (partial_vs_wave_mode & 1) << 0x10 | prim_group_sz_minus_one | wd_switch_on_eop |
switch_on_eoi | 0x40000u
: prim_group_sz_minus_one & 0x1cffffu | wd_switch_on_eop | switch_on_eoi | 0x50000u;
if (!UseNeoCompatSequences) {
PM4CmdSetData::SetUconfigReg(cmdbuf, 0x40000258u,
reg_value | 0x600000u); // IA_MULTI_VGT_PARAM
} else {
PM4CmdSetData::SetContextReg(cmdbuf, 0x100002aau, reg_value); // IA_MULTI_VGT_PARAM
}
} else {
const u32 reg_value = const u32 reg_value =
((partial_vs_wave_mode & 1) << 0x10) | (prim_group_sz_minus_one & 0xffffu); ((partial_vs_wave_mode & 1) << 0x10) | (prim_group_sz_minus_one & 0xffffu);
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, reg_value); // IA_MULTI_VGT_PARAM PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, reg_value); // IA_MULTI_VGT_PARAM
}
return ORBIS_OK; return ORBIS_OK;
} }
@ -2215,10 +2072,26 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count,
if (sdk_version <= 0x1ffffffu) { if (sdk_version <= 0x1ffffffu) {
liverpool->SubmitGfx(InitSequence, {}); liverpool->SubmitGfx(InitSequence, {});
} else if (sdk_version <= 0x3ffffffu) { } else if (sdk_version <= 0x3ffffffu) {
if (sceKernelIsNeoMode()) {
if (!UseNeoCompatSequences) {
liverpool->SubmitGfx(InitSequence200Neo, {});
} else {
liverpool->SubmitGfx(InitSequence200NeoCompat, {});
}
} else {
liverpool->SubmitGfx(InitSequence200, {}); liverpool->SubmitGfx(InitSequence200, {});
}
} else {
if (sceKernelIsNeoMode()) {
if (!UseNeoCompatSequences) {
liverpool->SubmitGfx(InitSequence350Neo, {});
} else {
liverpool->SubmitGfx(InitSequence350NeoCompat, {});
}
} else { } else {
liverpool->SubmitGfx(InitSequence350, {}); liverpool->SubmitGfx(InitSequence350, {});
} }
}
send_init_packet = false; send_init_packet = false;
} }

View File

@ -39,7 +39,7 @@ int PS4_SYSV_ABI sceGnmDisableMipStatsReport();
s32 PS4_SYSV_ABI sceGnmDispatchDirect(u32* cmdbuf, u32 size, u32 threads_x, u32 threads_y, s32 PS4_SYSV_ABI sceGnmDispatchDirect(u32* cmdbuf, u32 size, u32 threads_x, u32 threads_y,
u32 threads_z, u32 flags); u32 threads_z, u32 flags);
s32 PS4_SYSV_ABI sceGnmDispatchIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 flags); s32 PS4_SYSV_ABI sceGnmDispatchIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 flags);
int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec(); s32 PS4_SYSV_ABI sceGnmDispatchIndirectOnMec(u32* cmdbuf, u32 size, VAddr args, u32 modifier);
u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size); u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size);
s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr_t index_addr, s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr_t index_addr,
u32 flags, u32 type); u32 flags, u32 type);

View File

@ -0,0 +1,542 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <array>
namespace Libraries::GnmDriver {
constexpr auto HwInitPacketSize = 0x100u;
// clang-format off
constexpr std::array InitSequence{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1ffu,
0xc0017600u, 0x46u, 0x1ffu,
0xc0017600u, 0x87u, 0x1ffu,
0xc0017600u, 0xc7u, 0x1ffu,
0xc0017600u, 0x107u, 0u,
0xc0017600u, 0x147u, 0x1ffu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6000000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence.size() == 0x73 + 2);
constexpr std::array InitSequence175{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1ffu,
0xc0017600u, 0x46u, 0x1ffu,
0xc0017600u, 0x87u, 0x1ffu,
0xc0017600u, 0xc7u, 0x1ffu,
0xc0017600u, 0x107u, 0u,
0xc0017600u, 0x147u, 0x1ffu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence175.size() == 0x73 + 2);
constexpr std::array InitSequence200{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence200.size() == 0x76 + 2);
constexpr std::array InitSequence200Neo{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x219u, 0xffffffffu,
0xc0017600u, 0x21au, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
0xc0017900u, 0x40000258u, 0x6d007fu,
};
static_assert(InitSequence200Neo.size() == 0x83 + 2);
constexpr std::array InitSequence200NeoCompat{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x219u, 0xffffffffu,
0xc0017600u, 0x21au, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
0xc0016900u, 0x100002aau, 0xd00ffu,
};
static_assert(InitSequence200NeoCompat.size() == 0x83 + 2);
constexpr std::array InitSequence350{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
0xc0016900u, 0x2aau, 0xffu,
};
static_assert(InitSequence350.size() == 0x7c + 2);
constexpr std::array InitSequence350Neo{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x219u, 0xffffffffu,
0xc0017600u, 0x21au, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
0xc0017900u, 0x40000258u, 0x6d007fu,
};
static_assert(InitSequence350Neo.size() == 0x86 + 2);
constexpr std::array InitSequence350NeoCompat{
// A fake preamble to mimic context reset sent by FW
0xc0001200u, 0u, // IT_CLEAR_STATE
// Actual init state sequence
0xc0017600u, 0x216u, 0xffffffffu,
0xc0017600u, 0x217u, 0xffffffffu,
0xc0017600u, 0x219u, 0xffffffffu,
0xc0017600u, 0x21au, 0xffffffffu,
0xc0017600u, 0x215u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0017600u, 7u, 0x1701ffu,
0xc0017600u, 0x46u, 0x1701fdu,
0xc0017600u, 0x87u, 0x1701ffu,
0xc0017600u, 0xc7u, 0x1701fdu,
0xc0017600u, 0x107u, 0x17u,
0xc0017600u, 0x147u, 0x1701fdu,
0xc0017600u, 0x47u, 0x1cu,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x200u, 0xe0000000u,
0xc0016900u, 0x100002aau, 0xd00ffu,
};
static_assert(InitSequence350NeoCompat.size() == 0x86 + 2);
constexpr std::array CtxInitSequence{
0xc0012800u, 0x80000000u, 0x80000000u,
0xc0001200u, 0u,
0xc0002f00u, 1u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0111000u, 0u
};
static_assert(CtxInitSequence.size() == 0x0f);
constexpr std::array CtxInitSequenceNeo{
0xc0012800u, 0x80000000u, 0x80000000u,
0xc0001200u, 0u,
0xc0002f00u, 1u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
0xc00d1000, 0u
};
static_assert(CtxInitSequenceNeo.size() == 0x13);
constexpr std::array CtxInitSequence400{
0xc0012800u, 0x80000000u, 0x80000000u,
0xc0001200u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0016900u, 0x2aau, 0xffu,
0xc09e1000u,
};
static_assert(CtxInitSequence400.size() == 0x61);
constexpr std::array CtxInitSequence400Neo{
0xc0012800u, 0x80000000u, 0x80000000u,
0xc0001200u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0017900u, 0x40000258u, 0x6d007fu,
0xc09a1000u,
};
static_assert(CtxInitSequence400Neo.size() == 0x65);
constexpr std::array CtxInitSequence400NeoCompat{
0xc0012800u, 0x80000000u, 0x80000000u,
0xc0001200u, 0u,
0xc0016900u, 0x2f9u, 0x2du,
0xc0016900u, 0x282u, 8u,
0xc0016900u, 0x280u, 0x80008u,
0xc0016900u, 0x281u, 0xffff0000u,
0xc0016900u, 0x204u, 0u,
0xc0016900u, 0x206u, 0x43fu,
0xc0016900u, 0x83u, 0xffffu,
0xc0016900u, 0x317u, 0x10u,
0xc0016900u, 0x2fau, 0x3f800000u,
0xc0016900u, 0x2fcu, 0x3f800000u,
0xc0016900u, 0x2fbu, 0x3f800000u,
0xc0016900u, 0x2fdu, 0x3f800000u,
0xc0016900u, 0x202u, 0xcc0010u,
0xc0016900u, 0x30eu, 0xffffffffu,
0xc0016900u, 0x30fu, 0xffffffffu,
0xc0002f00u, 1u,
0xc0016900u, 0x1b1u, 2u,
0xc0016900u, 0x101u, 0u,
0xc0016900u, 0x100u, 0xffffffffu,
0xc0016900u, 0x103u, 0u,
0xc0016900u, 0x284u, 0u,
0xc0016900u, 0x290u, 0u,
0xc0016900u, 0x2aeu, 0u,
0xc0016900u, 0x102u, 0u,
0xc0016900u, 0x292u, 0u,
0xc0016900u, 0x293u, 0x6020000u,
0xc0016900u, 0x2f8u, 0u,
0xc0016900u, 0x2deu, 0x1e9u,
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
0xc0016900u, 0x100002aau, 0xd00ffu,
0xc09a1000u,
};
static_assert(CtxInitSequence400Neo.size() == 0x65);
// clang-format on
} // namespace Libraries::GnmDriver

View File

@ -505,13 +505,13 @@ int PS4_SYSV_ABI posix_munmap(void* addr, size_t len) {
return result; return result;
} }
static constexpr int MAX_PTR_APERTURES = 3; static constexpr int MAX_PRT_APERTURES = 3;
static constexpr VAddr PRT_AREA_START_ADDR = 0x1000000000; static constexpr VAddr PRT_AREA_START_ADDR = 0x1000000000;
static constexpr size_t PRT_AREA_SIZE = 0xec00000000; static constexpr size_t PRT_AREA_SIZE = 0xec00000000;
static std::array<std::pair<VAddr, size_t>, MAX_PTR_APERTURES> PrtApertures{}; static std::array<std::pair<VAddr, size_t>, MAX_PRT_APERTURES> PrtApertures{};
int PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) { int PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) {
if (id < 0 || id >= MAX_PTR_APERTURES) { if (id < 0 || id >= MAX_PRT_APERTURES) {
return ORBIS_KERNEL_ERROR_EINVAL; return ORBIS_KERNEL_ERROR_EINVAL;
} }
@ -531,12 +531,12 @@ int PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) {
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceKernelGetPrtAperture(int id, VAddr* addres, size_t* size) { int PS4_SYSV_ABI sceKernelGetPrtAperture(int id, VAddr* address, size_t* size) {
if (id < 0 || id >= MAX_PTR_APERTURES) { if (id < 0 || id >= MAX_PRT_APERTURES) {
return ORBIS_KERNEL_ERROR_EINVAL; return ORBIS_KERNEL_ERROR_EINVAL;
} }
std::tie(*addres, *size) = PrtApertures[id]; std::tie(*address, *size) = PrtApertures[id];
return ORBIS_OK; return ORBIS_OK;
} }

View File

@ -14,7 +14,8 @@ namespace Libraries::Kernel {
int PS4_SYSV_ABI sceKernelIsNeoMode() { int PS4_SYSV_ABI sceKernelIsNeoMode() {
LOG_DEBUG(Kernel_Sce, "called"); LOG_DEBUG(Kernel_Sce, "called");
return Config::isNeoMode(); return Config::isNeoModeConsole() &&
Common::ElfInfo::Instance().PSFAttributes().support_neo_mode;
} }
int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver) { int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver) {

View File

@ -55,6 +55,9 @@ public:
stop.request_stop(); stop.request_stop();
Join(); Join();
} }
thread = nullptr;
func = nullptr;
stop = std::stop_source{};
} }
static void* PS4_SYSV_ABI RunWrapper(void* arg) { static void* PS4_SYSV_ABI RunWrapper(void* arg) {

View File

@ -18,6 +18,7 @@
#include "core/libraries/libc_internal/libc_internal.h" #include "core/libraries/libc_internal/libc_internal.h"
#include "core/libraries/libpng/pngdec.h" #include "core/libraries/libpng/pngdec.h"
#include "core/libraries/libs.h" #include "core/libraries/libs.h"
#include "core/libraries/mouse/mouse.h"
#include "core/libraries/move/move.h" #include "core/libraries/move/move.h"
#include "core/libraries/network/http.h" #include "core/libraries/network/http.h"
#include "core/libraries/network/net.h" #include "core/libraries/network/net.h"
@ -97,6 +98,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) {
Libraries::Move::RegisterlibSceMove(sym); Libraries::Move::RegisterlibSceMove(sym);
Libraries::Fiber::RegisterlibSceFiber(sym); Libraries::Fiber::RegisterlibSceFiber(sym);
Libraries::JpegEnc::RegisterlibSceJpegEnc(sym); Libraries::JpegEnc::RegisterlibSceJpegEnc(sym);
Libraries::Mouse::RegisterlibSceMouse(sym);
} }
} // namespace Libraries } // namespace Libraries

View File

@ -0,0 +1,99 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// Generated By moduleGenerator
#include "common/logging/log.h"
#include "core/libraries/error_codes.h"
#include "core/libraries/libs.h"
#include "mouse.h"
namespace Libraries::Mouse {
int PS4_SYSV_ABI sceMouseClose() {
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
return ORBIS_OK;
}
int PS4_SYSV_ABI sceMouseConnectPort() {
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
return ORBIS_OK;
}
int PS4_SYSV_ABI sceMouseDebugGetDeviceId() {
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
return ORBIS_OK;
}
int PS4_SYSV_ABI sceMouseDeviceOpen() {
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
return ORBIS_OK;
}
int PS4_SYSV_ABI sceMouseDisconnectDevice() {
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
return ORBIS_OK;
}
int PS4_SYSV_ABI sceMouseDisconnectPort() {
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
return ORBIS_OK;
}
int PS4_SYSV_ABI sceMouseGetDeviceInfo() {
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
return ORBIS_OK;
}
int PS4_SYSV_ABI sceMouseInit() {
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
return ORBIS_OK;
}
int PS4_SYSV_ABI sceMouseMbusInit() {
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
return ORBIS_OK;
}
int PS4_SYSV_ABI sceMouseOpen() {
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
return ORBIS_OK;
}
int PS4_SYSV_ABI sceMouseRead() {
LOG_DEBUG(Lib_Mouse, "(STUBBED) called");
return ORBIS_OK;
}
int PS4_SYSV_ABI sceMouseSetHandType() {
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
return ORBIS_OK;
}
int PS4_SYSV_ABI sceMouseSetPointerSpeed() {
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
return ORBIS_OK;
}
int PS4_SYSV_ABI sceMouseSetProcessPrivilege() {
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
return ORBIS_OK;
}
void RegisterlibSceMouse(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("cAnT0Rw-IwU", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseClose);
LIB_FUNCTION("Ymyy1HSSJLQ", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseConnectPort);
LIB_FUNCTION("BRXOoXQtb+k", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDebugGetDeviceId);
LIB_FUNCTION("WiGKINCZWkc", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDeviceOpen);
LIB_FUNCTION("eDQTFHbgeTU", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDisconnectDevice);
LIB_FUNCTION("jJP1vYMEPd4", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDisconnectPort);
LIB_FUNCTION("QA9Qupz3Zjw", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseGetDeviceInfo);
LIB_FUNCTION("Qs0wWulgl7U", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseInit);
LIB_FUNCTION("1FeceR5YhAo", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseMbusInit);
LIB_FUNCTION("RaqxZIf6DvE", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseOpen);
LIB_FUNCTION("x8qnXqh-tiM", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseRead);
LIB_FUNCTION("crkFfp-cmFo", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseSetHandType);
LIB_FUNCTION("ghLUU2Z5Lcg", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseSetPointerSpeed);
LIB_FUNCTION("6aANndpS0Wo", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseSetProcessPrivilege);
};
} // namespace Libraries::Mouse

View File

@ -0,0 +1,29 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace Core::Loader {
class SymbolsResolver;
}
namespace Libraries::Mouse {
int PS4_SYSV_ABI sceMouseClose();
int PS4_SYSV_ABI sceMouseConnectPort();
int PS4_SYSV_ABI sceMouseDebugGetDeviceId();
int PS4_SYSV_ABI sceMouseDeviceOpen();
int PS4_SYSV_ABI sceMouseDisconnectDevice();
int PS4_SYSV_ABI sceMouseDisconnectPort();
int PS4_SYSV_ABI sceMouseGetDeviceInfo();
int PS4_SYSV_ABI sceMouseInit();
int PS4_SYSV_ABI sceMouseMbusInit();
int PS4_SYSV_ABI sceMouseOpen();
int PS4_SYSV_ABI sceMouseRead();
int PS4_SYSV_ABI sceMouseSetHandType();
int PS4_SYSV_ABI sceMouseSetPointerSpeed();
int PS4_SYSV_ABI sceMouseSetProcessPrivilege();
void RegisterlibSceMouse(Core::Loader::SymbolsResolver* sym);
} // namespace Libraries::Mouse

View File

@ -972,11 +972,8 @@ int PS4_SYSV_ABI sceNpGetGamePresenceStatusA() {
} }
int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId user_id, OrbisNpId* np_id) { int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId user_id, OrbisNpId* np_id) {
LOG_INFO(Lib_NpManager, "user_id {}", user_id); LOG_DEBUG(Lib_NpManager, "user_id {}", user_id);
const auto name = Config::getUserName(); return ORBIS_NP_ERROR_SIGNED_OUT;
std::memset(np_id, 0, sizeof(OrbisNpId));
name.copy(np_id->handle.data, sizeof(np_id->handle.data));
return ORBIS_OK;
} }
int PS4_SYSV_ABI sceNpGetNpReachabilityState() { int PS4_SYSV_ABI sceNpGetNpReachabilityState() {
@ -986,10 +983,7 @@ int PS4_SYSV_ABI sceNpGetNpReachabilityState() {
int PS4_SYSV_ABI sceNpGetOnlineId(s32 user_id, OrbisNpOnlineId* online_id) { int PS4_SYSV_ABI sceNpGetOnlineId(s32 user_id, OrbisNpOnlineId* online_id) {
LOG_DEBUG(Lib_NpManager, "user_id {}", user_id); LOG_DEBUG(Lib_NpManager, "user_id {}", user_id);
const auto name = Config::getUserName(); return ORBIS_NP_ERROR_SIGNED_OUT;
std::memset(online_id, 0, sizeof(OrbisNpOnlineId));
name.copy(online_id->data, sizeof(online_id->data));
return ORBIS_OK;
} }
int PS4_SYSV_ABI sceNpGetParentalControlInfo() { int PS4_SYSV_ABI sceNpGetParentalControlInfo() {

View File

@ -104,8 +104,8 @@ int PS4_SYSV_ABI scePadGetControllerInformation(s32 handle, OrbisPadControllerIn
pInfo->touchPadInfo.pixelDensity = 1; pInfo->touchPadInfo.pixelDensity = 1;
pInfo->touchPadInfo.resolution.x = 1920; pInfo->touchPadInfo.resolution.x = 1920;
pInfo->touchPadInfo.resolution.y = 950; pInfo->touchPadInfo.resolution.y = 950;
pInfo->stickInfo.deadZoneLeft = 20; pInfo->stickInfo.deadZoneLeft = 2;
pInfo->stickInfo.deadZoneRight = 20; pInfo->stickInfo.deadZoneRight = 2;
pInfo->connectionType = ORBIS_PAD_PORT_TYPE_STANDARD; pInfo->connectionType = ORBIS_PAD_PORT_TYPE_STANDARD;
pInfo->connectedCount = 1; pInfo->connectedCount = 1;
pInfo->connected = true; pInfo->connected = true;

View File

@ -157,7 +157,7 @@ s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoCh
} }
for (int i = 0; i < numberOfEntries; i++) { for (int i = 0; i < numberOfEntries; i++) {
if (chunkIds[i] <= playgo->chunks.size()) { if (chunkIds[i] < playgo->chunks.size()) {
outLoci[i] = OrbisPlayGoLocus::LocalFast; outLoci[i] = OrbisPlayGoLocus::LocalFast;
} else { } else {
outLoci[i] = OrbisPlayGoLocus::NotDownloaded; outLoci[i] = OrbisPlayGoLocus::NotDownloaded;

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <span> #include <span>
#include <thread>
#include <vector> #include <vector>
#include <core/libraries/system/msgdialog_ui.h> #include <core/libraries/system/msgdialog_ui.h>
@ -1139,10 +1140,6 @@ Error PS4_SYSV_ABI sceSaveDataGetSaveDataMemory2(OrbisSaveDataMemoryGet2* getPar
LOG_INFO(Lib_SaveData, "called without save memory initialized"); LOG_INFO(Lib_SaveData, "called without save memory initialized");
return Error::MEMORY_NOT_READY; return Error::MEMORY_NOT_READY;
} }
if (SaveMemory::IsSaving()) {
LOG_TRACE(Lib_SaveData, "called while saving");
return Error::BUSY_FOR_SAVING;
}
LOG_DEBUG(Lib_SaveData, "called"); LOG_DEBUG(Lib_SaveData, "called");
auto data = getParam->data; auto data = getParam->data;
if (data != nullptr) { if (data != nullptr) {
@ -1501,10 +1498,16 @@ Error PS4_SYSV_ABI sceSaveDataSetSaveDataMemory2(const OrbisSaveDataMemorySet2*
LOG_INFO(Lib_SaveData, "called without save memory initialized"); LOG_INFO(Lib_SaveData, "called without save memory initialized");
return Error::MEMORY_NOT_READY; return Error::MEMORY_NOT_READY;
} }
if (SaveMemory::IsSaving()) {
int count = 0;
while (++count < 100 && SaveMemory::IsSaving()) { // try for more 10 seconds
std::this_thread::sleep_for(chrono::milliseconds(100));
}
if (SaveMemory::IsSaving()) { if (SaveMemory::IsSaving()) {
LOG_TRACE(Lib_SaveData, "called while saving"); LOG_TRACE(Lib_SaveData, "called while saving");
return Error::BUSY_FOR_SAVING; return Error::BUSY_FOR_SAVING;
} }
}
LOG_DEBUG(Lib_SaveData, "called"); LOG_DEBUG(Lib_SaveData, "called");
auto data = setParam->data; auto data = setParam->data;
if (data != nullptr) { if (data != nullptr) {
@ -1584,8 +1587,8 @@ Error PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory2(const OrbisSaveDataMemorySetu
} else { } else {
SaveMemory::SetIcon(nullptr, 0); SaveMemory::SetIcon(nullptr, 0);
} }
}
SaveMemory::TriggerSaveWithoutEvent(); SaveMemory::TriggerSaveWithoutEvent();
}
if (g_fw_ver >= ElfInfo::FW_45 && result != nullptr) { if (g_fw_ver >= ElfInfo::FW_45 && result != nullptr) {
result->existedMemorySize = existed_size; result->existedMemorySize = existed_size;
} }

View File

@ -10,327 +10,327 @@
namespace Libraries::Usbd { namespace Libraries::Usbd {
int PS4_SYSV_ABI sceUsbdAllocTransfer() { int PS4_SYSV_ABI sceUsbdAllocTransfer() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdAttachKernelDriver() { int PS4_SYSV_ABI sceUsbdAttachKernelDriver() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdBulkTransfer() { int PS4_SYSV_ABI sceUsbdBulkTransfer() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdCancelTransfer() { int PS4_SYSV_ABI sceUsbdCancelTransfer() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdCheckConnected() { int PS4_SYSV_ABI sceUsbdCheckConnected() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdClaimInterface() { int PS4_SYSV_ABI sceUsbdClaimInterface() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdClearHalt() { int PS4_SYSV_ABI sceUsbdClearHalt() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdClose() { int PS4_SYSV_ABI sceUsbdClose() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdControlTransfer() { int PS4_SYSV_ABI sceUsbdControlTransfer() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdControlTransferGetData() { int PS4_SYSV_ABI sceUsbdControlTransferGetData() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdControlTransferGetSetup() { int PS4_SYSV_ABI sceUsbdControlTransferGetSetup() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdDetachKernelDriver() { int PS4_SYSV_ABI sceUsbdDetachKernelDriver() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdEventHandlerActive() { int PS4_SYSV_ABI sceUsbdEventHandlerActive() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdEventHandlingOk() { int PS4_SYSV_ABI sceUsbdEventHandlingOk() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdExit() { int PS4_SYSV_ABI sceUsbdExit() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdFillBulkTransfer() { int PS4_SYSV_ABI sceUsbdFillBulkTransfer() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdFillControlSetup() { int PS4_SYSV_ABI sceUsbdFillControlSetup() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdFillControlTransfer() { int PS4_SYSV_ABI sceUsbdFillControlTransfer() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdFillInterruptTransfer() { int PS4_SYSV_ABI sceUsbdFillInterruptTransfer() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdFillIsoTransfer() { int PS4_SYSV_ABI sceUsbdFillIsoTransfer() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdFreeConfigDescriptor() { int PS4_SYSV_ABI sceUsbdFreeConfigDescriptor() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdFreeDeviceList() { int PS4_SYSV_ABI sceUsbdFreeDeviceList() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdFreeTransfer() { int PS4_SYSV_ABI sceUsbdFreeTransfer() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetActiveConfigDescriptor() { int PS4_SYSV_ABI sceUsbdGetActiveConfigDescriptor() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetBusNumber() { int PS4_SYSV_ABI sceUsbdGetBusNumber() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetConfigDescriptor() { int PS4_SYSV_ABI sceUsbdGetConfigDescriptor() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetConfigDescriptorByValue() { int PS4_SYSV_ABI sceUsbdGetConfigDescriptorByValue() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetConfiguration() { int PS4_SYSV_ABI sceUsbdGetConfiguration() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetDescriptor() { int PS4_SYSV_ABI sceUsbdGetDescriptor() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetDevice() { int PS4_SYSV_ABI sceUsbdGetDevice() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetDeviceAddress() { int PS4_SYSV_ABI sceUsbdGetDeviceAddress() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetDeviceDescriptor() { int PS4_SYSV_ABI sceUsbdGetDeviceDescriptor() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetDeviceList() { int PS4_SYSV_ABI sceUsbdGetDeviceList() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetDeviceSpeed() { int PS4_SYSV_ABI sceUsbdGetDeviceSpeed() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetIsoPacketBuffer() { int PS4_SYSV_ABI sceUsbdGetIsoPacketBuffer() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetMaxIsoPacketSize() { int PS4_SYSV_ABI sceUsbdGetMaxIsoPacketSize() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetMaxPacketSize() { int PS4_SYSV_ABI sceUsbdGetMaxPacketSize() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetStringDescriptor() { int PS4_SYSV_ABI sceUsbdGetStringDescriptor() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdGetStringDescriptorAscii() { int PS4_SYSV_ABI sceUsbdGetStringDescriptorAscii() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdHandleEvents() { int PS4_SYSV_ABI sceUsbdHandleEvents() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdHandleEventsLocked() { int PS4_SYSV_ABI sceUsbdHandleEventsLocked() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdHandleEventsTimeout() { int PS4_SYSV_ABI sceUsbdHandleEventsTimeout() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_DEBUG(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdInit() { int PS4_SYSV_ABI sceUsbdInit() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return 0x80240005; // Skip return 0x80240005; // Skip
} }
int PS4_SYSV_ABI sceUsbdInterruptTransfer() { int PS4_SYSV_ABI sceUsbdInterruptTransfer() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdKernelDriverActive() { int PS4_SYSV_ABI sceUsbdKernelDriverActive() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdLockEvents() { int PS4_SYSV_ABI sceUsbdLockEvents() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdLockEventWaiters() { int PS4_SYSV_ABI sceUsbdLockEventWaiters() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdOpen() { int PS4_SYSV_ABI sceUsbdOpen() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdOpenDeviceWithVidPid() { int PS4_SYSV_ABI sceUsbdOpenDeviceWithVidPid() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdRefDevice() { int PS4_SYSV_ABI sceUsbdRefDevice() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdReleaseInterface() { int PS4_SYSV_ABI sceUsbdReleaseInterface() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdResetDevice() { int PS4_SYSV_ABI sceUsbdResetDevice() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdSetConfiguration() { int PS4_SYSV_ABI sceUsbdSetConfiguration() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdSetInterfaceAltSetting() { int PS4_SYSV_ABI sceUsbdSetInterfaceAltSetting() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdSetIsoPacketLengths() { int PS4_SYSV_ABI sceUsbdSetIsoPacketLengths() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdSubmitTransfer() { int PS4_SYSV_ABI sceUsbdSubmitTransfer() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdTryLockEvents() { int PS4_SYSV_ABI sceUsbdTryLockEvents() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdUnlockEvents() { int PS4_SYSV_ABI sceUsbdUnlockEvents() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdUnlockEventWaiters() { int PS4_SYSV_ABI sceUsbdUnlockEventWaiters() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdUnrefDevice() { int PS4_SYSV_ABI sceUsbdUnrefDevice() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI sceUsbdWaitForEvent() { int PS4_SYSV_ABI sceUsbdWaitForEvent() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI Func_65F6EF33E38FFF50() { int PS4_SYSV_ABI Func_65F6EF33E38FFF50() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI Func_97F056BAD90AADE7() { int PS4_SYSV_ABI Func_97F056BAD90AADE7() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI Func_C55104A33B35B264() { int PS4_SYSV_ABI Func_C55104A33B35B264() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }
int PS4_SYSV_ABI Func_D56B43060720B1E0() { int PS4_SYSV_ABI Func_D56B43060720B1E0() {
LOG_ERROR(Lib_Usbd, "(STUBBED)called"); LOG_ERROR(Lib_Usbd, "(STUBBED) called");
return ORBIS_OK; return ORBIS_OK;
} }

View File

@ -7,6 +7,7 @@
#include "common/debug.h" #include "common/debug.h"
#include "core/libraries/kernel/memory.h" #include "core/libraries/kernel/memory.h"
#include "core/libraries/kernel/orbis_error.h" #include "core/libraries/kernel/orbis_error.h"
#include "core/libraries/kernel/process.h"
#include "core/memory.h" #include "core/memory.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_rasterizer.h"
@ -35,7 +36,7 @@ MemoryManager::~MemoryManager() = default;
void MemoryManager::SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1, void MemoryManager::SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1,
bool use_extended_mem2) { bool use_extended_mem2) {
const bool is_neo = Config::isNeoMode(); const bool is_neo = ::Libraries::Kernel::sceKernelIsNeoMode();
auto total_size = is_neo ? SCE_KERNEL_TOTAL_MEM_PRO : SCE_KERNEL_TOTAL_MEM; auto total_size = is_neo ? SCE_KERNEL_TOTAL_MEM_PRO : SCE_KERNEL_TOTAL_MEM;
if (!use_extended_mem1 && is_neo) { if (!use_extended_mem1 && is_neo) {
total_size -= 256_MB; total_size -= 256_MB;

View File

@ -28,8 +28,6 @@
#include "core/file_format/trp.h" #include "core/file_format/trp.h"
#include "core/file_sys/fs.h" #include "core/file_sys/fs.h"
#include "core/libraries/disc_map/disc_map.h" #include "core/libraries/disc_map/disc_map.h"
#include "core/libraries/fiber/fiber.h"
#include "core/libraries/jpeg/jpegenc.h"
#include "core/libraries/libc_internal/libc_internal.h" #include "core/libraries/libc_internal/libc_internal.h"
#include "core/libraries/libs.h" #include "core/libraries/libs.h"
#include "core/libraries/ngs2/ngs2.h" #include "core/libraries/ngs2/ngs2.h"
@ -59,8 +57,8 @@ Emulator::Emulator() {
LOG_INFO(Loader, "Branch {}", Common::g_scm_branch); LOG_INFO(Loader, "Branch {}", Common::g_scm_branch);
LOG_INFO(Loader, "Description {}", Common::g_scm_desc); LOG_INFO(Loader, "Description {}", Common::g_scm_desc);
LOG_INFO(Config, "General Logtype: {}", Config::getLogType()); LOG_INFO(Config, "General LogType: {}", Config::getLogType());
LOG_INFO(Config, "General isNeo: {}", Config::isNeoMode()); LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole());
LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu()); LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu());
LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders()); LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders());
LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv()); LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv());
@ -101,19 +99,12 @@ Emulator::~Emulator() {
} }
void Emulator::Run(const std::filesystem::path& file) { void Emulator::Run(const std::filesystem::path& file) {
// Use the eboot from the separated updates folder if it's there
std::filesystem::path game_patch_folder = file.parent_path();
game_patch_folder += "-UPDATE";
std::filesystem::path eboot_path = std::filesystem::exists(game_patch_folder / file.filename())
? game_patch_folder / file.filename()
: file;
// Applications expect to be run from /app0 so mount the file's parent path as app0. // Applications expect to be run from /app0 so mount the file's parent path as app0.
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance(); auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
mnt->Mount(file.parent_path(), "/app0"); const auto game_folder = file.parent_path();
mnt->Mount(game_folder, "/app0");
// Certain games may use /hostapp as well such as CUSA001100 // Certain games may use /hostapp as well such as CUSA001100
mnt->Mount(file.parent_path(), "/hostapp"); mnt->Mount(game_folder, "/hostapp");
auto& game_info = Common::ElfInfo::Instance(); auto& game_info = Common::ElfInfo::Instance();
@ -122,13 +113,12 @@ void Emulator::Run(const std::filesystem::path& file) {
std::string title; std::string title;
std::string app_version; std::string app_version;
u32 fw_version; u32 fw_version;
Common::PSFAttributes psf_attributes{};
std::filesystem::path sce_sys_folder = eboot_path.parent_path() / "sce_sys"; const auto param_sfo_path = mnt->GetHostPath("/app0/sce_sys/param.sfo");
if (std::filesystem::is_directory(sce_sys_folder)) { if (std::filesystem::exists(param_sfo_path)) {
for (const auto& entry : std::filesystem::directory_iterator(sce_sys_folder)) {
if (entry.path().filename() == "param.sfo") {
auto* param_sfo = Common::Singleton<PSF>::Instance(); auto* param_sfo = Common::Singleton<PSF>::Instance();
const bool success = param_sfo->Open(sce_sys_folder / "param.sfo"); const bool success = param_sfo->Open(param_sfo_path);
ASSERT_MSG(success, "Failed to open param.sfo"); ASSERT_MSG(success, "Failed to open param.sfo");
const auto content_id = param_sfo->GetString("CONTENT_ID"); const auto content_id = param_sfo->GetString("CONTENT_ID");
ASSERT_MSG(content_id.has_value(), "Failed to get CONTENT_ID"); ASSERT_MSG(content_id.has_value(), "Failed to get CONTENT_ID");
@ -138,7 +128,7 @@ void Emulator::Run(const std::filesystem::path& file) {
Common::FS::GetUserPath(Common::FS::PathType::MetaDataDir) / id / "TrophyFiles"; Common::FS::GetUserPath(Common::FS::PathType::MetaDataDir) / id / "TrophyFiles";
if (!std::filesystem::exists(trophyDir)) { if (!std::filesystem::exists(trophyDir)) {
TRP trp; TRP trp;
if (!trp.Extract(eboot_path.parent_path(), id)) { if (!trp.Extract(game_folder, id)) {
LOG_ERROR(Loader, "Couldn't extract trophies"); LOG_ERROR(Loader, "Couldn't extract trophies");
} }
} }
@ -158,17 +148,20 @@ void Emulator::Run(const std::filesystem::path& file) {
fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000); fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000);
app_version = param_sfo->GetString("APP_VER").value_or("Unknown version"); app_version = param_sfo->GetString("APP_VER").value_or("Unknown version");
LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version); LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version);
} else if (entry.path().filename() == "pic1.png") { if (const auto raw_attributes = param_sfo->GetInteger("ATTRIBUTE")) {
psf_attributes.raw = *raw_attributes;
}
}
const auto pic1_path = mnt->GetHostPath("/app0/sce_sys/pic1.png");
if (std::filesystem::exists(pic1_path)) {
auto* splash = Common::Singleton<Splash>::Instance(); auto* splash = Common::Singleton<Splash>::Instance();
if (splash->IsLoaded()) { if (!splash->IsLoaded()) {
continue; if (!splash->Open(pic1_path)) {
}
if (!splash->Open(entry.path())) {
LOG_ERROR(Loader, "Game splash: unable to open file"); LOG_ERROR(Loader, "Game splash: unable to open file");
} }
} }
} }
}
game_info.initialized = true; game_info.initialized = true;
game_info.game_serial = id; game_info.game_serial = id;
@ -176,6 +169,7 @@ void Emulator::Run(const std::filesystem::path& file) {
game_info.app_ver = app_version; game_info.app_ver = app_version;
game_info.firmware_ver = fw_version & 0xFFF00000; game_info.firmware_ver = fw_version & 0xFFF00000;
game_info.raw_firmware_ver = fw_version; game_info.raw_firmware_ver = fw_version;
game_info.psf_attributes = psf_attributes;
std::string game_title = fmt::format("{} - {} <{}>", id, title, app_version); std::string game_title = fmt::format("{} - {} <{}>", id, title, app_version);
std::string window_title = ""; std::string window_title = "";
@ -219,6 +213,7 @@ void Emulator::Run(const std::filesystem::path& file) {
Libraries::InitHLELibs(&linker->GetHLESymbols()); Libraries::InitHLELibs(&linker->GetHLESymbols());
// Load the module with the linker // Load the module with the linker
const auto eboot_path = mnt->GetHostPath("/app0/" + file.filename().string());
linker->LoadModule(eboot_path); linker->LoadModule(eboot_path);
// check if we have system modules to load // check if we have system modules to load
@ -236,6 +231,8 @@ void Emulator::Run(const std::filesystem::path& file) {
} }
// Load all prx from separate update's sce_module folder // Load all prx from separate update's sce_module folder
std::filesystem::path game_patch_folder = game_folder;
game_patch_folder += "-UPDATE";
std::filesystem::path update_module_folder = game_patch_folder / "sce_module"; std::filesystem::path update_module_folder = game_patch_folder / "sce_module";
if (std::filesystem::is_directory(update_module_folder)) { if (std::filesystem::is_directory(update_module_folder)) {
for (const auto& entry : std::filesystem::directory_iterator(update_module_folder)) { for (const auto& entry : std::filesystem::directory_iterator(update_module_folder)) {

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <SDL3/SDL.h> #include <SDL3/SDL.h>
#include "common/config.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/libraries/kernel/time.h" #include "core/libraries/kernel/time.h"
#include "core/libraries/pad/pad.h" #include "core/libraries/pad/pad.h"
@ -189,11 +190,6 @@ void GameController::CalculateOrientation(Libraries::Pad::OrbisFVector3& acceler
gz += Kp * ez + Ki * eInt[2]; gz += Kp * ez + Ki * eInt[2];
//// Integrate rate of change of quaternion //// Integrate rate of change of quaternion
// float pa = q2, pb = q3, pc = q4;
// q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime);
// q2 += (pa * gx + pb * gz - pc * gy) * (0.5f * deltaTime);
// q3 += (pb * gy - pa * gz + pc * gx) * (0.5f * deltaTime);
// q4 += (pc * gz + pa * gy - pb * gx) * (0.5f * deltaTime);
q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime); q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime);
q2 += (q1 * gx + q3 * gz - q4 * gy) * (0.5f * deltaTime); q2 += (q1 * gx + q3 * gz - q4 * gy) * (0.5f * deltaTime);
q3 += (q1 * gy - q2 * gz + q4 * gx) * (0.5f * deltaTime); q3 += (q1 * gy - q2 * gz + q4 * gx) * (0.5f * deltaTime);
@ -247,6 +243,7 @@ void GameController::TryOpenSDLController() {
int gamepad_count; int gamepad_count;
SDL_JoystickID* gamepads = SDL_GetGamepads(&gamepad_count); SDL_JoystickID* gamepads = SDL_GetGamepads(&gamepad_count);
m_sdl_gamepad = gamepad_count > 0 ? SDL_OpenGamepad(gamepads[0]) : nullptr; m_sdl_gamepad = gamepad_count > 0 ? SDL_OpenGamepad(gamepads[0]) : nullptr;
if (Config::getIsMotionControlsEnabled()) {
if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_GYRO, true)) { if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_GYRO, true)) {
gyro_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_GYRO); gyro_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_GYRO);
LOG_INFO(Input, "Gyro initialized, poll rate: {}", gyro_poll_rate); LOG_INFO(Input, "Gyro initialized, poll rate: {}", gyro_poll_rate);
@ -259,6 +256,8 @@ void GameController::TryOpenSDLController() {
} else { } else {
LOG_ERROR(Input, "Failed to initialize accel controls for gamepad"); LOG_ERROR(Input, "Failed to initialize accel controls for gamepad");
} }
}
SDL_free(gamepads); SDL_free(gamepads);
SetLightBarRGB(0, 0, 255); SetLightBarRGB(0, 0, 255);
@ -266,6 +265,7 @@ void GameController::TryOpenSDLController() {
} }
u32 GameController::Poll() { u32 GameController::Poll() {
std::scoped_lock lock{m_mutex};
if (m_connected) { if (m_connected) {
auto time = Libraries::Kernel::sceKernelGetProcessTime(); auto time = Libraries::Kernel::sceKernelGetProcessTime();
if (m_states_num == 0) { if (m_states_num == 0) {

View File

@ -341,6 +341,8 @@ void SettingsDialog::LoadValuesFromConfig() {
toml::find_or<std::string>(data, "Input", "backButtonBehavior", "left")); toml::find_or<std::string>(data, "Input", "backButtonBehavior", "left"));
int index = ui->backButtonBehaviorComboBox->findData(backButtonBehavior); int index = ui->backButtonBehaviorComboBox->findData(backButtonBehavior);
ui->backButtonBehaviorComboBox->setCurrentIndex(index != -1 ? index : 0); ui->backButtonBehaviorComboBox->setCurrentIndex(index != -1 ? index : 0);
ui->motionControlsCheckBox->setChecked(
toml::find_or<bool>(data, "Input", "isMotionControlsEnabled", true));
ui->removeFolderButton->setEnabled(!ui->gameFoldersListWidget->selectedItems().isEmpty()); ui->removeFolderButton->setEnabled(!ui->gameFoldersListWidget->selectedItems().isEmpty());
ResetInstallFolders(); ResetInstallFolders();
@ -536,6 +538,7 @@ void SettingsDialog::UpdateSettings() {
Config::setBackButtonBehavior(TouchPadIndex[ui->backButtonBehaviorComboBox->currentIndex()]); Config::setBackButtonBehavior(TouchPadIndex[ui->backButtonBehaviorComboBox->currentIndex()]);
Config::setIsFullscreen(ui->fullscreenCheckBox->isChecked()); Config::setIsFullscreen(ui->fullscreenCheckBox->isChecked());
Config::setFullscreenMode(ui->fullscreenModeComboBox->currentText().toStdString()); Config::setFullscreenMode(ui->fullscreenModeComboBox->currentText().toStdString());
Config::setIsMotionControlsEnabled(ui->motionControlsCheckBox->isChecked());
Config::setisTrophyPopupDisabled(ui->disableTrophycheckBox->isChecked()); Config::setisTrophyPopupDisabled(ui->disableTrophycheckBox->isChecked());
Config::setPlayBGM(ui->playBGMCheckBox->isChecked()); Config::setPlayBGM(ui->playBGMCheckBox->isChecked());
Config::setLogType(ui->logTypeComboBox->currentText().toStdString()); Config::setLogType(ui->logTypeComboBox->currentText().toStdString());

View File

@ -853,6 +853,13 @@
</layout> </layout>
</widget> </widget>
</item> </item>
<item>
<widget class="QCheckBox" name="motionControlsCheckBox">
<property name="text">
<string>Enable Motion Controls</string>
</property>
</widget>
</item>
<item> <item>
<widget class="QWidget" name="controllerWidgetSpacer" native="true"> <widget class="QWidget" name="controllerWidgetSpacer" native="true">
<property name="enabled"> <property name="enabled">

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -205,7 +205,6 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0))); addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)));
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0); ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
} }
emit_ds_read_barrier = true;
} }
void Translator::DS_SWIZZLE_B32(const GcnInst& inst) { void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
@ -222,11 +221,6 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64, void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64,
const GcnInst& inst) { const GcnInst& inst) {
if (emit_ds_read_barrier && profile.needs_lds_barriers) {
ir.Barrier();
emit_ds_read_barrier = false;
}
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))}; const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
IR::VectorReg dst_reg{inst.dst[0].code}; IR::VectorReg dst_reg{inst.dst[0].code};
if (is_pair) { if (is_pair) {

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/reinterpret.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
namespace Shader::Gcn { namespace Shader::Gcn {
@ -31,14 +32,16 @@ void Translator::EmitExport(const GcnInst& inst) {
return; return;
} }
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0); const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
const auto [r, g, b, a] = runtime_info.fs_info.color_buffers[index].swizzle; const auto col_buf = runtime_info.fs_info.color_buffers[index];
const auto converted = IR::ApplyWriteNumberConversion(ir, value, col_buf.num_conversion);
const auto [r, g, b, a] = col_buf.swizzle;
const std::array swizzle_array = {r, g, b, a}; const std::array swizzle_array = {r, g, b, a};
const auto swizzled_comp = swizzle_array[comp]; const auto swizzled_comp = swizzle_array[comp];
if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) { if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) {
ir.SetAttribute(attrib, value, comp); ir.SetAttribute(attrib, converted, comp);
return; return;
} }
ir.SetAttribute(attrib, value, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red)); ir.SetAttribute(attrib, converted, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red));
}; };
const auto unpack = [&](u32 idx) { const auto unpack = [&](u32 idx) {

View File

@ -106,6 +106,10 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
return S_FF1_I32_B32(inst); return S_FF1_I32_B32(inst);
case Opcode::S_FF1_I32_B64: case Opcode::S_FF1_I32_B64:
return S_FF1_I32_B64(inst); return S_FF1_I32_B64(inst);
case Opcode::S_BITSET0_B32:
return S_BITSET_B32(inst, 0);
case Opcode::S_BITSET1_B32:
return S_BITSET_B32(inst, 1);
case Opcode::S_AND_SAVEEXEC_B64: case Opcode::S_AND_SAVEEXEC_B64:
return S_SAVEEXEC_B64(NegateMode::None, false, inst); return S_SAVEEXEC_B64(NegateMode::None, false, inst);
case Opcode::S_ORN2_SAVEEXEC_B64: case Opcode::S_ORN2_SAVEEXEC_B64:
@ -607,6 +611,13 @@ void Translator::S_FF1_I32_B64(const GcnInst& inst) {
SetDst(inst.dst[0], result); SetDst(inst.dst[0], result);
} }
void Translator::S_BITSET_B32(const GcnInst& inst, u32 bit_value) {
const IR::U32 old_value{GetSrc(inst.dst[0])};
const IR::U32 offset{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0U), ir.Imm32(5U))};
const IR::U32 result{ir.BitFieldInsert(old_value, ir.Imm32(bit_value), offset, ir.Imm32(1U))};
SetDst(inst.dst[0], result);
}
void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst) { void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst) {
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs) // This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination // However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination

View File

@ -114,6 +114,7 @@ public:
void S_BCNT1_I32_B64(const GcnInst& inst); void S_BCNT1_I32_B64(const GcnInst& inst);
void S_FF1_I32_B32(const GcnInst& inst); void S_FF1_I32_B32(const GcnInst& inst);
void S_FF1_I32_B64(const GcnInst& inst); void S_FF1_I32_B64(const GcnInst& inst);
void S_BITSET_B32(const GcnInst& inst, u32 bit_value);
void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_GETPC_B64(u32 pc, const GcnInst& inst);
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
void S_ABS_I32(const GcnInst& inst); void S_ABS_I32(const GcnInst& inst);
@ -308,7 +309,6 @@ private:
const RuntimeInfo& runtime_info; const RuntimeInfo& runtime_info;
const Profile& profile; const Profile& profile;
bool opcode_missing = false; bool opcode_missing = false;
bool emit_ds_read_barrier = false;
}; };
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info, void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info,

View File

@ -904,7 +904,7 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
case ConditionOp::GE: case ConditionOp::GE:
return ir.FPGreaterThanEqual(src0, src1); return ir.FPGreaterThanEqual(src0, src1);
case ConditionOp::U: case ConditionOp::U:
return ir.LogicalNot(ir.LogicalAnd(ir.FPIsNan(src0), ir.FPIsNan(src1))); return ir.LogicalOr(ir.FPIsNan(src0), ir.FPIsNan(src1));
default: default:
UNREACHABLE(); UNREACHABLE();
} }

View File

@ -301,8 +301,7 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
}); });
} }
void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
Descriptors& descriptors) {
s32 binding{}; s32 binding{};
AmdGpu::Buffer buffer; AmdGpu::Buffer buffer;
if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) { if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) {
@ -317,19 +316,191 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
}); });
} }
// Update buffer descriptor format.
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
// Replace handle with binding index in buffer resource list. // Replace handle with binding index in buffer resource list.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(binding)); inst.SetArg(0, ir.Imm32(binding));
}
void PatchTextureBufferSharp(IR::Block& block, IR::Inst& inst, Info& info,
Descriptors& descriptors) {
const IR::Inst* handle = inst.Arg(0).InstRecursive();
const IR::Inst* producer = handle->Arg(0).InstRecursive();
const auto sharp = TrackSharp(producer, info);
const s32 binding = descriptors.Add(TextureBufferResource{
.sharp_idx = sharp,
.is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
});
// Replace handle with binding index in texture buffer resource list.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(binding));
}
void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
const auto opcode = inst->GetOpcode();
if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
opcode == IR::Opcode::GetUserData) {
return inst;
}
return std::nullopt;
};
const auto result = IR::BreadthFirstSearch(&inst, pred);
ASSERT_MSG(result, "Unable to find image sharp source");
const IR::Inst* producer = result.value();
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
// Read image sharp.
const auto tsharp = TrackSharp(tsharp_handle, info);
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
auto image = info.ReadUdSharp<AmdGpu::Image>(tsharp);
if (!image.Valid()) {
LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!");
image = AmdGpu::Image::Null();
}
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
const bool is_read = inst.GetOpcode() == IR::Opcode::ImageRead;
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite;
// Patch image instruction if image is FMask.
if (image.IsFmask()) {
ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
switch (inst.GetOpcode()) {
case IR::Opcode::ImageRead:
case IR::Opcode::ImageSampleRaw: {
IR::F32 fmaskx = ir.BitCast<IR::F32>(ir.Imm32(0x76543210));
IR::F32 fmasky = ir.BitCast<IR::F32>(ir.Imm32(0xfedcba98));
inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky));
return;
}
case IR::Opcode::ImageQueryLod:
inst.ReplaceUsesWith(ir.Imm32(1));
return;
case IR::Opcode::ImageQueryDimensions: {
IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast<u32>(image.width)), // x
ir.Imm32(static_cast<u32>(image.width)), // y
ir.Imm32(1), ir.Imm32(1)); // depth, mip
inst.ReplaceUsesWith(dims);
// Track FMask resource to do specialization.
descriptors.Add(FMaskResource{
.sharp_idx = tsharp,
});
return;
}
default:
UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode());
}
}
u32 image_binding = descriptors.Add(ImageResource{
.sharp_idx = tsharp,
.is_depth = bool(inst_info.is_depth),
.is_atomic = IsImageAtomicInstruction(inst),
.is_array = bool(inst_info.is_array),
.is_read = is_read,
.is_written = is_written,
});
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
// Read sampler sharp.
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
const IR::Value& handle = producer->Arg(1);
// Inline sampler resource.
if (handle.IsImmediate()) {
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
const auto binding = descriptors.Add(SamplerResource{
.sharp_idx = std::numeric_limits<u32>::max(),
.inline_sampler = inline_sampler,
});
return {binding, inline_sampler};
}
// Normal sampler resource.
const auto ssharp_handle = handle.InstRecursive();
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
const auto ssharp = TrackSharp(ssharp_ud, info);
const auto binding = descriptors.Add(SamplerResource{
.sharp_idx = ssharp,
.associated_image = image_binding,
.disable_aniso = disable_aniso,
});
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
}();
// Patch image and sampler handle.
inst.SetArg(0, ir.Imm32(image_binding | sampler_binding << 16));
} else {
// Patch image handle.
inst.SetArg(0, ir.Imm32(image_binding));
}
}
void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
// Insert gds binding in the shader if it doesn't exist already.
// The buffer is used for append/consume counters.
constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
const u32 binding = descriptors.Add(BufferResource{
.used_types = IR::Type::U32,
.inline_cbuf = GdsSharp,
.is_gds_buffer = true,
.is_written = true,
});
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
return inst;
}
return std::nullopt;
};
// Attempt to deduce the GDS address of counter at compile time.
const u32 gds_addr = [&] {
const IR::Value& gds_offset = inst.Arg(0);
if (gds_offset.IsImmediate()) {
// Nothing to do, offset is known.
return gds_offset.U32() & 0xFFFF;
}
const auto result = IR::BreadthFirstSearch(&inst, pred);
ASSERT_MSG(result, "Unable to track M0 source");
// M0 must be set by some user data register.
const IR::Inst* prod = gds_offset.InstRecursive();
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
u32 m0_val = info.user_data[ud_reg] >> 16;
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
m0_val += prod->Arg(1).U32();
}
return m0_val & 0xFFFF;
}();
// Patch instruction.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
inst.SetArg(1, ir.Imm32(binding));
}
void PatchBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
const auto handle = inst.Arg(0);
const auto buffer_res = info.buffers[handle.U32()];
const auto buffer = buffer_res.GetSharp(info);
ASSERT(!buffer.add_tid_enable); ASSERT(!buffer.add_tid_enable);
// Address of constant buffer reads can be calculated at IR emittion time. // Address of constant buffer reads can be calculated at IR emission time.
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) { if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
return; return;
} }
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
const IR::U32 index_stride = ir.Imm32(buffer.index_stride); const IR::U32 index_stride = ir.Imm32(buffer.index_stride);
const IR::U32 element_size = ir.Imm32(buffer.element_size); const IR::U32 element_size = ir.Imm32(buffer.element_size);
@ -366,21 +537,27 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
inst.SetArg(1, address); inst.SetArg(1, address);
} }
void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, void PatchTextureBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
Descriptors& descriptors) { const auto handle = inst.Arg(0);
const IR::Inst* handle = inst.Arg(0).InstRecursive(); const auto buffer_res = info.texture_buffers[handle.U32()];
const IR::Inst* producer = handle->Arg(0).InstRecursive(); const auto buffer = buffer_res.GetSharp(info);
const auto sharp = TrackSharp(producer, info);
const auto buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp);
const s32 binding = descriptors.Add(TextureBufferResource{
.sharp_idx = sharp,
.is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
});
// Replace handle with binding index in texture buffer resource list.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(binding));
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
const auto swizzled = ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect());
const auto converted =
ApplyWriteNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
inst.SetArg(2, converted);
} else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) {
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect());
const auto converted =
ApplyReadNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
inst.ReplaceUsesWith(converted);
}
} }
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
@ -409,39 +586,14 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
} }
} }
void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
Descriptors& descriptors, const IR::Inst* producer, const AmdGpu::Image& image) {
const u32 image_binding, const AmdGpu::Image& image) { const auto handle = inst.Arg(0);
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions const auto sampler_res = info.samplers[(handle.U32() >> 16) & 0xFFFF];
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> { auto sampler = sampler_res.GetSharp(info);
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
const IR::Value& handle = producer->Arg(1);
// Inline sampler resource.
if (handle.IsImmediate()) {
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
const auto binding = descriptors.Add(SamplerResource{
.sharp_idx = std::numeric_limits<u32>::max(),
.inline_sampler = inline_sampler,
});
return {binding, inline_sampler};
}
// Normal sampler resource.
const auto ssharp_handle = handle.InstRecursive();
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
const auto ssharp = TrackSharp(ssharp_ud, info);
const auto binding = descriptors.Add(SamplerResource{
.sharp_idx = ssharp,
.associated_image = image_binding,
.disable_aniso = disable_aniso,
});
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
}();
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
const auto inst_info = inst.Flags<IR::TextureInstInfo>(); const auto inst_info = inst.Flags<IR::TextureInstInfo>();
const IR::U32 handle = ir.Imm32(image_binding | sampler_binding << 16);
IR::Inst* body1 = inst.Arg(1).InstRecursive(); IR::Inst* body1 = inst.Arg(1).InstRecursive();
IR::Inst* body2 = inst.Arg(2).InstRecursive(); IR::Inst* body2 = inst.Arg(2).InstRecursive();
@ -539,8 +691,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
// Query dimensions of image if needed for normalization. // Query dimensions of image if needed for normalization.
// We can't use the image sharp because it could be bound to a different image later. // We can't use the image sharp because it could be bound to a different image later.
const auto dimensions = const auto dimensions =
unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false)) unnormalized ? ir.ImageQueryDimension(handle, ir.Imm32(0u), ir.Imm1(false)) : IR::Value{};
: IR::Value{};
const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value { const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value {
const auto coord = get_addr_reg(coord_idx); const auto coord = get_addr_reg(coord_idx);
if (unnormalized) { if (unnormalized) {
@ -589,7 +740,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
: IR::F32{}; : IR::F32{};
const IR::F32 lod_clamp = inst_info.has_lod_clamp ? get_addr_reg(addr_reg++) : IR::F32{}; const IR::F32 lod_clamp = inst_info.has_lod_clamp ? get_addr_reg(addr_reg++) : IR::F32{};
auto new_inst = [&] -> IR::Value { auto texel = [&] -> IR::Value {
if (inst_info.is_gather) { if (inst_info.is_gather) {
if (inst_info.is_depth) { if (inst_info.is_depth) {
return ir.ImageGatherDref(handle, coords, offset, dref, inst_info); return ir.ImageGatherDref(handle, coords, offset, dref, inst_info);
@ -611,94 +762,30 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
} }
return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info); return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info);
}(); }();
inst.ReplaceUsesWithAndRemove(new_inst);
const auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
inst.ReplaceUsesWith(converted);
} }
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> { // Nothing to patch for dimension query.
const auto opcode = inst->GetOpcode();
if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
opcode == IR::Opcode::GetUserData) {
return inst;
}
return std::nullopt;
};
const auto result = IR::BreadthFirstSearch(&inst, pred);
ASSERT_MSG(result, "Unable to find image sharp source");
const IR::Inst* producer = result.value();
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
// Read image sharp.
const auto tsharp = TrackSharp(tsharp_handle, info);
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
auto image = info.ReadUdSharp<AmdGpu::Image>(tsharp);
if (!image.Valid()) {
LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!");
image = AmdGpu::Image::Null();
}
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
const bool is_read = inst.GetOpcode() == IR::Opcode::ImageRead;
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite;
// Patch image instruction if image is FMask.
if (image.IsFmask()) {
ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
switch (inst.GetOpcode()) {
case IR::Opcode::ImageRead:
case IR::Opcode::ImageSampleRaw: {
IR::F32 fmaskx = ir.BitCast<IR::F32>(ir.Imm32(0x76543210));
IR::F32 fmasky = ir.BitCast<IR::F32>(ir.Imm32(0xfedcba98));
inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky));
return;
}
case IR::Opcode::ImageQueryLod:
inst.ReplaceUsesWith(ir.Imm32(1));
return;
case IR::Opcode::ImageQueryDimensions: {
IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast<u32>(image.width)), // x
ir.Imm32(static_cast<u32>(image.width)), // y
ir.Imm32(1), ir.Imm32(1)); // depth, mip
inst.ReplaceUsesWith(dims);
// Track FMask resource to do specialization.
descriptors.Add(FMaskResource{
.sharp_idx = tsharp,
});
return;
}
default:
UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode());
}
}
u32 image_binding = descriptors.Add(ImageResource{
.sharp_idx = tsharp,
.is_depth = bool(inst_info.is_depth),
.is_atomic = IsImageAtomicInstruction(inst),
.is_array = bool(inst_info.is_array),
.is_read = is_read,
.is_written = is_written,
});
// Sample instructions must be resolved into a new instruction using address register data.
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
PatchImageSampleInstruction(block, inst, info, descriptors, producer, image_binding, image);
return;
}
// Patch image handle
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(image_binding));
// No need to patch coordinates if we are just querying.
if (inst.GetOpcode() == IR::Opcode::ImageQueryDimensions) { if (inst.GetOpcode() == IR::Opcode::ImageQueryDimensions) {
return; return;
} }
const auto handle = inst.Arg(0);
const auto image_res = info.images[handle.U32() & 0xFFFF];
auto image = image_res.GetSharp(info);
// Sample instructions must be handled separately using address register data.
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
PatchImageSampleArgs(block, inst, info, image);
return;
}
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
// Now that we know the image type, adjust texture coordinate vector. // Now that we know the image type, adjust texture coordinate vector.
IR::Inst* body = inst.Arg(1).InstRecursive(); IR::Inst* body = inst.Arg(1).InstRecursive();
const auto [coords, arg] = [&] -> std::pair<IR::Value, IR::Value> { const auto [coords, arg] = [&] -> std::pair<IR::Value, IR::Value> {
@ -719,152 +806,77 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
case AmdGpu::ImageType::Color3D: // x, y, z, [lod] case AmdGpu::ImageType::Color3D: // x, y, z, [lod]
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
case AmdGpu::ImageType::Cube: // x, y, face, [lod] case AmdGpu::ImageType::Cube: // x, y, face, [lod]
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_written, return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2),
inst_info.is_array), inst.GetOpcode() == IR::Opcode::ImageWrite, inst_info.is_array),
body->Arg(3)}; body->Arg(3)};
default: default:
UNREACHABLE_MSG("Unknown image type {}", image.GetType()); UNREACHABLE_MSG("Unknown image type {}", image.GetType());
} }
}(); }();
const auto has_ms = image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray;
ASSERT(!inst_info.has_lod || !has_ms);
const auto lod = inst_info.has_lod ? IR::U32{arg} : IR::U32{};
const auto ms = has_ms ? IR::U32{arg} : IR::U32{};
const auto is_storage = image_res.IsStorage(image);
if (inst.GetOpcode() == IR::Opcode::ImageRead) {
auto texel = ir.ImageRead(handle, coords, lod, ms, inst_info);
if (is_storage) {
// Storage image requires shader swizzle.
texel = ApplySwizzle(ir, texel, image.DstSelect());
}
const auto converted =
ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
inst.ReplaceUsesWith(converted);
} else {
inst.SetArg(1, coords); inst.SetArg(1, coords);
if (inst_info.has_lod) {
ASSERT(inst.GetOpcode() == IR::Opcode::ImageRead ||
inst.GetOpcode() == IR::Opcode::ImageWrite);
ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa &&
image.GetType() != AmdGpu::ImageType::Color2DMsaaArray);
inst.SetArg(2, arg);
} else if ((image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) &&
(inst.GetOpcode() == IR::Opcode::ImageRead ||
inst.GetOpcode() == IR::Opcode::ImageWrite)) {
inst.SetArg(3, arg);
}
}
void PatchTextureBufferInterpretation(IR::Block& block, IR::Inst& inst, Info& info) {
const auto binding = inst.Arg(0).U32();
const auto buffer_res = info.texture_buffers[binding];
const auto buffer = buffer_res.GetSharp(info);
if (!buffer.Valid()) {
// Don't need to swizzle invalid buffer.
return;
}
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
inst.SetArg(2, ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect()));
} else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) {
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect());
inst.ReplaceUsesWith(swizzled);
}
}
void PatchImageInterpretation(IR::Block& block, IR::Inst& inst, Info& info) {
const auto binding = inst.Arg(0).U32();
const auto image_res = info.images[binding & 0xFFFF];
const auto image = image_res.GetSharp(info);
if (!image.Valid() || !image_res.IsStorage(image)) {
// Don't need to swizzle invalid or non-storage image.
return;
}
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
if (inst.GetOpcode() == IR::Opcode::ImageWrite) { if (inst.GetOpcode() == IR::Opcode::ImageWrite) {
inst.SetArg(4, ApplySwizzle(ir, inst.Arg(4), image.DstSelect())); inst.SetArg(2, lod);
} else if (inst.GetOpcode() == IR::Opcode::ImageRead) { inst.SetArg(3, ms);
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
const auto lod = inst.Arg(2); auto texel = inst.Arg(4);
const auto ms = inst.Arg(3); if (is_storage) {
const auto texel = // Storage image requires shader swizzle.
ir.ImageRead(inst.Arg(0), inst.Arg(1), lod.IsEmpty() ? IR::U32{} : IR::U32{lod}, texel = ApplySwizzle(ir, texel, image.DstSelect());
ms.IsEmpty() ? IR::U32{} : IR::U32{ms}, inst_info);
const auto swizzled = ApplySwizzle(ir, texel, image.DstSelect());
inst.ReplaceUsesWith(swizzled);
} }
} const auto converted =
ApplyWriteNumberConversionVec4(ir, texel, image.GetNumberConversion());
void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info, inst.SetArg(4, converted);
Descriptors& descriptors) {
// Insert gds binding in the shader if it doesn't exist already.
// The buffer is used for append/consume counters.
constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
const u32 binding = descriptors.Add(BufferResource{
.used_types = IR::Type::U32,
.inline_cbuf = GdsSharp,
.is_gds_buffer = true,
.is_written = true,
});
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
return inst;
} }
return std::nullopt;
};
// Attempt to deduce the GDS address of counter at compile time.
const u32 gds_addr = [&] {
const IR::Value& gds_offset = inst.Arg(0);
if (gds_offset.IsImmediate()) {
// Nothing to do, offset is known.
return gds_offset.U32() & 0xFFFF;
} }
const auto result = IR::BreadthFirstSearch(&inst, pred);
ASSERT_MSG(result, "Unable to track M0 source");
// M0 must be set by some user data register.
const IR::Inst* prod = gds_offset.InstRecursive();
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
u32 m0_val = info.user_data[ud_reg] >> 16;
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
m0_val += prod->Arg(1).U32();
}
return m0_val & 0xFFFF;
}();
// Patch instruction.
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
inst.SetArg(1, ir.Imm32(binding));
} }
void ResourceTrackingPass(IR::Program& program) { void ResourceTrackingPass(IR::Program& program) {
// Iterate resource instructions and patch them after finding the sharp. // Iterate resource instructions and patch them after finding the sharp.
auto& info = program.info; auto& info = program.info;
// Pass 1: Track resource sharps
Descriptors descriptors{info}; Descriptors descriptors{info};
for (IR::Block* const block : program.blocks) { for (IR::Block* const block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) { for (IR::Inst& inst : block->Instructions()) {
if (IsBufferInstruction(inst)) { if (IsBufferInstruction(inst)) {
PatchBufferInstruction(*block, inst, info, descriptors); PatchBufferSharp(*block, inst, info, descriptors);
continue; } else if (IsTextureBufferInstruction(inst)) {
} PatchTextureBufferSharp(*block, inst, info, descriptors);
if (IsTextureBufferInstruction(inst)) { } else if (IsImageInstruction(inst)) {
PatchTextureBufferInstruction(*block, inst, info, descriptors); PatchImageSharp(*block, inst, info, descriptors);
continue; } else if (IsDataRingInstruction(inst)) {
} PatchDataRingAccess(*block, inst, info, descriptors);
if (IsImageInstruction(inst)) {
PatchImageInstruction(*block, inst, info, descriptors);
continue;
}
if (IsDataRingInstruction(inst)) {
PatchDataRingInstruction(*block, inst, info, descriptors);
} }
} }
} }
// Second pass to reinterpret format read/write where needed, since we now know
// the bindings and their properties. // Pass 2: Patch instruction args
for (IR::Block* const block : program.blocks) { for (IR::Block* const block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) { for (IR::Inst& inst : block->Instructions()) {
if (IsTextureBufferInstruction(inst)) { if (IsBufferInstruction(inst)) {
PatchTextureBufferInterpretation(*block, inst, info); PatchBufferArgs(*block, inst, info);
continue; } else if (IsTextureBufferInstruction(inst)) {
} PatchTextureBufferArgs(*block, inst, info);
if (IsImageInstruction(inst)) { } else if (IsImageInstruction(inst)) {
PatchImageInterpretation(*block, inst, info); PatchImageArgs(*block, inst, info);
} }
} }
} }

View File

@ -8,6 +8,54 @@
namespace Shader::Optimization { namespace Shader::Optimization {
static void EmitBarrierInBlock(IR::Block* block) {
// This is inteded to insert a barrier when shared memory write and read
// occur in the same basic block. Also checks if branch depth is zero as
// we don't want to insert barrier in potentially divergent code.
bool emit_barrier_on_write = false;
bool emit_barrier_on_read = false;
const auto emit_barrier = [block](bool& emit_cond, IR::Inst& inst) {
if (emit_cond) {
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
ir.Barrier();
emit_cond = false;
}
};
for (IR::Inst& inst : block->Instructions()) {
if (inst.GetOpcode() == IR::Opcode::LoadSharedU32 ||
inst.GetOpcode() == IR::Opcode::LoadSharedU64) {
emit_barrier(emit_barrier_on_read, inst);
emit_barrier_on_write = true;
}
if (inst.GetOpcode() == IR::Opcode::WriteSharedU32 ||
inst.GetOpcode() == IR::Opcode::WriteSharedU64) {
emit_barrier(emit_barrier_on_write, inst);
emit_barrier_on_read = true;
}
}
}
static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) {
// Insert a barrier after divergent conditional blocks.
// This avoids potential softlocks and crashes when some threads
// initialize shared memory and others read from it.
const IR::U1 cond = data.if_node.cond;
const auto insert_barrier =
IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional<bool> {
if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
return true;
}
return std::nullopt;
});
if (insert_barrier) {
IR::Block* const merge = data.if_node.merge;
auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
IR::IREmitter ir{*merge, insert_point};
ir.Barrier();
}
}
void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) { void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
if (!program.info.uses_shared || !profile.needs_lds_barriers) { if (!program.info.uses_shared || !profile.needs_lds_barriers) {
return; return;
@ -19,27 +67,12 @@ void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
--branch_depth; --branch_depth;
continue; continue;
} }
if (node.type != Type::If) { if (node.type == Type::If && branch_depth++ == 0) {
EmitBarrierInMergeBlock(node.data);
continue; continue;
} }
u32 curr_depth = branch_depth++; if (node.type == Type::Block && branch_depth == 0) {
if (curr_depth != 0) { EmitBarrierInBlock(node.data.block);
continue;
}
const IR::U1 cond = node.data.if_node.cond;
const auto insert_barrier =
IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional<bool> {
if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
return true;
}
return std::nullopt;
});
if (insert_barrier) {
IR::Block* const merge = node.data.if_node.merge;
auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
IR::IREmitter ir{*merge, insert_point};
ir.Barrier();
} }
} }
} }

View File

@ -4,7 +4,7 @@
#pragma once #pragma once
#include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/ir_emitter.h"
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/types.h"
namespace Shader::IR { namespace Shader::IR {
@ -21,4 +21,66 @@ inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::Comp
return swizzled; return swizzled;
} }
/// Applies a number conversion in the read direction.
inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value,
const AmdGpu::NumberConversion& conversion) {
switch (conversion) {
case AmdGpu::NumberConversion::None:
return value;
case AmdGpu::NumberConversion::UintToUscaled:
return ir.ConvertUToF(32, 32, ir.BitCast<U32>(value));
case AmdGpu::NumberConversion::SintToSscaled:
return ir.ConvertSToF(32, 32, ir.BitCast<U32>(value));
case AmdGpu::NumberConversion::UnormToUbnorm:
// Convert 0...1 to -1...1
return ir.FPSub(ir.FPMul(value, ir.Imm32(2.f)), ir.Imm32(1.f));
default:
UNREACHABLE();
}
}
inline Value ApplyReadNumberConversionVec4(IREmitter& ir, const Value& value,
const AmdGpu::NumberConversion& conversion) {
if (conversion == AmdGpu::NumberConversion::None) {
return value;
}
const auto x = ApplyReadNumberConversion(ir, F32{ir.CompositeExtract(value, 0)}, conversion);
const auto y = ApplyReadNumberConversion(ir, F32{ir.CompositeExtract(value, 1)}, conversion);
const auto z = ApplyReadNumberConversion(ir, F32{ir.CompositeExtract(value, 2)}, conversion);
const auto w = ApplyReadNumberConversion(ir, F32{ir.CompositeExtract(value, 3)}, conversion);
return ir.CompositeConstruct(x, y, z, w);
}
/// Applies a number conversion in the write direction.
inline F32 ApplyWriteNumberConversion(IREmitter& ir, const F32& value,
const AmdGpu::NumberConversion& conversion) {
switch (conversion) {
case AmdGpu::NumberConversion::None:
return value;
case AmdGpu::NumberConversion::UintToUscaled:
// Need to return float type to maintain IR semantics.
return ir.BitCast<F32>(U32{ir.ConvertFToU(32, value)});
case AmdGpu::NumberConversion::SintToSscaled:
// Need to return float type to maintain IR semantics.
return ir.BitCast<F32>(U32{ir.ConvertFToS(32, value)});
case AmdGpu::NumberConversion::UnormToUbnorm:
// Convert -1...1 to 0...1
return ir.FPDiv(ir.FPAdd(value, ir.Imm32(1.f)), ir.Imm32(2.f));
default:
UNREACHABLE();
}
}
inline Value ApplyWriteNumberConversionVec4(IREmitter& ir, const Value& value,
const AmdGpu::NumberConversion& conversion) {
if (conversion == AmdGpu::NumberConversion::None) {
return value;
}
const auto x = ApplyWriteNumberConversion(ir, F32{ir.CompositeExtract(value, 0)}, conversion);
const auto y = ApplyWriteNumberConversion(ir, F32{ir.CompositeExtract(value, 1)}, conversion);
const auto z = ApplyWriteNumberConversion(ir, F32{ir.CompositeExtract(value, 2)}, conversion);
const auto w = ApplyWriteNumberConversion(ir, F32{ir.CompositeExtract(value, 3)}, conversion);
return ir.CompositeConstruct(x, y, z, w);
}
} // namespace Shader::IR } // namespace Shader::IR

View File

@ -180,6 +180,7 @@ struct FragmentRuntimeInfo {
std::array<PsInput, 32> inputs; std::array<PsInput, 32> inputs;
struct PsColorBuffer { struct PsColorBuffer {
AmdGpu::NumberFormat num_format; AmdGpu::NumberFormat num_format;
AmdGpu::NumberConversion num_conversion;
AmdGpu::CompMapping swizzle; AmdGpu::CompMapping swizzle;
auto operator<=>(const PsColorBuffer&) const noexcept = default; auto operator<=>(const PsColorBuffer&) const noexcept = default;

View File

@ -32,6 +32,7 @@ struct BufferSpecialization {
struct TextureBufferSpecialization { struct TextureBufferSpecialization {
bool is_integer = false; bool is_integer = false;
AmdGpu::CompMapping dst_select{}; AmdGpu::CompMapping dst_select{};
AmdGpu::NumberConversion num_conversion{};
auto operator<=>(const TextureBufferSpecialization&) const = default; auto operator<=>(const TextureBufferSpecialization&) const = default;
}; };
@ -41,6 +42,7 @@ struct ImageSpecialization {
bool is_integer = false; bool is_integer = false;
bool is_storage = false; bool is_storage = false;
AmdGpu::CompMapping dst_select{}; AmdGpu::CompMapping dst_select{};
AmdGpu::NumberConversion num_conversion{};
auto operator<=>(const ImageSpecialization&) const = default; auto operator<=>(const ImageSpecialization&) const = default;
}; };
@ -107,6 +109,7 @@ struct StageSpecialization {
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
spec.dst_select = sharp.DstSelect(); spec.dst_select = sharp.DstSelect();
spec.num_conversion = sharp.GetNumberConversion();
}); });
ForEachSharp(binding, images, info->images, ForEachSharp(binding, images, info->images,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) { [](auto& spec, const auto& desc, AmdGpu::Image sharp) {
@ -116,6 +119,7 @@ struct StageSpecialization {
if (spec.is_storage) { if (spec.is_storage) {
spec.dst_select = sharp.DstSelect(); spec.dst_select = sharp.DstSelect();
} }
spec.num_conversion = sharp.GetNumberConversion();
}); });
ForEachSharp(binding, fmasks, info->fmasks, ForEachSharp(binding, fmasks, info->fmasks,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) { [](auto& spec, const auto& desc, AmdGpu::Image sharp) {

View File

@ -454,7 +454,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
case PM4ItOpcode::DrawIndirect: { case PM4ItOpcode::DrawIndirect: {
const auto* draw_indirect = reinterpret_cast<const PM4CmdDrawIndirect*>(header); const auto* draw_indirect = reinterpret_cast<const PM4CmdDrawIndirect*>(header);
const auto offset = draw_indirect->data_offset; const auto offset = draw_indirect->data_offset;
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
const auto size = sizeof(DrawIndirectArgs); const auto size = sizeof(DrawIndirectArgs);
if (DebugState.DumpingCurrentReg()) { if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs); DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
@ -462,7 +461,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
if (rasterizer) { if (rasterizer) {
const auto cmd_address = reinterpret_cast<const void*>(header); const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndirect", cmd_address)); rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndirect", cmd_address));
rasterizer->DrawIndirect(false, ib_address, offset, size, 1, 0); rasterizer->DrawIndirect(false, indirect_args_addr, offset, size, 1, 0);
rasterizer->ScopeMarkerEnd(); rasterizer->ScopeMarkerEnd();
} }
break; break;
@ -471,7 +470,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto* draw_index_indirect = const auto* draw_index_indirect =
reinterpret_cast<const PM4CmdDrawIndexIndirect*>(header); reinterpret_cast<const PM4CmdDrawIndexIndirect*>(header);
const auto offset = draw_index_indirect->data_offset; const auto offset = draw_index_indirect->data_offset;
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
const auto size = sizeof(DrawIndexedIndirectArgs); const auto size = sizeof(DrawIndexedIndirectArgs);
if (DebugState.DumpingCurrentReg()) { if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs); DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
@ -480,7 +478,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto cmd_address = reinterpret_cast<const void*>(header); const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin( rasterizer->ScopeMarkerBegin(
fmt::format("dcb:{}:DrawIndexIndirect", cmd_address)); fmt::format("dcb:{}:DrawIndexIndirect", cmd_address));
rasterizer->DrawIndirect(true, ib_address, offset, size, 1, 0); rasterizer->DrawIndirect(true, indirect_args_addr, offset, size, 1, 0);
rasterizer->ScopeMarkerEnd(); rasterizer->ScopeMarkerEnd();
} }
break; break;
@ -489,7 +487,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto* draw_index_indirect = const auto* draw_index_indirect =
reinterpret_cast<const PM4CmdDrawIndexIndirectMulti*>(header); reinterpret_cast<const PM4CmdDrawIndexIndirectMulti*>(header);
const auto offset = draw_index_indirect->data_offset; const auto offset = draw_index_indirect->data_offset;
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
if (DebugState.DumpingCurrentReg()) { if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs); DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
} }
@ -497,9 +494,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto cmd_address = reinterpret_cast<const void*>(header); const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin( rasterizer->ScopeMarkerBegin(
fmt::format("dcb:{}:DrawIndexIndirectCountMulti", cmd_address)); fmt::format("dcb:{}:DrawIndexIndirectCountMulti", cmd_address));
rasterizer->DrawIndirect(true, ib_address, offset, draw_index_indirect->stride, rasterizer->DrawIndirect(
draw_index_indirect->count, true, indirect_args_addr, offset, draw_index_indirect->stride,
draw_index_indirect->countAddr); draw_index_indirect->count, draw_index_indirect->countAddr);
rasterizer->ScopeMarkerEnd(); rasterizer->ScopeMarkerEnd();
} }
break; break;
@ -528,7 +525,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
reinterpret_cast<const PM4CmdDispatchIndirect*>(header); reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
auto& cs_program = GetCsRegs(); auto& cs_program = GetCsRegs();
const auto offset = dispatch_indirect->data_offset; const auto offset = dispatch_indirect->data_offset;
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions); const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
if (DebugState.DumpingCurrentReg()) { if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header), DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
@ -538,7 +534,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
const auto cmd_address = reinterpret_cast<const void*>(header); const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin( rasterizer->ScopeMarkerBegin(
fmt::format("dcb:{}:DispatchIndirect", cmd_address)); fmt::format("dcb:{}:DispatchIndirect", cmd_address));
rasterizer->DispatchIndirect(ib_address, offset, size); rasterizer->DispatchIndirect(indirect_args_addr, offset, size);
rasterizer->ScopeMarkerEnd(); rasterizer->ScopeMarkerEnd();
} }
break; break;
@ -562,7 +558,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
case PM4ItOpcode::SetBase: { case PM4ItOpcode::SetBase: {
const auto* set_base = reinterpret_cast<const PM4CmdSetBase*>(header); const auto* set_base = reinterpret_cast<const PM4CmdSetBase*>(header);
ASSERT(set_base->base_index == PM4CmdSetBase::BaseIndex::DrawIndexIndirPatchTable); ASSERT(set_base->base_index == PM4CmdSetBase::BaseIndex::DrawIndexIndirPatchTable);
mapped_queues[GfxQueueId].indirect_args_addr = set_base->Address<u64>(); indirect_args_addr = set_base->Address<u64>();
break; break;
} }
case PM4ItOpcode::EventWrite: { case PM4ItOpcode::EventWrite: {
@ -823,10 +819,10 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
break; break;
} }
case PM4ItOpcode::DispatchIndirect: { case PM4ItOpcode::DispatchIndirect: {
const auto* dispatch_indirect = reinterpret_cast<const PM4CmdDispatchIndirect*>(header); const auto* dispatch_indirect =
reinterpret_cast<const PM4CmdDispatchIndirectMec*>(header);
auto& cs_program = GetCsRegs(); auto& cs_program = GetCsRegs();
const auto offset = dispatch_indirect->data_offset; const auto ib_address = dispatch_indirect->Address<VAddr>();
const auto ib_address = mapped_queues[vqid].indirect_args_addr;
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions); const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
if (DebugState.DumpingCurrentReg()) { if (DebugState.DumpingCurrentReg()) {
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header), DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
@ -835,7 +831,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
if (rasterizer && (cs_program.dispatch_initiator & 1)) { if (rasterizer && (cs_program.dispatch_initiator & 1)) {
const auto cmd_address = reinterpret_cast<const void*>(header); const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address));
rasterizer->DispatchIndirect(ib_address, offset, size); rasterizer->DispatchIndirect(ib_address, 0, size);
rasterizer->ScopeMarkerEnd(); rasterizer->ScopeMarkerEnd();
} }
break; break;

View File

@ -20,9 +20,9 @@
#include "common/types.h" #include "common/types.h"
#include "common/unique_function.h" #include "common/unique_function.h"
#include "shader_recompiler/params.h" #include "shader_recompiler/params.h"
#include "types.h"
#include "video_core/amdgpu/pixel_format.h" #include "video_core/amdgpu/pixel_format.h"
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/resource.h"
#include "video_core/amdgpu/types.h"
namespace Vulkan { namespace Vulkan {
class Rasterizer; class Rasterizer;
@ -814,7 +814,9 @@ struct Liverpool {
BitField<26, 1, u32> fmask_compression_disable_ci; BitField<26, 1, u32> fmask_compression_disable_ci;
BitField<27, 1, u32> fmask_compress_1frag_only; BitField<27, 1, u32> fmask_compress_1frag_only;
BitField<28, 1, u32> dcc_enable; BitField<28, 1, u32> dcc_enable;
BitField<29, 1, u32> cmask_addr_type; BitField<29, 2, u32> cmask_addr_type;
/// Neo-mode only
BitField<31, 1, u32> alt_tile_mode;
u32 u32all; u32 u32all;
} info; } info;
@ -889,17 +891,21 @@ struct Liverpool {
return !info.linear_general; return !info.linear_general;
} }
[[nodiscard]] DataFormat DataFormat() const { [[nodiscard]] DataFormat GetDataFmt() const {
return RemapDataFormat(info.format); return RemapDataFormat(info.format);
} }
[[nodiscard]] NumberFormat NumFormat() const { [[nodiscard]] NumberFormat GetNumberFmt() const {
// There is a small difference between T# and CB number types, account for it. // There is a small difference between T# and CB number types, account for it.
return RemapNumberFormat(info.number_type == NumberFormat::SnormNz return RemapNumberFormat(info.number_type == NumberFormat::SnormNz
? NumberFormat::Srgb ? NumberFormat::Srgb
: info.number_type.Value()); : info.number_type.Value());
} }
[[nodiscard]] NumberConversion GetNumberConversion() const {
return MapNumberConversion(info.number_type);
}
[[nodiscard]] CompMapping Swizzle() const { [[nodiscard]] CompMapping Swizzle() const {
// clang-format off // clang-format off
static constexpr std::array<std::array<CompMapping, 4>, 4> mrt_swizzles{{ static constexpr std::array<std::array<CompMapping, 4>, 4> mrt_swizzles{{
@ -936,7 +942,7 @@ struct Liverpool {
const auto swap_idx = static_cast<u32>(info.comp_swap.Value()); const auto swap_idx = static_cast<u32>(info.comp_swap.Value());
const auto components_idx = NumComponents(info.format) - 1; const auto components_idx = NumComponents(info.format) - 1;
const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx]; const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx];
return RemapComponents(info.format, mrt_swizzle); return RemapSwizzle(info.format, mrt_swizzle);
} }
}; };
@ -1477,11 +1483,12 @@ private:
std::vector<u32> ccb_buffer; std::vector<u32> ccb_buffer;
std::queue<Task::Handle> submits{}; std::queue<Task::Handle> submits{};
ComputeProgram cs_state{}; ComputeProgram cs_state{};
VAddr indirect_args_addr{};
}; };
std::array<GpuQueue, NumTotalQueues> mapped_queues{}; std::array<GpuQueue, NumTotalQueues> mapped_queues{};
u32 num_mapped_queues{1u}; // GFX is always available u32 num_mapped_queues{1u}; // GFX is always available
VAddr indirect_args_addr{};
struct ConstantEngine { struct ConstantEngine {
void Reset() { void Reset() {
ce_count = 0; ce_count = 0;

View File

@ -100,7 +100,7 @@ std::string_view NameOf(NumberFormat fmt) {
return "Srgb"; return "Srgb";
case NumberFormat::Ubnorm: case NumberFormat::Ubnorm:
return "Ubnorm"; return "Ubnorm";
case NumberFormat::UbnromNz: case NumberFormat::UbnormNz:
return "UbnormNz"; return "UbnormNz";
case NumberFormat::Ubint: case NumberFormat::Ubint:
return "Ubint"; return "Ubint";

View File

@ -204,6 +204,11 @@ struct PM4CmdSetData {
static constexpr u32* SetShReg(u32* cmdbuf, Args... data) { static constexpr u32* SetShReg(u32* cmdbuf, Args... data) {
return WritePacket<PM4ItOpcode::SetShReg>(cmdbuf, type, data...); return WritePacket<PM4ItOpcode::SetShReg>(cmdbuf, type, data...);
} }
template <PM4ShaderType type = PM4ShaderType::ShaderGraphics, typename... Args>
static constexpr u32* SetUconfigReg(u32* cmdbuf, Args... data) {
return WritePacket<PM4ItOpcode::SetUconfigReg>(cmdbuf, type, data...);
}
}; };
struct PM4CmdNop { struct PM4CmdNop {
@ -791,6 +796,18 @@ struct PM4CmdDispatchIndirect {
u32 dispatch_initiator; ///< Dispatch Initiator Register u32 dispatch_initiator; ///< Dispatch Initiator Register
}; };
struct PM4CmdDispatchIndirectMec {
PM4Type3Header header;
u32 address0;
u32 address1;
u32 dispatch_initiator; ///< Dispatch Initiator Register
template <typename T>
T Address() const {
return std::bit_cast<T>(address0 | (u64(address1 & 0xffff) << 32u));
}
};
struct DrawIndirectArgs { struct DrawIndirectArgs {
u32 vertex_count_per_instance; u32 vertex_count_per_instance;
u32 instance_count; u32 instance_count;

View File

@ -11,94 +11,6 @@
namespace AmdGpu { namespace AmdGpu {
enum class CompSwizzle : u32 {
Zero = 0,
One = 1,
Red = 4,
Green = 5,
Blue = 6,
Alpha = 7,
};
struct CompMapping {
CompSwizzle r : 3;
CompSwizzle g : 3;
CompSwizzle b : 3;
CompSwizzle a : 3;
auto operator<=>(const CompMapping& other) const = default;
template <typename T>
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
return {
ApplySingle(data, r),
ApplySingle(data, g),
ApplySingle(data, b),
ApplySingle(data, a),
};
}
private:
template <typename T>
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
switch (swizzle) {
case CompSwizzle::Zero:
return T(0);
case CompSwizzle::One:
return T(1);
case CompSwizzle::Red:
return data[0];
case CompSwizzle::Green:
return data[1];
case CompSwizzle::Blue:
return data[2];
case CompSwizzle::Alpha:
return data[3];
default:
UNREACHABLE();
}
}
};
inline DataFormat RemapDataFormat(const DataFormat format) {
switch (format) {
case DataFormat::Format11_11_10:
return DataFormat::Format10_11_11;
case DataFormat::Format10_10_10_2:
return DataFormat::Format2_10_10_10;
case DataFormat::Format5_5_5_1:
return DataFormat::Format1_5_5_5;
default:
return format;
}
}
inline NumberFormat RemapNumberFormat(const NumberFormat format) {
return format;
}
inline CompMapping RemapComponents(const DataFormat format, const CompMapping components) {
switch (format) {
case DataFormat::Format11_11_10:
return {
.r = components.b,
.g = components.g,
.b = components.r,
.a = components.a,
};
case DataFormat::Format10_10_10_2:
case DataFormat::Format5_5_5_1:
return {
.r = components.a,
.g = components.b,
.b = components.g,
.a = components.r,
};
default:
return components;
}
}
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture] // Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
struct Buffer { struct Buffer {
u64 base_address : 44; u64 base_address : 44;
@ -138,7 +50,7 @@ struct Buffer {
.b = CompSwizzle(dst_sel_z), .b = CompSwizzle(dst_sel_z),
.a = CompSwizzle(dst_sel_w), .a = CompSwizzle(dst_sel_w),
}; };
return RemapComponents(DataFormat(data_format), dst_sel); return RemapSwizzle(DataFormat(data_format), dst_sel);
} }
NumberFormat GetNumberFmt() const noexcept { NumberFormat GetNumberFmt() const noexcept {
@ -149,6 +61,10 @@ struct Buffer {
return RemapDataFormat(DataFormat(data_format)); return RemapDataFormat(DataFormat(data_format));
} }
NumberConversion GetNumberConversion() const noexcept {
return MapNumberConversion(NumberFormat(num_format));
}
u32 GetStride() const noexcept { u32 GetStride() const noexcept {
return stride; return stride;
} }
@ -261,7 +177,15 @@ struct Image {
u64 min_lod_warn : 12; u64 min_lod_warn : 12;
u64 counter_bank_id : 8; u64 counter_bank_id : 8;
u64 lod_hw_cnt_en : 1; u64 lod_hw_cnt_en : 1;
u64 : 43; /// Neo-mode only
u64 compression_en : 1;
/// Neo-mode only
u64 alpha_is_on_msb : 1;
/// Neo-mode only
u64 color_transform : 1;
/// Neo-mode only
u64 alt_tile_mode : 1;
u64 : 39;
static constexpr Image Null() { static constexpr Image Null() {
Image image{}; Image image{};
@ -295,7 +219,7 @@ struct Image {
.b = CompSwizzle(dst_sel_z), .b = CompSwizzle(dst_sel_z),
.a = CompSwizzle(dst_sel_w), .a = CompSwizzle(dst_sel_w),
}; };
return RemapComponents(DataFormat(data_format), dst_sel); return RemapSwizzle(DataFormat(data_format), dst_sel);
} }
u32 Pitch() const { u32 Pitch() const {
@ -344,6 +268,10 @@ struct Image {
return RemapNumberFormat(NumberFormat(num_format)); return RemapNumberFormat(NumberFormat(num_format));
} }
NumberConversion GetNumberConversion() const noexcept {
return MapNumberConversion(NumberFormat(num_format));
}
TilingMode GetTilingMode() const { TilingMode GetTilingMode() const {
if (tiling_index >= 0 && tiling_index <= 7) { if (tiling_index >= 0 && tiling_index <= 7) {
return tiling_index == 5 ? TilingMode::Texture_MicroTiled return tiling_index == 5 ? TilingMode::Texture_MicroTiled

View File

@ -5,6 +5,7 @@
#include <string_view> #include <string_view>
#include <fmt/format.h> #include <fmt/format.h>
#include "common/assert.h"
#include "common/types.h" #include "common/types.h"
namespace AmdGpu { namespace AmdGpu {
@ -177,11 +178,130 @@ enum class NumberFormat : u32 {
Float = 7, Float = 7,
Srgb = 9, Srgb = 9,
Ubnorm = 10, Ubnorm = 10,
UbnromNz = 11, UbnormNz = 11,
Ubint = 12, Ubint = 12,
Ubscaled = 13, Ubscaled = 13,
}; };
enum class CompSwizzle : u32 {
Zero = 0,
One = 1,
Red = 4,
Green = 5,
Blue = 6,
Alpha = 7,
};
enum class NumberConversion : u32 {
None,
UintToUscaled,
SintToSscaled,
UnormToUbnorm,
};
struct CompMapping {
CompSwizzle r : 3;
CompSwizzle g : 3;
CompSwizzle b : 3;
CompSwizzle a : 3;
auto operator<=>(const CompMapping& other) const = default;
template <typename T>
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
return {
ApplySingle(data, r),
ApplySingle(data, g),
ApplySingle(data, b),
ApplySingle(data, a),
};
}
private:
template <typename T>
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
switch (swizzle) {
case CompSwizzle::Zero:
return T(0);
case CompSwizzle::One:
return T(1);
case CompSwizzle::Red:
return data[0];
case CompSwizzle::Green:
return data[1];
case CompSwizzle::Blue:
return data[2];
case CompSwizzle::Alpha:
return data[3];
default:
UNREACHABLE();
}
}
};
inline DataFormat RemapDataFormat(const DataFormat format) {
switch (format) {
case DataFormat::Format11_11_10:
return DataFormat::Format10_11_11;
case DataFormat::Format10_10_10_2:
return DataFormat::Format2_10_10_10;
case DataFormat::Format5_5_5_1:
return DataFormat::Format1_5_5_5;
default:
return format;
}
}
inline NumberFormat RemapNumberFormat(const NumberFormat format) {
switch (format) {
case NumberFormat::Uscaled:
return NumberFormat::Uint;
case NumberFormat::Sscaled:
return NumberFormat::Sint;
case NumberFormat::Ubnorm:
return NumberFormat::Unorm;
default:
return format;
}
}
inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizzle) {
switch (format) {
case DataFormat::Format11_11_10: {
CompMapping result;
result.r = swizzle.b;
result.g = swizzle.g;
result.b = swizzle.r;
result.a = swizzle.a;
return result;
}
case DataFormat::Format10_10_10_2:
case DataFormat::Format5_5_5_1: {
CompMapping result;
result.r = swizzle.a;
result.g = swizzle.b;
result.b = swizzle.g;
result.a = swizzle.r;
return result;
}
default:
return swizzle;
}
}
inline NumberConversion MapNumberConversion(const NumberFormat format) {
switch (format) {
case NumberFormat::Uscaled:
return NumberConversion::UintToUscaled;
case NumberFormat::Sscaled:
return NumberConversion::SintToSscaled;
case NumberFormat::Ubnorm:
return NumberConversion::UnormToUbnorm;
default:
return NumberConversion::None;
}
}
} // namespace AmdGpu } // namespace AmdGpu
template <> template <>

View File

@ -119,19 +119,23 @@ public:
return buffer; return buffer;
} }
std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlagBits2 dst_acess_mask, std::optional<vk::BufferMemoryBarrier2> GetBarrier(
vk::PipelineStageFlagBits2 dst_stage) { vk::Flags<vk::AccessFlagBits2> dst_acess_mask, vk::PipelineStageFlagBits2 dst_stage,
u32 offset = 0) {
if (dst_acess_mask == access_mask && stage == dst_stage) { if (dst_acess_mask == access_mask && stage == dst_stage) {
return {}; return {};
} }
DEBUG_ASSERT(offset < size_bytes);
auto barrier = vk::BufferMemoryBarrier2{ auto barrier = vk::BufferMemoryBarrier2{
.srcStageMask = stage, .srcStageMask = stage,
.srcAccessMask = access_mask, .srcAccessMask = access_mask,
.dstStageMask = dst_stage, .dstStageMask = dst_stage,
.dstAccessMask = dst_acess_mask, .dstAccessMask = dst_acess_mask,
.buffer = buffer.buffer, .buffer = buffer.buffer,
.size = size_bytes, .offset = offset,
.size = size_bytes - offset,
}; };
access_mask = dst_acess_mask; access_mask = dst_acess_mask;
stage = dst_stage; stage = dst_stage;
@ -150,8 +154,10 @@ public:
Vulkan::Scheduler* scheduler; Vulkan::Scheduler* scheduler;
MemoryUsage usage; MemoryUsage usage;
UniqueBuffer buffer; UniqueBuffer buffer;
vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone}; vk::Flags<vk::AccessFlagBits2> access_mask{
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone}; vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite};
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eAllCommands};
}; };
class StreamBuffer : public Buffer { class StreamBuffer : public Buffer {

View File

@ -34,38 +34,19 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
// Ensure the first slot is used for the null buffer // Ensure the first slot is used for the null buffer
const auto null_id = const auto null_id =
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1); slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 16);
ASSERT(null_id.index == 0); ASSERT(null_id.index == 0);
const vk::Buffer& null_buffer = slot_buffers[null_id].buffer; const vk::Buffer& null_buffer = slot_buffers[null_id].buffer;
Vulkan::SetObjectName(instance.GetDevice(), null_buffer, "Null Buffer"); Vulkan::SetObjectName(instance.GetDevice(), null_buffer, "Null Buffer");
const vk::BufferViewCreateInfo null_view_ci = {
.buffer = null_buffer,
.format = vk::Format::eR8Unorm,
.offset = 0,
.range = VK_WHOLE_SIZE,
};
const auto [null_view_result, null_view] = instance.GetDevice().createBufferView(null_view_ci);
ASSERT_MSG(null_view_result == vk::Result::eSuccess, "Failed to create null buffer view.");
null_buffer_view = null_view;
Vulkan::SetObjectName(instance.GetDevice(), null_buffer_view, "Null Buffer View");
} }
BufferCache::~BufferCache() = default; BufferCache::~BufferCache() = default;
void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
std::scoped_lock lk{mutex};
const bool is_tracked = IsRegionRegistered(device_addr, size); const bool is_tracked = IsRegionRegistered(device_addr, size);
if (!is_tracked) { if (is_tracked) {
return;
}
// Mark the page as CPU modified to stop tracking writes. // Mark the page as CPU modified to stop tracking writes.
SCOPE_EXIT {
memory_tracker.MarkRegionAsCpuModified(device_addr, size); memory_tracker.MarkRegionAsCpuModified(device_addr, size);
};
if (!memory_tracker.IsRegionGpuModified(device_addr, size)) {
// Page has not been modified by the GPU, nothing to do.
return;
} }
} }
@ -267,7 +248,16 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
const BufferId buffer_id = FindBuffer(address, num_bytes); const BufferId buffer_id = FindBuffer(address, num_bytes);
return &slot_buffers[buffer_id]; return &slot_buffers[buffer_id];
}(); }();
const vk::BufferMemoryBarrier2 buf_barrier = { const vk::BufferMemoryBarrier2 pre_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = buffer->Handle(),
.offset = buffer->Offset(address),
.size = num_bytes,
};
const vk::BufferMemoryBarrier2 post_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer, .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite, .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
@ -279,9 +269,14 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
cmdbuf.pipelineBarrier2(vk::DependencyInfo{ cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion, .dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1, .bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &buf_barrier, .pBufferMemoryBarriers = &pre_barrier,
});
cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value);
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &post_barrier,
}); });
cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value);
} }
std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) { std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
@ -346,6 +341,7 @@ bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) {
++page; ++page;
continue; continue;
} }
std::shared_lock lk{mutex};
Buffer& buffer = slot_buffers[buffer_id]; Buffer& buffer = slot_buffers[buffer_id];
const VAddr buf_start_addr = buffer.CpuAddr(); const VAddr buf_start_addr = buffer.CpuAddr();
const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes(); const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
@ -472,21 +468,41 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
}; };
scheduler.EndRendering(); scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer(); const auto cmdbuf = scheduler.CommandBuffer();
static constexpr vk::MemoryBarrier READ_BARRIER{
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite, boost::container::static_vector<vk::BufferMemoryBarrier2, 2> pre_barriers{};
.dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, if (auto src_barrier = overlap.GetBarrier(vk::AccessFlagBits2::eTransferRead,
}; vk::PipelineStageFlagBits2::eTransfer)) {
static constexpr vk::MemoryBarrier WRITE_BARRIER{ pre_barriers.push_back(*src_barrier);
.srcAccessMask = vk::AccessFlagBits::eTransferWrite, }
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, if (auto dst_barrier =
}; new_buffer.GetBarrier(vk::AccessFlagBits2::eTransferWrite,
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits2::eTransfer, dst_base_offset)) {
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, pre_barriers.push_back(*dst_barrier);
READ_BARRIER, {}, {}); }
cmdbuf.copyBuffer(overlap.buffer, new_buffer.buffer, copy); cmdbuf.pipelineBarrier2(vk::DependencyInfo{
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, .dependencyFlags = vk::DependencyFlagBits::eByRegion,
vk::PipelineStageFlagBits::eAllCommands, .bufferMemoryBarrierCount = static_cast<u32>(pre_barriers.size()),
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); .pBufferMemoryBarriers = pre_barriers.data(),
});
cmdbuf.copyBuffer(overlap.Handle(), new_buffer.Handle(), copy);
boost::container::static_vector<vk::BufferMemoryBarrier2, 2> post_barriers{};
if (auto src_barrier =
overlap.GetBarrier(vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
vk::PipelineStageFlagBits2::eAllCommands)) {
post_barriers.push_back(*src_barrier);
}
if (auto dst_barrier = new_buffer.GetBarrier(
vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
vk::PipelineStageFlagBits2::eAllCommands, dst_base_offset)) {
post_barriers.push_back(*dst_barrier);
}
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = static_cast<u32>(post_barriers.size()),
.pBufferMemoryBarriers = post_barriers.data(),
});
DeleteBuffer(overlap_id); DeleteBuffer(overlap_id);
} }
@ -496,8 +512,11 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
wanted_size = static_cast<u32>(device_addr_end - device_addr); wanted_size = static_cast<u32>(device_addr_end - device_addr);
const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size); const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size);
const u32 size = static_cast<u32>(overlap.end - overlap.begin); const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id = slot_buffers.insert( const BufferId new_buffer_id = [&] {
instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size); std::scoped_lock lk{mutex};
return slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin,
AllFlags, size);
}();
auto& new_buffer = slot_buffers[new_buffer_id]; auto& new_buffer = slot_buffers[new_buffer_id];
const size_t size_bytes = new_buffer.SizeBytes(); const size_t size_bytes = new_buffer.SizeBytes();
const auto cmdbuf = scheduler.CommandBuffer(); const auto cmdbuf = scheduler.CommandBuffer();
@ -537,10 +556,8 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
bool is_texel_buffer) { bool is_texel_buffer) {
std::scoped_lock lk{mutex};
boost::container::small_vector<vk::BufferCopy, 4> copies; boost::container::small_vector<vk::BufferCopy, 4> copies;
u64 total_size_bytes = 0; u64 total_size_bytes = 0;
u64 largest_copy = 0;
VAddr buffer_start = buffer.CpuAddr(); VAddr buffer_start = buffer.CpuAddr();
memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
copies.push_back(vk::BufferCopy{ copies.push_back(vk::BufferCopy{
@ -549,7 +566,6 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
.size = range_size, .size = range_size,
}); });
total_size_bytes += range_size; total_size_bytes += range_size;
largest_copy = std::max(largest_copy, range_size);
}); });
SCOPE_EXIT { SCOPE_EXIT {
if (is_texel_buffer) { if (is_texel_buffer) {
@ -590,21 +606,36 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
} }
scheduler.EndRendering(); scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer(); const auto cmdbuf = scheduler.CommandBuffer();
static constexpr vk::MemoryBarrier READ_BARRIER{ const vk::BufferMemoryBarrier2 pre_barrier = {
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite, .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite, .srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = buffer.Handle(),
.offset = 0,
.size = buffer.SizeBytes(),
}; };
static constexpr vk::MemoryBarrier WRITE_BARRIER{ const vk::BufferMemoryBarrier2 post_barrier = {
.srcAccessMask = vk::AccessFlagBits::eTransferWrite, .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.buffer = buffer.Handle(),
.offset = 0,
.size = buffer.SizeBytes(),
}; };
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, cmdbuf.pipelineBarrier2(vk::DependencyInfo{
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, .dependencyFlags = vk::DependencyFlagBits::eByRegion,
READ_BARRIER, {}, {}); .bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &pre_barrier,
});
cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies); cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, cmdbuf.pipelineBarrier2(vk::DependencyInfo{
vk::PipelineStageFlagBits::eAllCommands, .dependencyFlags = vk::DependencyFlagBits::eByRegion,
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); .bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &post_barrier,
});
} }
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) { bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
@ -612,7 +643,7 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize; FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize;
TextureCache::BaseDesc desc{}; TextureCache::BaseDesc desc{};
desc.info.guest_address = device_addr; desc.info.guest_address = device_addr;
desc.info.guest_size_bytes = size; desc.info.guest_size = size;
const ImageId image_id = texture_cache.FindImage(desc, find_flags); const ImageId image_id = texture_cache.FindImage(desc, find_flags);
if (!image_id) { if (!image_id) {
return false; return false;
@ -654,10 +685,42 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
} }
if (!copies.empty()) { if (!copies.empty()) {
scheduler.EndRendering(); scheduler.EndRendering();
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); const vk::BufferMemoryBarrier2 pre_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = buffer.Handle(),
.offset = max_offset - size,
.size = size,
};
const vk::BufferMemoryBarrier2 post_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
.buffer = buffer.Handle(),
.offset = max_offset - size,
.size = size,
};
auto barriers = image.GetBarriers(vk::ImageLayout::eTransferSrcOptimal,
vk::AccessFlagBits2::eTransferRead,
vk::PipelineStageFlagBits2::eTransfer, {});
const auto cmdbuf = scheduler.CommandBuffer(); const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer, cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &pre_barrier,
.imageMemoryBarrierCount = static_cast<u32>(barriers.size()),
.pImageMemoryBarriers = barriers.data(),
});
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.Handle(),
copies); copies);
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &post_barrier,
});
} }
return true; return true;
} }

View File

@ -3,7 +3,7 @@
#pragma once #pragma once
#include <mutex> #include <shared_mutex>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include <boost/icl/interval_map.hpp> #include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h> #include <tsl/robin_map.h>
@ -71,10 +71,6 @@ public:
return slot_buffers[id]; return slot_buffers[id];
} }
[[nodiscard]] vk::BufferView& NullBufferView() {
return null_buffer_view;
}
/// Invalidates any buffer in the logical page range. /// Invalidates any buffer in the logical page range.
void InvalidateMemory(VAddr device_addr, u64 size); void InvalidateMemory(VAddr device_addr, u64 size);
@ -157,10 +153,9 @@ private:
StreamBuffer staging_buffer; StreamBuffer staging_buffer;
StreamBuffer stream_buffer; StreamBuffer stream_buffer;
Buffer gds_buffer; Buffer gds_buffer;
std::mutex mutex; std::shared_mutex mutex;
Common::SlotVector<Buffer> slot_buffers; Common::SlotVector<Buffer> slot_buffers;
RangeSet gpu_modified_ranges; RangeSet gpu_modified_ranges;
vk::BufferView null_buffer_view;
MemoryTracker memory_tracker; MemoryTracker memory_tracker;
PageTable page_table; PageTable page_table;
}; };

View File

@ -15,13 +15,8 @@ namespace VideoCore {
class MemoryTracker { class MemoryTracker {
public: public:
static constexpr size_t MAX_CPU_PAGE_BITS = 40; static constexpr size_t MAX_CPU_PAGE_BITS = 40;
static constexpr size_t HIGHER_PAGE_BITS = 22;
static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
static constexpr size_t MANAGER_POOL_SIZE = 32; static constexpr size_t MANAGER_POOL_SIZE = 32;
static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
using Manager = WordManager<WORDS_STACK_NEEDED>;
public: public:
explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {} explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {}
@ -30,7 +25,7 @@ public:
/// Returns true if a region has been modified from the CPU /// Returns true if a region has been modified from the CPU
[[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
return IteratePages<true>( return IteratePages<true>(
query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
return manager->template IsRegionModified<Type::CPU>(offset, size); return manager->template IsRegionModified<Type::CPU>(offset, size);
}); });
} }
@ -38,52 +33,34 @@ public:
/// Returns true if a region has been modified from the GPU /// Returns true if a region has been modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
return IteratePages<false>( return IteratePages<false>(
query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
return manager->template IsRegionModified<Type::GPU>(offset, size); return manager->template IsRegionModified<Type::GPU>(offset, size);
}); });
} }
/// Mark region as CPU modified, notifying the device_tracker about this change /// Mark region as CPU modified, notifying the device_tracker about this change
void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
IteratePages<true>(dirty_cpu_addr, query_size, IteratePages<false>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) { [](RegionManager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::CPU, true>( manager->template ChangeRegionState<Type::CPU, true>(
manager->GetCpuAddr() + offset, size); manager->GetCpuAddr() + offset, size);
}); });
} }
/// Unmark region as CPU modified, notifying the device_tracker about this change
void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
IteratePages<true>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::CPU, false>(
manager->GetCpuAddr() + offset, size);
});
}
/// Mark region as modified from the host GPU /// Mark region as modified from the host GPU
void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
IteratePages<true>(dirty_cpu_addr, query_size, IteratePages<false>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) { [](RegionManager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::GPU, true>( manager->template ChangeRegionState<Type::GPU, true>(
manager->GetCpuAddr() + offset, size); manager->GetCpuAddr() + offset, size);
}); });
} }
/// Unmark region as modified from the host GPU
void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
IteratePages<true>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::GPU, false>(
manager->GetCpuAddr() + offset, size);
});
}
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
template <typename Func> template <typename Func>
void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
IteratePages<true>(query_cpu_range, query_size, IteratePages<true>(query_cpu_range, query_size,
[&func](Manager* manager, u64 offset, size_t size) { [&func](RegionManager* manager, u64 offset, size_t size) {
manager->template ForEachModifiedRange<Type::CPU, true>( manager->template ForEachModifiedRange<Type::CPU, true>(
manager->GetCpuAddr() + offset, size, func); manager->GetCpuAddr() + offset, size, func);
}); });
@ -93,7 +70,7 @@ public:
template <bool clear, typename Func> template <bool clear, typename Func>
void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
IteratePages<false>(query_cpu_range, query_size, IteratePages<false>(query_cpu_range, query_size,
[&func](Manager* manager, u64 offset, size_t size) { [&func](RegionManager* manager, u64 offset, size_t size) {
if constexpr (clear) { if constexpr (clear) {
manager->template ForEachModifiedRange<Type::GPU, true>( manager->template ForEachModifiedRange<Type::GPU, true>(
manager->GetCpuAddr() + offset, size, func); manager->GetCpuAddr() + offset, size, func);
@ -114,7 +91,7 @@ private:
*/ */
template <bool create_region_on_fail, typename Func> template <bool create_region_on_fail, typename Func>
bool IteratePages(VAddr cpu_address, size_t size, Func&& func) { bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
using FuncReturn = typename std::invoke_result<Func, Manager*, u64, size_t>::type; using FuncReturn = typename std::invoke_result<Func, RegionManager*, u64, size_t>::type;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
std::size_t remaining_size{size}; std::size_t remaining_size{size};
std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
@ -155,7 +132,7 @@ private:
manager_pool.emplace_back(); manager_pool.emplace_back();
auto& last_pool = manager_pool.back(); auto& last_pool = manager_pool.back();
for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
std::construct_at(&last_pool[i], tracker, 0, HIGHER_PAGE_SIZE); std::construct_at(&last_pool[i], tracker, 0);
free_managers.push_back(&last_pool[i]); free_managers.push_back(&last_pool[i]);
} }
} }
@ -167,9 +144,9 @@ private:
} }
PageManager* tracker; PageManager* tracker;
std::deque<std::array<Manager, MANAGER_POOL_SIZE>> manager_pool; std::deque<std::array<RegionManager, MANAGER_POOL_SIZE>> manager_pool;
std::vector<Manager*> free_managers; std::vector<RegionManager*> free_managers;
std::array<Manager*, NUM_HIGH_PAGES> top_tier{}; std::array<RegionManager*, NUM_HIGH_PAGES> top_tier{};
}; };
} // namespace VideoCore } // namespace VideoCore

View File

@ -3,10 +3,12 @@
#pragma once #pragma once
#include <algorithm> #include <array>
#include <mutex>
#include <span> #include <span>
#include <utility> #include <utility>
#include "common/div_ceil.h"
#include "common/spin_lock.h"
#include "common/types.h" #include "common/types.h"
#include "video_core/page_manager.h" #include "video_core/page_manager.h"
@ -16,135 +18,32 @@ constexpr u64 PAGES_PER_WORD = 64;
constexpr u64 BYTES_PER_PAGE = 4_KB; constexpr u64 BYTES_PER_PAGE = 4_KB;
constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
constexpr u64 HIGHER_PAGE_BITS = 22;
constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
constexpr u64 NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
enum class Type { enum class Type {
CPU, CPU,
GPU, GPU,
Untracked, Untracked,
}; };
/// Vector tracking modified pages tightly packed with small vector optimization using WordsArray = std::array<u64, NUM_REGION_WORDS>;
template <size_t stack_words = 1>
struct WordsArray {
/// Returns the pointer to the words state
[[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
return is_short ? stack.data() : heap;
}
/// Returns the pointer to the words state /**
[[nodiscard]] u64* Pointer(bool is_short) noexcept { * Allows tracking CPU and GPU modification of pages in a contigious 4MB virtual address region.
return is_short ? stack.data() : heap; * Information is stored in bitsets for spacial locality and fast update of single pages.
} */
class RegionManager {
std::array<u64, stack_words> stack{}; ///< Small buffers storage
u64* heap; ///< Not-small buffers pointer to the storage
};
template <size_t stack_words = 1>
struct Words {
explicit Words() = default;
explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD);
if (IsShort()) {
cpu.stack.fill(~u64{0});
gpu.stack.fill(0);
untracked.stack.fill(~u64{0});
} else {
// Share allocation between CPU and GPU pages and set their default values
u64* const alloc = new u64[num_words * 3];
cpu.heap = alloc;
gpu.heap = alloc + num_words;
untracked.heap = alloc + num_words * 2;
std::fill_n(cpu.heap, num_words, ~u64{0});
std::fill_n(gpu.heap, num_words, 0);
std::fill_n(untracked.heap, num_words, ~u64{0});
}
// Clean up tailing bits
const u64 last_word_size = size_bytes % BYTES_PER_WORD;
const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
const u64 last_word = (~u64{0} << shift) >> shift;
cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
}
~Words() {
Release();
}
Words& operator=(Words&& rhs) noexcept {
Release();
size_bytes = rhs.size_bytes;
num_words = rhs.num_words;
cpu = rhs.cpu;
gpu = rhs.gpu;
untracked = rhs.untracked;
rhs.cpu.heap = nullptr;
return *this;
}
Words(Words&& rhs) noexcept
: size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu},
untracked{rhs.untracked} {
rhs.cpu.heap = nullptr;
}
Words& operator=(const Words&) = delete;
Words(const Words&) = delete;
/// Returns true when the buffer fits in the small vector optimization
[[nodiscard]] bool IsShort() const noexcept {
return num_words <= stack_words;
}
/// Returns the number of words of the buffer
[[nodiscard]] size_t NumWords() const noexcept {
return num_words;
}
/// Release buffer resources
void Release() {
if (!IsShort()) {
// CPU written words is the base for the heap allocation
delete[] cpu.heap;
}
}
template <Type type>
std::span<u64> Span() noexcept {
if constexpr (type == Type::CPU) {
return std::span<u64>(cpu.Pointer(IsShort()), num_words);
} else if constexpr (type == Type::GPU) {
return std::span<u64>(gpu.Pointer(IsShort()), num_words);
} else if constexpr (type == Type::Untracked) {
return std::span<u64>(untracked.Pointer(IsShort()), num_words);
}
}
template <Type type>
std::span<const u64> Span() const noexcept {
if constexpr (type == Type::CPU) {
return std::span<const u64>(cpu.Pointer(IsShort()), num_words);
} else if constexpr (type == Type::GPU) {
return std::span<const u64>(gpu.Pointer(IsShort()), num_words);
} else if constexpr (type == Type::Untracked) {
return std::span<const u64>(untracked.Pointer(IsShort()), num_words);
}
}
u64 size_bytes = 0;
size_t num_words = 0;
WordsArray<stack_words> cpu;
WordsArray<stack_words> gpu;
WordsArray<stack_words> untracked;
};
template <size_t stack_words = 1>
class WordManager {
public: public:
explicit WordManager(PageManager* tracker_, VAddr cpu_addr_, u64 size_bytes) explicit RegionManager(PageManager* tracker_, VAddr cpu_addr_)
: tracker{tracker_}, cpu_addr{cpu_addr_}, words{size_bytes} {} : tracker{tracker_}, cpu_addr{cpu_addr_} {
cpu.fill(~u64{0});
explicit WordManager() = default; gpu.fill(0);
untracked.fill(~u64{0});
}
explicit RegionManager() = default;
void SetCpuAddress(VAddr new_cpu_addr) { void SetCpuAddress(VAddr new_cpu_addr) {
cpu_addr = new_cpu_addr; cpu_addr = new_cpu_addr;
@ -175,12 +74,12 @@ public:
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL)); const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL));
const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL)); const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL));
if (start >= SizeBytes() || end <= start) { if (start >= HIGHER_PAGE_SIZE || end <= start) {
return; return;
} }
auto [start_word, start_page] = GetWordPage(start); auto [start_word, start_page] = GetWordPage(start);
auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL); auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL);
const size_t num_words = NumWords(); constexpr size_t num_words = NUM_REGION_WORDS;
start_word = std::min(start_word, num_words); start_word = std::min(start_word, num_words);
end_word = std::min(end_word, num_words); end_word = std::min(end_word, num_words);
const size_t diff = end_word - start_word; const size_t diff = end_word - start_word;
@ -225,21 +124,21 @@ public:
*/ */
template <Type type, bool enable> template <Type type, bool enable>
void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) { void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
std::span<u64> state_words = words.template Span<type>(); std::scoped_lock lk{lock};
[[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>(); std::span<u64> state_words = Span<type>();
IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) { IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) {
if constexpr (type == Type::CPU) { if constexpr (type == Type::CPU) {
NotifyPageTracker<!enable>(index, untracked_words[index], mask); UpdateProtection<!enable>(index, untracked[index], mask);
} }
if constexpr (enable) { if constexpr (enable) {
state_words[index] |= mask; state_words[index] |= mask;
if constexpr (type == Type::CPU) { if constexpr (type == Type::CPU) {
untracked_words[index] |= mask; untracked[index] |= mask;
} }
} else { } else {
state_words[index] &= ~mask; state_words[index] &= ~mask;
if constexpr (type == Type::CPU) { if constexpr (type == Type::CPU) {
untracked_words[index] &= ~mask; untracked[index] &= ~mask;
} }
} }
}); });
@ -255,10 +154,10 @@ public:
*/ */
template <Type type, bool clear, typename Func> template <Type type, bool clear, typename Func>
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
std::scoped_lock lk{lock};
static_assert(type != Type::Untracked); static_assert(type != Type::Untracked);
std::span<u64> state_words = words.template Span<type>(); std::span<u64> state_words = Span<type>();
[[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
const size_t offset = query_cpu_range - cpu_addr; const size_t offset = query_cpu_range - cpu_addr;
bool pending = false; bool pending = false;
size_t pending_offset{}; size_t pending_offset{};
@ -269,16 +168,16 @@ public:
}; };
IterateWords(offset, size, [&](size_t index, u64 mask) { IterateWords(offset, size, [&](size_t index, u64 mask) {
if constexpr (type == Type::GPU) { if constexpr (type == Type::GPU) {
mask &= ~untracked_words[index]; mask &= ~untracked[index];
} }
const u64 word = state_words[index] & mask; const u64 word = state_words[index] & mask;
if constexpr (clear) { if constexpr (clear) {
if constexpr (type == Type::CPU) { if constexpr (type == Type::CPU) {
NotifyPageTracker<true>(index, untracked_words[index], mask); UpdateProtection<true>(index, untracked[index], mask);
} }
state_words[index] &= ~mask; state_words[index] &= ~mask;
if constexpr (type == Type::CPU) { if constexpr (type == Type::CPU) {
untracked_words[index] &= ~mask; untracked[index] &= ~mask;
} }
} }
const size_t base_offset = index * PAGES_PER_WORD; const size_t base_offset = index * PAGES_PER_WORD;
@ -315,13 +214,11 @@ public:
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked); static_assert(type != Type::Untracked);
const std::span<const u64> state_words = words.template Span<type>(); const std::span<const u64> state_words = Span<type>();
[[maybe_unused]] const std::span<const u64> untracked_words =
words.template Span<Type::Untracked>();
bool result = false; bool result = false;
IterateWords(offset, size, [&](size_t index, u64 mask) { IterateWords(offset, size, [&](size_t index, u64 mask) {
if constexpr (type == Type::GPU) { if constexpr (type == Type::GPU) {
mask &= ~untracked_words[index]; mask &= ~untracked[index];
} }
const u64 word = state_words[index] & mask; const u64 word = state_words[index] & mask;
if (word != 0) { if (word != 0) {
@ -333,44 +230,7 @@ public:
return result; return result;
} }
/// Returns the number of words of the manager
[[nodiscard]] size_t NumWords() const noexcept {
return words.NumWords();
}
/// Returns the size in bytes of the manager
[[nodiscard]] u64 SizeBytes() const noexcept {
return words.size_bytes;
}
/// Returns true when the buffer fits in the small vector optimization
[[nodiscard]] bool IsShort() const noexcept {
return words.IsShort();
}
private: private:
template <Type type>
u64* Array() noexcept {
if constexpr (type == Type::CPU) {
return words.cpu.Pointer(IsShort());
} else if constexpr (type == Type::GPU) {
return words.gpu.Pointer(IsShort());
} else if constexpr (type == Type::Untracked) {
return words.untracked.Pointer(IsShort());
}
}
template <Type type>
const u64* Array() const noexcept {
if constexpr (type == Type::CPU) {
return words.cpu.Pointer(IsShort());
} else if constexpr (type == Type::GPU) {
return words.gpu.Pointer(IsShort());
} else if constexpr (type == Type::Untracked) {
return words.untracked.Pointer(IsShort());
}
}
/** /**
* Notify tracker about changes in the CPU tracking state of a word in the buffer * Notify tracker about changes in the CPU tracking state of a word in the buffer
* *
@ -381,7 +241,7 @@ private:
* @tparam add_to_tracker True when the tracker should start tracking the new pages * @tparam add_to_tracker True when the tracker should start tracking the new pages
*/ */
template <bool add_to_tracker> template <bool add_to_tracker>
void NotifyPageTracker(u64 word_index, u64 current_bits, u64 new_bits) const { void UpdateProtection(u64 word_index, u64 current_bits, u64 new_bits) const {
u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits; u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
IteratePages(changed_bits, [&](size_t offset, size_t size) { IteratePages(changed_bits, [&](size_t offset, size_t size) {
@ -390,9 +250,34 @@ private:
}); });
} }
template <Type type>
std::span<u64> Span() noexcept {
if constexpr (type == Type::CPU) {
return cpu;
} else if constexpr (type == Type::GPU) {
return gpu;
} else if constexpr (type == Type::Untracked) {
return untracked;
}
}
template <Type type>
std::span<const u64> Span() const noexcept {
if constexpr (type == Type::CPU) {
return cpu;
} else if constexpr (type == Type::GPU) {
return gpu;
} else if constexpr (type == Type::Untracked) {
return untracked;
}
}
Common::SpinLock lock;
PageManager* tracker; PageManager* tracker;
VAddr cpu_addr = 0; VAddr cpu_addr = 0;
Words<stack_words> words; WordsArray cpu;
WordsArray gpu;
WordsArray untracked;
}; };
} // namespace VideoCore } // namespace VideoCore

View File

@ -2,13 +2,14 @@
# SPDX-License-Identifier: GPL-2.0-or-later # SPDX-License-Identifier: GPL-2.0-or-later
set(SHADER_FILES set(SHADER_FILES
detile_m8x1.comp detilers/macro_32bpp.comp
detile_m8x2.comp detilers/macro_64bpp.comp
detile_m32x1.comp detilers/macro_8bpp.comp
detile_m32x2.comp detilers/micro_128bpp.comp
detile_m32x4.comp detilers/micro_16bpp.comp
detile_macro32x1.comp detilers/micro_32bpp.comp
detile_macro32x2.comp detilers/micro_64bpp.comp
detilers/micro_8bpp.comp
fs_tri.vert fs_tri.vert
post_process.frag post_process.frag
) )

View File

@ -0,0 +1,101 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#version 450
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
layout(std430, binding = 0) buffer input_buf {
uint in_data[];
};
layout(std430, binding = 1) buffer output_buf {
uint out_data[];
};
layout(push_constant) uniform image_info {
uint num_levels;
uint pitch;
uint height;
uint c0;
uint c1;
} info;
const uint lut_8bpp[][16] = {
{
0x05040100, 0x45444140,
0x07060302, 0x47464342,
0x0d0c0908, 0x4d4c4948,
0x0f0e0b0a, 0x4f4e4b4a,
0x85848180, 0xc5c4c1c0,
0x87868382, 0xc7c6c3c2,
0x8d8c8988, 0xcdccc9c8,
0x8f8e8b8a, 0xcfcecbca,
},
{
0x15141110, 0x55545150,
0x17161312, 0x57565352,
0x1d1c1918, 0x5d5c5958,
0x1f1e1b1a, 0x5f5e5b5a,
0x95949190, 0xd5d4d1d0,
0x97969392, 0xd7d6d3d2,
0x9d9c9998, 0xdddcd9d8,
0x9f9e9b9a, 0xdfdedbda,
},
{
0x25242120, 0x65646160,
0x27262322, 0x67666362,
0x2d2c2928, 0x6d6c6968,
0x2f2e2b2a, 0x6f6e6b6a,
0xa5a4a1a0, 0xe5e4e1e0,
0xa7a6a3a2, 0xe7e6e3e2,
0xadaca9a8, 0xedece9e8,
0xafaeabaa, 0xefeeebea,
},
{
0x35343130, 0x75747170,
0x37363332, 0x77767372,
0x3d3c3938, 0x7d7c7978,
0x3f3e3b3a, 0x7f7e7b7a,
0xb5b4b1b0, 0xf5f4f1f0,
0xb7b6b3b2, 0xf7f6f3f2,
0xbdbcb9b8, 0xfdfcf9f8,
0xbfbebbba, 0xfffefbfa,
},
};
#define MICRO_TILE_DIM (8)
#define MICRO_TILE_SZ (256)
#define TEXELS_PER_ELEMENT (1)
#define BPP (8)
shared uint scratch[16];
void main() {
uint slot = gl_LocalInvocationID.x >> 2u;
atomicAnd(scratch[slot], 0);
uint x = gl_GlobalInvocationID.x % info.pitch;
uint y = (gl_GlobalInvocationID.x / info.pitch) % info.height;
uint z = gl_GlobalInvocationID.x / (info.pitch * info.height);
uint col = bitfieldExtract(x, 0, 3);
uint row = bitfieldExtract(y, 0, 3);
uint lut = bitfieldExtract(z, 0, 2);
uint idx_dw = lut_8bpp[lut][(col + row * MICRO_TILE_DIM) >> 2u];
uint byte_ofs = (gl_LocalInvocationID.x & 3u) * 8;
uint idx = bitfieldExtract(idx_dw >> byte_ofs, 0, 8);
uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ;
uint tile_row = y / MICRO_TILE_DIM;
uint tile_column = x / MICRO_TILE_DIM;
uint tile_offs = ((tile_row * info.c0) + tile_column) * MICRO_TILE_SZ;
uint offs = (slice_offs + tile_offs) + (idx * BPP / 8);
uint p0 = in_data[offs >> 2u];
uint byte = bitfieldExtract(p0 >> (offs * 8), 0, 8);
atomicOr(scratch[slot], byte << byte_ofs);
if (byte_ofs == 0) {
out_data[gl_GlobalInvocationID.x >> 2u] = scratch[slot];
}
}

View File

@ -39,6 +39,15 @@ public:
return &(*first_level_map[l1_page])[l2_page]; return &(*first_level_map[l1_page])[l2_page];
} }
[[nodiscard]] const Entry* find(size_t page) const {
const size_t l1_page = page >> SecondLevelBits;
const size_t l2_page = page & (NumEntriesPerL1Page - 1);
if (!first_level_map[l1_page]) {
return nullptr;
}
return &(*first_level_map[l1_page])[l2_page];
}
[[nodiscard]] const Entry& operator[](size_t page) const { [[nodiscard]] const Entry& operator[](size_t page) const {
const size_t l1_page = page >> SecondLevelBits; const size_t l1_page = page >> SecondLevelBits;
const size_t l2_page = page & (NumEntriesPerL1Page - 1); const size_t l2_page = page & (NumEntriesPerL1Page - 1);

View File

@ -185,7 +185,7 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) {
void PageManager::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) { void PageManager::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
static constexpr u64 PageShift = 12; static constexpr u64 PageShift = 12;
std::scoped_lock lk{mutex}; std::scoped_lock lk{lock};
const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1; const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1;
const u64 page_start = addr >> PageShift; const u64 page_start = addr >> PageShift;
const u64 page_end = page_start + num_pages; const u64 page_end = page_start + num_pages;

View File

@ -4,8 +4,8 @@
#pragma once #pragma once
#include <memory> #include <memory>
#include <mutex>
#include <boost/icl/interval_map.hpp> #include <boost/icl/interval_map.hpp>
#include "common/spin_lock.h"
#include "common/types.h" #include "common/types.h"
namespace Vulkan { namespace Vulkan {
@ -35,8 +35,8 @@ private:
struct Impl; struct Impl;
std::unique_ptr<Impl> impl; std::unique_ptr<Impl> impl;
Vulkan::Rasterizer* rasterizer; Vulkan::Rasterizer* rasterizer;
std::mutex mutex;
boost::icl::interval_map<VAddr, s32> cached_pages; boost::icl::interval_map<VAddr, s32> cached_pages;
Common::SpinLock lock;
}; };
} // namespace VideoCore } // namespace VideoCore

View File

@ -447,7 +447,7 @@ static constexpr vk::FormatFeatureFlags2 GetNumberFormatFeatureFlags(
case AmdGpu::NumberFormat::Srgb: case AmdGpu::NumberFormat::Srgb:
return ImageRead | Mrt; return ImageRead | Mrt;
case AmdGpu::NumberFormat::Ubnorm: case AmdGpu::NumberFormat::Ubnorm:
case AmdGpu::NumberFormat::UbnromNz: case AmdGpu::NumberFormat::UbnormNz:
case AmdGpu::NumberFormat::Ubint: case AmdGpu::NumberFormat::Ubint:
case AmdGpu::NumberFormat::Ubscaled: case AmdGpu::NumberFormat::Ubscaled:
return ImageRead; return ImageRead;
@ -468,6 +468,7 @@ static constexpr SurfaceFormatInfo CreateSurfaceFormatInfo(const AmdGpu::DataFor
} }
std::span<const SurfaceFormatInfo> SurfaceFormats() { std::span<const SurfaceFormatInfo> SurfaceFormats() {
// Uscaled, Sscaled, and Ubnorm formats are automatically remapped and handled in shader.
static constexpr std::array formats{ static constexpr std::array formats{
// Invalid // Invalid
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Unorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Unorm,
@ -490,7 +491,7 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eUndefined), vk::Format::eUndefined),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubnorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubnorm,
vk::Format::eUndefined), vk::Format::eUndefined),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::UbnromNz, CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::UbnormNz,
vk::Format::eUndefined), vk::Format::eUndefined),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubint,
vk::Format::eUndefined), vk::Format::eUndefined),
@ -501,10 +502,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eR8Unorm), vk::Format::eR8Unorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Snorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Snorm,
vk::Format::eR8Snorm), vk::Format::eR8Snorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uscaled,
vk::Format::eR8Uscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sscaled,
vk::Format::eR8Sscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uint,
vk::Format::eR8Uint), vk::Format::eR8Uint),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sint,
@ -516,10 +513,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eR16Unorm), vk::Format::eR16Unorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Snorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Snorm,
vk::Format::eR16Snorm), vk::Format::eR16Snorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uscaled,
vk::Format::eR16Uscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sscaled,
vk::Format::eR16Sscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uint,
vk::Format::eR16Uint), vk::Format::eR16Uint),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sint,
@ -531,10 +524,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eR8G8Unorm), vk::Format::eR8G8Unorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Snorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Snorm,
vk::Format::eR8G8Snorm), vk::Format::eR8G8Snorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uscaled,
vk::Format::eR8G8Uscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sscaled,
vk::Format::eR8G8Sscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uint,
vk::Format::eR8G8Uint), vk::Format::eR8G8Uint),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sint,
@ -553,10 +542,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eR16G16Unorm), vk::Format::eR16G16Unorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Snorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Snorm,
vk::Format::eR16G16Snorm), vk::Format::eR16G16Snorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uscaled,
vk::Format::eR16G16Uscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sscaled,
vk::Format::eR16G16Sscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uint,
vk::Format::eR16G16Uint), vk::Format::eR16G16Uint),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sint,
@ -573,10 +558,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eA2B10G10R10UnormPack32), vk::Format::eA2B10G10R10UnormPack32),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Snorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Snorm,
vk::Format::eA2B10G10R10SnormPack32), vk::Format::eA2B10G10R10SnormPack32),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uscaled,
vk::Format::eA2B10G10R10UscaledPack32),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sscaled,
vk::Format::eA2B10G10R10SscaledPack32),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uint,
vk::Format::eA2B10G10R10UintPack32), vk::Format::eA2B10G10R10UintPack32),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sint,
@ -586,10 +567,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eR8G8B8A8Unorm), vk::Format::eR8G8B8A8Unorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Snorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Snorm,
vk::Format::eR8G8B8A8Snorm), vk::Format::eR8G8B8A8Snorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uscaled,
vk::Format::eR8G8B8A8Uscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sscaled,
vk::Format::eR8G8B8A8Sscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uint,
vk::Format::eR8G8B8A8Uint), vk::Format::eR8G8B8A8Uint),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sint,
@ -608,10 +585,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
vk::Format::eR16G16B16A16Unorm), vk::Format::eR16G16B16A16Unorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Snorm, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Snorm,
vk::Format::eR16G16B16A16Snorm), vk::Format::eR16G16B16A16Snorm),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16,
AmdGpu::NumberFormat::Uscaled, vk::Format::eR16G16B16A16Uscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16,
AmdGpu::NumberFormat::Sscaled, vk::Format::eR16G16B16A16Sscaled),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Uint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Uint,
vk::Format::eR16G16B16A16Uint), vk::Format::eR16G16B16A16Uint),
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Sint, CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Sint,
@ -691,16 +664,40 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
return formats; return formats;
} }
// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture]
static const size_t amd_gpu_data_format_bit_size = 6; // All values are under 64
static const size_t amd_gpu_number_format_bit_size = 4; // All values are under 16
static size_t GetSurfaceFormatTableIndex(AmdGpu::DataFormat data_format,
AmdGpu::NumberFormat num_format) {
DEBUG_ASSERT(u32(data_format) < 1 << amd_gpu_data_format_bit_size);
DEBUG_ASSERT(u32(num_format) < 1 << amd_gpu_number_format_bit_size);
size_t result = static_cast<size_t>(num_format) |
(static_cast<size_t>(data_format) << amd_gpu_number_format_bit_size);
return result;
}
static auto surface_format_table = []() constexpr {
std::array<vk::Format, 1 << amd_gpu_data_format_bit_size * 1 << amd_gpu_number_format_bit_size>
result;
for (auto& entry : result) {
entry = vk::Format::eUndefined;
}
for (const auto& supported_format : SurfaceFormats()) {
result[GetSurfaceFormatTableIndex(supported_format.data_format,
supported_format.number_format)] =
supported_format.vk_format;
}
return result;
}();
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
const auto& formats = SurfaceFormats(); vk::Format result = surface_format_table[GetSurfaceFormatTableIndex(data_format, num_format)];
const auto format = bool found =
std::find_if(formats.begin(), formats.end(), [&](const SurfaceFormatInfo& format_info) { result != vk::Format::eUndefined || data_format == AmdGpu::DataFormat::FormatInvalid;
return format_info.data_format == data_format && ASSERT_MSG(found, "Unknown data_format={} and num_format={}", static_cast<u32>(data_format),
format_info.number_format == num_format; static_cast<u32>(num_format));
}); return result;
ASSERT_MSG(format != formats.end(), "Unknown data_format={} and num_format={}",
static_cast<u32>(data_format), static_cast<u32>(num_format));
return format->vk_format;
} }
static constexpr DepthFormatInfo CreateDepthFormatInfo( static constexpr DepthFormatInfo CreateDepthFormatInfo(
@ -746,8 +743,8 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat
vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
const auto comp_swizzle = color_buffer.Swizzle(); const auto comp_swizzle = color_buffer.Swizzle();
const auto format = color_buffer.DataFormat(); const auto format = color_buffer.GetDataFmt();
const auto number_type = color_buffer.NumFormat(); const auto number_type = color_buffer.GetNumberFmt();
const auto& c0 = color_buffer.clear_word0; const auto& c0 = color_buffer.clear_word0;
const auto& c1 = color_buffer.clear_word1; const auto& c1 = color_buffer.clear_word1;

View File

@ -18,6 +18,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} { : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} {
auto& info = stages[int(Shader::LogicalStage::Compute)]; auto& info = stages[int(Shader::LogicalStage::Compute)];
info = &info_; info = &info_;
const auto debug_str = GetDebugString();
const vk::PipelineShaderStageCreateInfo shader_ci = { const vk::PipelineShaderStageCreateInfo shader_ci = {
.stage = vk::ShaderStageFlagBits::eCompute, .stage = vk::ShaderStageFlagBits::eCompute,
@ -89,8 +90,9 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
.bindingCount = static_cast<u32>(bindings.size()), .bindingCount = static_cast<u32>(bindings.size()),
.pBindings = bindings.data(), .pBindings = bindings.data(),
}; };
const auto device = instance.GetDevice();
auto [descriptor_set_result, descriptor_set] = auto [descriptor_set_result, descriptor_set] =
instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci); device.createDescriptorSetLayoutUnique(desc_layout_ci);
ASSERT_MSG(descriptor_set_result == vk::Result::eSuccess, ASSERT_MSG(descriptor_set_result == vk::Result::eSuccess,
"Failed to create compute descriptor set layout: {}", "Failed to create compute descriptor set layout: {}",
vk::to_string(descriptor_set_result)); vk::to_string(descriptor_set_result));
@ -107,6 +109,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
ASSERT_MSG(layout_result == vk::Result::eSuccess, ASSERT_MSG(layout_result == vk::Result::eSuccess,
"Failed to create compute pipeline layout: {}", vk::to_string(layout_result)); "Failed to create compute pipeline layout: {}", vk::to_string(layout_result));
pipeline_layout = std::move(layout); pipeline_layout = std::move(layout);
SetObjectName(device, *pipeline_layout, "Compute PipelineLayout {}", debug_str);
const vk::ComputePipelineCreateInfo compute_pipeline_ci = { const vk::ComputePipelineCreateInfo compute_pipeline_ci = {
.stage = shader_ci, .stage = shader_ci,
@ -117,6 +120,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create compute pipeline: {}", ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create compute pipeline: {}",
vk::to_string(pipeline_result)); vk::to_string(pipeline_result));
pipeline = std::move(pipe); pipeline = std::move(pipe);
SetObjectName(device, *pipeline, "Compute Pipeline {}", debug_str);
} }
ComputePipeline::~ComputePipeline() = default; ComputePipeline::~ComputePipeline() = default;

View File

@ -8,7 +8,6 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/io_file.h" #include "common/io_file.h"
#include "common/scope_exit.h"
#include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h" #include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
#include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/runtime_info.h"
@ -16,6 +15,7 @@
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/texture_cache.h"
@ -36,6 +36,7 @@ GraphicsPipeline::GraphicsPipeline(
const vk::Device device = instance.GetDevice(); const vk::Device device = instance.GetDevice();
std::ranges::copy(infos, stages.begin()); std::ranges::copy(infos, stages.begin());
BuildDescSetLayout(); BuildDescSetLayout();
const auto debug_str = GetDebugString();
const vk::PushConstantRange push_constants = { const vk::PushConstantRange push_constants = {
.stageFlags = gp_stage_flags, .stageFlags = gp_stage_flags,
@ -54,6 +55,7 @@ GraphicsPipeline::GraphicsPipeline(
ASSERT_MSG(layout_result == vk::Result::eSuccess, ASSERT_MSG(layout_result == vk::Result::eSuccess,
"Failed to create graphics pipeline layout: {}", vk::to_string(layout_result)); "Failed to create graphics pipeline layout: {}", vk::to_string(layout_result));
pipeline_layout = std::move(layout); pipeline_layout = std::move(layout);
SetObjectName(device, *pipeline_layout, "Graphics PipelineLayout {}", debug_str);
boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings; boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes; boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
@ -322,6 +324,7 @@ GraphicsPipeline::GraphicsPipeline(
ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create graphics pipeline: {}", ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create graphics pipeline: {}",
vk::to_string(pipeline_result)); vk::to_string(pipeline_result));
pipeline = std::move(pipe); pipeline = std::move(pipe);
SetObjectName(device, *pipeline, "Graphics Pipeline {}", debug_str);
} }
GraphicsPipeline::~GraphicsPipeline() = default; GraphicsPipeline::~GraphicsPipeline() = default;

View File

@ -32,6 +32,7 @@ struct GraphicsPipelineKey {
u32 num_color_attachments; u32 num_color_attachments;
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats; std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats; std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats;
std::array<AmdGpu::NumberConversion, Liverpool::NumColorBuffers> color_num_conversions;
std::array<AmdGpu::CompMapping, Liverpool::NumColorBuffers> color_swizzles; std::array<AmdGpu::CompMapping, Liverpool::NumColorBuffers> color_swizzles;
vk::Format depth_format; vk::Format depth_format;
vk::Format stencil_format; vk::Format stencil_format;

View File

@ -92,13 +92,15 @@ std::string GetReadableVersion(u32 version) {
Instance::Instance(bool enable_validation, bool enable_crash_diagnostic) Instance::Instance(bool enable_validation, bool enable_crash_diagnostic)
: instance{CreateInstance(Frontend::WindowSystemType::Headless, enable_validation, : instance{CreateInstance(Frontend::WindowSystemType::Headless, enable_validation,
enable_crash_diagnostic)}, enable_crash_diagnostic)},
physical_devices{EnumeratePhysicalDevices(instance)} {} physical_devices{EnumeratePhysicalDevices(instance)},
crash_diagnostic{enable_crash_diagnostic} {}
Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index, Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index,
bool enable_validation /*= false*/, bool enable_crash_diagnostic /*= false*/) bool enable_validation /*= false*/, bool enable_crash_diagnostic /*= false*/)
: instance{CreateInstance(window.GetWindowInfo().type, enable_validation, : instance{CreateInstance(window.GetWindowInfo().type, enable_validation,
enable_crash_diagnostic)}, enable_crash_diagnostic)},
physical_devices{EnumeratePhysicalDevices(instance)} { physical_devices{EnumeratePhysicalDevices(instance)},
crash_diagnostic{enable_crash_diagnostic} {
if (enable_validation) { if (enable_validation) {
debug_callback = CreateDebugCallback(*instance); debug_callback = CreateDebugCallback(*instance);
} }

View File

@ -81,7 +81,7 @@ public:
/// Returns true when a known debugging tool is attached. /// Returns true when a known debugging tool is attached.
bool HasDebuggingToolAttached() const { bool HasDebuggingToolAttached() const {
return has_renderdoc || has_nsight_graphics; return crash_diagnostic || has_renderdoc || has_nsight_graphics;
} }
/// Returns true if anisotropic filtering is supported /// Returns true if anisotropic filtering is supported
@ -338,6 +338,7 @@ private:
u32 subgroup_size{}; u32 subgroup_size{};
bool tooling_info{}; bool tooling_info{};
bool debug_utils_supported{}; bool debug_utils_supported{};
bool crash_diagnostic{};
bool has_nsight_graphics{}; bool has_nsight_graphics{};
bool has_renderdoc{}; bool has_renderdoc{};
}; };

View File

@ -168,6 +168,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
for (u32 i = 0; i < Shader::MaxColorBuffers; i++) { for (u32 i = 0; i < Shader::MaxColorBuffers; i++) {
info.fs_info.color_buffers[i] = { info.fs_info.color_buffers[i] = {
.num_format = graphics_key.color_num_formats[i], .num_format = graphics_key.color_num_formats[i],
.num_conversion = graphics_key.color_num_conversions[i],
.swizzle = graphics_key.color_swizzles[i], .swizzle = graphics_key.color_swizzles[i],
}; };
} }
@ -302,6 +303,7 @@ bool PipelineCache::RefreshGraphicsKey() {
key.num_color_attachments = 0; key.num_color_attachments = 0;
key.color_formats.fill(vk::Format::eUndefined); key.color_formats.fill(vk::Format::eUndefined);
key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm); key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm);
key.color_num_conversions.fill(AmdGpu::NumberConversion::None);
key.blend_controls.fill({}); key.blend_controls.fill({});
key.write_masks.fill({}); key.write_masks.fill({});
key.color_swizzles.fill({}); key.color_swizzles.fill({});
@ -328,8 +330,9 @@ bool PipelineCache::RefreshGraphicsKey() {
} }
key.color_formats[remapped_cb] = key.color_formats[remapped_cb] =
LiverpoolToVK::SurfaceFormat(col_buf.DataFormat(), col_buf.NumFormat()); LiverpoolToVK::SurfaceFormat(col_buf.GetDataFmt(), col_buf.GetNumberFmt());
key.color_num_formats[remapped_cb] = col_buf.NumFormat(); key.color_num_formats[remapped_cb] = col_buf.GetNumberFmt();
key.color_num_conversions[remapped_cb] = col_buf.GetNumberConversion();
key.color_swizzles[remapped_cb] = col_buf.Swizzle(); key.color_swizzles[remapped_cb] = col_buf.Swizzle();
} }

View File

@ -6,6 +6,7 @@
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_pipeline_common.h" #include "video_core/renderer_vulkan/vk_pipeline_common.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/texture_cache.h"
@ -55,4 +56,19 @@ void Pipeline::BindResources(DescriptorWrites& set_writes, const BufferBarriers&
cmdbuf.bindDescriptorSets(bind_point, *pipeline_layout, 0, desc_set, {}); cmdbuf.bindDescriptorSets(bind_point, *pipeline_layout, 0, desc_set, {});
} }
std::string Pipeline::GetDebugString() const {
std::string stage_desc;
for (const auto& stage : stages) {
if (stage) {
const auto shader_name = PipelineCache::GetShaderName(stage->stage, stage->pgm_hash);
if (stage_desc.empty()) {
stage_desc = shader_name;
} else {
stage_desc = fmt::format("{},{}", stage_desc, shader_name);
}
}
}
return stage_desc;
}
} // namespace Vulkan } // namespace Vulkan

Some files were not shown because too many files have changed in this diff Show More