mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-05 17:02:40 +00:00
Merge remote-tracking branch 'origin/main' into fullscreen
This commit is contained in:
commit
391e529d7a
6
.github/ISSUE_TEMPLATE/game-bug-report.yaml
vendored
6
.github/ISSUE_TEMPLATE/game-bug-report.yaml
vendored
@ -89,7 +89,7 @@ body:
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: "Logs"
|
||||
description: Attach any logs here. Log can be found by right clicking on a game name -> Open Folder... -> Open Log Folder. Make sure that the log type is set to `sync`.
|
||||
label: "Log File"
|
||||
description: Drag and drop the log file here. It can be found by right clicking on a game name -> Open Folder... -> Open Log Folder. Make sure that the log type is set to `sync`.
|
||||
validations:
|
||||
required: false
|
||||
required: true
|
||||
|
10
.github/workflows/build.yml
vendored
10
.github/workflows/build.yml
vendored
@ -14,14 +14,14 @@ env:
|
||||
|
||||
jobs:
|
||||
reuse:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: fsfe/reuse-action@v5
|
||||
|
||||
clang-format:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@ -39,7 +39,7 @@ jobs:
|
||||
run: ./.ci/clang-format.sh
|
||||
|
||||
get-info:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
outputs:
|
||||
date: ${{ steps.vars.outputs.date }}
|
||||
shorthash: ${{ steps.vars.outputs.shorthash }}
|
||||
@ -57,7 +57,7 @@ jobs:
|
||||
echo "fullhash=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
|
||||
|
||||
windows-sdl:
|
||||
runs-on: windows-latest
|
||||
runs-on: windows-2025
|
||||
needs: get-info
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@ -101,7 +101,7 @@ jobs:
|
||||
path: ${{github.workspace}}/build/shadPS4.exe
|
||||
|
||||
windows-qt:
|
||||
runs-on: windows-latest
|
||||
runs-on: windows-2025
|
||||
needs: get-info
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
@ -209,6 +209,7 @@ set(AUDIO_LIB src/core/libraries/audio/audioin.cpp
|
||||
|
||||
set(GNM_LIB src/core/libraries/gnmdriver/gnmdriver.cpp
|
||||
src/core/libraries/gnmdriver/gnmdriver.h
|
||||
src/core/libraries/gnmdriver/gnmdriver_init.h
|
||||
src/core/libraries/gnmdriver/gnm_error.h
|
||||
)
|
||||
|
||||
@ -335,6 +336,8 @@ set(SYSTEM_LIBS src/core/libraries/system/commondialog.cpp
|
||||
src/core/libraries/share_play/shareplay.h
|
||||
src/core/libraries/razor_cpu/razor_cpu.cpp
|
||||
src/core/libraries/razor_cpu/razor_cpu.h
|
||||
src/core/libraries/mouse/mouse.cpp
|
||||
src/core/libraries/mouse/mouse.h
|
||||
)
|
||||
|
||||
set(VIDEOOUT_LIB src/core/libraries/videoout/buffer.h
|
||||
|
@ -48,6 +48,7 @@ static std::string updateChannel;
|
||||
static std::string backButtonBehavior = "left";
|
||||
static bool useSpecialPad = false;
|
||||
static int specialPadClass = 1;
|
||||
static bool isMotionControlsEnabled = true;
|
||||
static bool isDebugDump = false;
|
||||
static bool isShaderDebug = false;
|
||||
static bool isShowSplash = false;
|
||||
@ -101,7 +102,7 @@ void setTrophyKey(std::string key) {
|
||||
trophyKey = key;
|
||||
}
|
||||
|
||||
bool isNeoMode() {
|
||||
bool isNeoModeConsole() {
|
||||
return isNeo;
|
||||
}
|
||||
|
||||
@ -177,6 +178,10 @@ int getSpecialPadClass() {
|
||||
return specialPadClass;
|
||||
}
|
||||
|
||||
bool getIsMotionControlsEnabled() {
|
||||
return isMotionControlsEnabled;
|
||||
}
|
||||
|
||||
bool debugDump() {
|
||||
return isDebugDump;
|
||||
}
|
||||
@ -377,6 +382,10 @@ void setSpecialPadClass(int type) {
|
||||
specialPadClass = type;
|
||||
}
|
||||
|
||||
void setIsMotionControlsEnabled(bool use) {
|
||||
isMotionControlsEnabled = use;
|
||||
}
|
||||
|
||||
void setSeparateUpdateEnabled(bool use) {
|
||||
separateupdatefolder = use;
|
||||
}
|
||||
@ -604,6 +613,7 @@ void load(const std::filesystem::path& path) {
|
||||
backButtonBehavior = toml::find_or<std::string>(input, "backButtonBehavior", "left");
|
||||
useSpecialPad = toml::find_or<bool>(input, "useSpecialPad", false);
|
||||
specialPadClass = toml::find_or<int>(input, "specialPadClass", 1);
|
||||
isMotionControlsEnabled = toml::find_or<bool>(input, "isMotionControlsEnabled", true);
|
||||
}
|
||||
|
||||
if (data.contains("GPU")) {
|
||||
@ -720,6 +730,7 @@ void save(const std::filesystem::path& path) {
|
||||
data["Input"]["backButtonBehavior"] = backButtonBehavior;
|
||||
data["Input"]["useSpecialPad"] = useSpecialPad;
|
||||
data["Input"]["specialPadClass"] = specialPadClass;
|
||||
data["Input"]["isMotionControlsEnabled"] = isMotionControlsEnabled;
|
||||
data["GPU"]["screenWidth"] = screenWidth;
|
||||
data["GPU"]["screenHeight"] = screenHeight;
|
||||
data["GPU"]["nullGpu"] = isNullGpu;
|
||||
|
@ -17,10 +17,9 @@ void saveMainWindow(const std::filesystem::path& path);
|
||||
|
||||
std::string getTrophyKey();
|
||||
void setTrophyKey(std::string key);
|
||||
|
||||
bool isNeoMode();
|
||||
bool getIsFullscreen();
|
||||
std::string getFullscreenMode();
|
||||
bool isNeoModeConsole();
|
||||
bool getPlayBGM();
|
||||
int getBGMvolume();
|
||||
bool getisTrophyPopupDisabled();
|
||||
@ -39,6 +38,7 @@ int getCursorHideTimeout();
|
||||
std::string getBackButtonBehavior();
|
||||
bool getUseSpecialPad();
|
||||
int getSpecialPadClass();
|
||||
bool getIsMotionControlsEnabled();
|
||||
|
||||
u32 getScreenWidth();
|
||||
u32 getScreenHeight();
|
||||
@ -86,6 +86,7 @@ void setCursorHideTimeout(int newcursorHideTimeout);
|
||||
void setBackButtonBehavior(const std::string& type);
|
||||
void setUseSpecialPad(bool use);
|
||||
void setSpecialPadClass(int type);
|
||||
void setIsMotionControlsEnabled(bool use);
|
||||
|
||||
void setLogType(const std::string& type);
|
||||
void setLogFilter(const std::string& type);
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <string_view>
|
||||
|
||||
#include "assert.h"
|
||||
#include "bit_field.h"
|
||||
#include "singleton.h"
|
||||
#include "types.h"
|
||||
|
||||
@ -16,6 +17,46 @@ class Emulator;
|
||||
|
||||
namespace Common {
|
||||
|
||||
union PSFAttributes {
|
||||
/// Supports initial user's logout
|
||||
BitField<0, 1, u32> support_initial_user_logout;
|
||||
/// Enter button for the common dialog is cross.
|
||||
BitField<1, 1, u32> enter_button_cross;
|
||||
/// Warning dialog for PS Move is displayed in the options menu.
|
||||
BitField<2, 1, u32> ps_move_warning;
|
||||
/// Supports stereoscopic 3D.
|
||||
BitField<3, 1, u32> support_stereoscopic_3d;
|
||||
/// Suspends when PS button is pressed.
|
||||
BitField<4, 1, u32> ps_button_suspend;
|
||||
/// Enter button for the common dialog is assigned by the system software.
|
||||
BitField<5, 1, u32> enter_button_system;
|
||||
/// Overrides share menu behavior.
|
||||
BitField<6, 1, u32> override_share_menu;
|
||||
/// Suspends when PS button is pressed and special output resolution is set.
|
||||
BitField<8, 1, u32> special_res_ps_button_suspend;
|
||||
/// Enable HDCP.
|
||||
BitField<9, 1, u32> enable_hdcp;
|
||||
/// Disable HDCP for non-game.
|
||||
BitField<10, 1, u32> disable_hdcp_non_game;
|
||||
/// Supports PS VR.
|
||||
BitField<14, 1, u32> support_ps_vr;
|
||||
/// CPU mode (6 CPU)
|
||||
BitField<15, 1, u32> six_cpu_mode;
|
||||
/// CPU mode (7 CPU)
|
||||
BitField<16, 1, u32> seven_cpu_mode;
|
||||
/// Supports PS4 Pro (Neo) mode.
|
||||
BitField<23, 1, u32> support_neo_mode;
|
||||
/// Requires PS VR.
|
||||
BitField<26, 1, u32> require_ps_vr;
|
||||
/// Supports HDR.
|
||||
BitField<29, 1, u32> support_hdr;
|
||||
/// Display location.
|
||||
BitField<31, 1, u32> display_location;
|
||||
|
||||
u32 raw{};
|
||||
};
|
||||
static_assert(sizeof(PSFAttributes) == 4);
|
||||
|
||||
class ElfInfo {
|
||||
friend class Core::Emulator;
|
||||
|
||||
@ -26,6 +67,7 @@ class ElfInfo {
|
||||
std::string app_ver{};
|
||||
u32 firmware_ver = 0;
|
||||
u32 raw_firmware_ver = 0;
|
||||
PSFAttributes psf_attributes{};
|
||||
|
||||
public:
|
||||
static constexpr u32 FW_15 = 0x1500000;
|
||||
@ -68,6 +110,11 @@ public:
|
||||
ASSERT(initialized);
|
||||
return raw_firmware_ver;
|
||||
}
|
||||
|
||||
[[nodiscard]] const PSFAttributes& PSFAttributes() const {
|
||||
ASSERT(initialized);
|
||||
return psf_attributes;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Common
|
||||
|
@ -126,6 +126,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
|
||||
SUB(Lib, Vdec2) \
|
||||
SUB(Lib, Videodec) \
|
||||
SUB(Lib, RazorCpu) \
|
||||
SUB(Lib, Mouse) \
|
||||
CLS(Frontend) \
|
||||
CLS(Render) \
|
||||
SUB(Render, Vulkan) \
|
||||
|
@ -93,6 +93,7 @@ enum class Class : u8 {
|
||||
Lib_Vdec2, ///< The LibSceVideodec2 implementation.
|
||||
Lib_Videodec, ///< The LibSceVideodec implementation.
|
||||
Lib_RazorCpu, ///< The LibRazorCpu implementation.
|
||||
Lib_Mouse, ///< The LibSceMouse implementation
|
||||
Frontend, ///< Emulator UI
|
||||
Render, ///< Video Core
|
||||
Render_Vulkan, ///< Vulkan backend
|
||||
|
@ -66,7 +66,7 @@ void RegPopup::DrawColorBuffer(const AmdGpu::Liverpool::ColorBuffer& buffer) {
|
||||
"GetColorSliceSize()", buffer.GetColorSliceSize(),
|
||||
"GetTilingMode()", buffer.GetTilingMode(),
|
||||
"IsTiled()", buffer.IsTiled(),
|
||||
"NumFormat()", buffer.NumFormat()
|
||||
"NumFormat()", buffer.GetNumberFmt()
|
||||
);
|
||||
|
||||
// clang-format on
|
||||
|
@ -3,13 +3,13 @@
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <shared_mutex>
|
||||
#include <stop_token>
|
||||
#include <thread>
|
||||
#include <magic_enum/magic_enum.hpp>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/config.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/polyfill_thread.h"
|
||||
#include "common/thread.h"
|
||||
#include "core/libraries/audio/audioout.h"
|
||||
#include "core/libraries/audio/audioout_backend.h"
|
||||
@ -18,7 +18,7 @@
|
||||
|
||||
namespace Libraries::AudioOut {
|
||||
|
||||
std::shared_mutex ports_mutex;
|
||||
std::mutex port_open_mutex{};
|
||||
std::array<PortOut, SCE_AUDIO_OUT_NUM_PORTS> ports_out{};
|
||||
|
||||
static std::unique_ptr<AudioOutBackend> audio;
|
||||
@ -93,17 +93,20 @@ int PS4_SYSV_ABI sceAudioOutClose(s32 handle) {
|
||||
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
|
||||
}
|
||||
|
||||
std::scoped_lock lock(ports_mutex);
|
||||
std::unique_lock open_lock{port_open_mutex};
|
||||
auto& port = ports_out.at(handle - 1);
|
||||
if (!port.impl) {
|
||||
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
|
||||
{
|
||||
std::unique_lock lock{port.mutex};
|
||||
if (!port.IsOpen()) {
|
||||
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
|
||||
}
|
||||
std::free(port.output_buffer);
|
||||
port.output_buffer = nullptr;
|
||||
port.output_ready = false;
|
||||
port.impl = nullptr;
|
||||
}
|
||||
|
||||
// Stop outside of port lock scope to prevent deadlocks.
|
||||
port.output_thread.Stop();
|
||||
std::free(port.output_buffer);
|
||||
port.output_buffer = nullptr;
|
||||
port.output_ready = false;
|
||||
port.impl = nullptr;
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
@ -172,35 +175,34 @@ int PS4_SYSV_ABI sceAudioOutGetPortState(s32 handle, OrbisAudioOutPortState* sta
|
||||
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
|
||||
}
|
||||
|
||||
std::scoped_lock lock(ports_mutex);
|
||||
const auto& port = ports_out.at(handle - 1);
|
||||
if (!port.impl) {
|
||||
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
|
||||
auto& port = ports_out.at(handle - 1);
|
||||
{
|
||||
std::unique_lock lock{port.mutex};
|
||||
if (!port.IsOpen()) {
|
||||
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
|
||||
}
|
||||
switch (port.type) {
|
||||
case OrbisAudioOutPort::Main:
|
||||
case OrbisAudioOutPort::Bgm:
|
||||
case OrbisAudioOutPort::Voice:
|
||||
state->output = 1;
|
||||
state->channel = port.format_info.num_channels > 2 ? 2 : port.format_info.num_channels;
|
||||
break;
|
||||
case OrbisAudioOutPort::Personal:
|
||||
case OrbisAudioOutPort::Padspk:
|
||||
state->output = 4;
|
||||
state->channel = 1;
|
||||
break;
|
||||
case OrbisAudioOutPort::Aux:
|
||||
state->output = 0;
|
||||
state->channel = 0;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
state->rerouteCounter = 0;
|
||||
state->volume = 127;
|
||||
}
|
||||
|
||||
state->rerouteCounter = 0;
|
||||
state->volume = 127;
|
||||
|
||||
switch (port.type) {
|
||||
case OrbisAudioOutPort::Main:
|
||||
case OrbisAudioOutPort::Bgm:
|
||||
case OrbisAudioOutPort::Voice:
|
||||
state->output = 1;
|
||||
state->channel = port.format_info.num_channels > 2 ? 2 : port.format_info.num_channels;
|
||||
break;
|
||||
case OrbisAudioOutPort::Personal:
|
||||
case OrbisAudioOutPort::Padspk:
|
||||
state->output = 4;
|
||||
state->channel = 1;
|
||||
break;
|
||||
case OrbisAudioOutPort::Aux:
|
||||
state->output = 0;
|
||||
state->channel = 0;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
@ -279,15 +281,16 @@ static void AudioOutputThread(PortOut* port, const std::stop_token& stop) {
|
||||
while (true) {
|
||||
timer.Start();
|
||||
{
|
||||
std::unique_lock lock{port->output_mutex};
|
||||
Common::CondvarWait(port->output_cv, lock, stop, [&] { return port->output_ready; });
|
||||
if (stop.stop_requested()) {
|
||||
break;
|
||||
std::unique_lock lock{port->mutex};
|
||||
if (port->output_cv.wait(lock, stop, [&] { return port->output_ready; })) {
|
||||
port->impl->Output(port->output_buffer);
|
||||
port->output_ready = false;
|
||||
}
|
||||
port->impl->Output(port->output_buffer);
|
||||
port->output_ready = false;
|
||||
}
|
||||
port->output_cv.notify_one();
|
||||
if (stop.stop_requested()) {
|
||||
break;
|
||||
}
|
||||
timer.End();
|
||||
}
|
||||
}
|
||||
@ -332,27 +335,30 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id,
|
||||
return ORBIS_AUDIO_OUT_ERROR_INVALID_FORMAT;
|
||||
}
|
||||
|
||||
std::scoped_lock lock{ports_mutex};
|
||||
std::unique_lock open_lock{port_open_mutex};
|
||||
const auto port =
|
||||
std::ranges::find_if(ports_out, [&](const PortOut& p) { return p.impl == nullptr; });
|
||||
std::ranges::find_if(ports_out, [&](const PortOut& p) { return !p.IsOpen(); });
|
||||
if (port == ports_out.end()) {
|
||||
LOG_ERROR(Lib_AudioOut, "Audio ports are full");
|
||||
return ORBIS_AUDIO_OUT_ERROR_PORT_FULL;
|
||||
}
|
||||
|
||||
port->type = port_type;
|
||||
port->format_info = GetFormatInfo(format);
|
||||
port->sample_rate = sample_rate;
|
||||
port->buffer_frames = length;
|
||||
port->volume.fill(SCE_AUDIO_OUT_VOLUME_0DB);
|
||||
{
|
||||
std::unique_lock port_lock(port->mutex);
|
||||
|
||||
port->impl = audio->Open(*port);
|
||||
port->type = port_type;
|
||||
port->format_info = GetFormatInfo(format);
|
||||
port->sample_rate = sample_rate;
|
||||
port->buffer_frames = length;
|
||||
port->volume.fill(SCE_AUDIO_OUT_VOLUME_0DB);
|
||||
|
||||
port->output_buffer = std::malloc(port->BufferSize());
|
||||
port->output_ready = false;
|
||||
port->output_thread.Run(
|
||||
[port](const std::stop_token& stop) { AudioOutputThread(&*port, stop); });
|
||||
port->impl = audio->Open(*port);
|
||||
|
||||
port->output_buffer = std::malloc(port->BufferSize());
|
||||
port->output_ready = false;
|
||||
port->output_thread.Run(
|
||||
[port](const std::stop_token& stop) { AudioOutputThread(&*port, stop); });
|
||||
}
|
||||
return std::distance(ports_out.begin(), port) + 1;
|
||||
}
|
||||
|
||||
@ -367,14 +373,13 @@ s32 PS4_SYSV_ABI sceAudioOutOutput(s32 handle, void* ptr) {
|
||||
}
|
||||
|
||||
auto& port = ports_out.at(handle - 1);
|
||||
if (!port.impl) {
|
||||
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
|
||||
}
|
||||
|
||||
{
|
||||
std::unique_lock lock{port.output_mutex};
|
||||
std::unique_lock lock{port.mutex};
|
||||
if (!port.IsOpen()) {
|
||||
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
|
||||
}
|
||||
port.output_cv.wait(lock, [&] { return !port.output_ready; });
|
||||
if (ptr != nullptr) {
|
||||
if (ptr != nullptr && port.IsOpen()) {
|
||||
std::memcpy(port.output_buffer, ptr, port.BufferSize());
|
||||
port.output_ready = true;
|
||||
}
|
||||
@ -488,19 +493,19 @@ s32 PS4_SYSV_ABI sceAudioOutSetVolume(s32 handle, s32 flag, s32* vol) {
|
||||
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
|
||||
}
|
||||
|
||||
std::scoped_lock lock(ports_mutex);
|
||||
auto& port = ports_out.at(handle - 1);
|
||||
if (!port.impl) {
|
||||
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
|
||||
}
|
||||
|
||||
for (int i = 0; i < port.format_info.num_channels; i++, flag >>= 1u) {
|
||||
if (flag & 0x1u) {
|
||||
port.volume[i] = vol[i];
|
||||
{
|
||||
std::unique_lock lock{port.mutex};
|
||||
if (!port.IsOpen()) {
|
||||
return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT;
|
||||
}
|
||||
for (int i = 0; i < port.format_info.num_channels; i++, flag >>= 1u) {
|
||||
if (flag & 0x1u) {
|
||||
port.volume[i] = vol[i];
|
||||
}
|
||||
}
|
||||
port.impl->SetVolume(port.volume);
|
||||
}
|
||||
|
||||
port.impl->SetVolume(port.volume);
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
@ -3,7 +3,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <condition_variable>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "core/libraries/kernel/threads.h"
|
||||
@ -74,10 +76,10 @@ struct AudioFormatInfo {
|
||||
};
|
||||
|
||||
struct PortOut {
|
||||
std::mutex mutex;
|
||||
std::unique_ptr<PortBackend> impl{};
|
||||
|
||||
void* output_buffer;
|
||||
std::mutex output_mutex;
|
||||
std::condition_variable_any output_cv;
|
||||
bool output_ready;
|
||||
Kernel::Thread output_thread{};
|
||||
@ -88,6 +90,10 @@ struct PortOut {
|
||||
u32 buffer_frames;
|
||||
std::array<s32, 8> volume;
|
||||
|
||||
[[nodiscard]] bool IsOpen() const {
|
||||
return impl != nullptr;
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 BufferSize() const {
|
||||
return buffer_frames * format_info.FrameSize();
|
||||
}
|
||||
|
@ -14,14 +14,7 @@ namespace Libraries::AudioOut {
|
||||
class SDLPortBackend : public PortBackend {
|
||||
public:
|
||||
explicit SDLPortBackend(const PortOut& port)
|
||||
: frame_size(port.format_info.FrameSize()), buffer_size(port.BufferSize()) {
|
||||
// We want the latency for delivering frames out to be as small as possible,
|
||||
// so set the sample frames hint to the number of frames per buffer.
|
||||
const auto samples_num_str = std::to_string(port.buffer_frames);
|
||||
if (!SDL_SetHint(SDL_HINT_AUDIO_DEVICE_SAMPLE_FRAMES, samples_num_str.c_str())) {
|
||||
LOG_WARNING(Lib_AudioOut, "Failed to set SDL audio sample frames hint to {}: {}",
|
||||
samples_num_str, SDL_GetError());
|
||||
}
|
||||
: frame_size(port.format_info.FrameSize()), guest_buffer_size(port.BufferSize()) {
|
||||
const SDL_AudioSpec fmt = {
|
||||
.format = port.format_info.is_float ? SDL_AUDIO_F32LE : SDL_AUDIO_S16LE,
|
||||
.channels = port.format_info.num_channels,
|
||||
@ -33,7 +26,7 @@ public:
|
||||
LOG_ERROR(Lib_AudioOut, "Failed to create SDL audio stream: {}", SDL_GetError());
|
||||
return;
|
||||
}
|
||||
queue_threshold = CalculateQueueThreshold();
|
||||
CalculateQueueThreshold();
|
||||
if (!SDL_SetAudioStreamInputChannelMap(stream, port.format_info.channel_layout.data(),
|
||||
port.format_info.num_channels)) {
|
||||
LOG_ERROR(Lib_AudioOut, "Failed to configure SDL audio stream channel map: {}",
|
||||
@ -71,9 +64,9 @@ public:
|
||||
queue_threshold);
|
||||
SDL_ClearAudioStream(stream);
|
||||
// Recalculate the threshold in case this happened because of a device change.
|
||||
queue_threshold = CalculateQueueThreshold();
|
||||
CalculateQueueThreshold();
|
||||
}
|
||||
if (!SDL_PutAudioStreamData(stream, ptr, static_cast<int>(buffer_size))) {
|
||||
if (!SDL_PutAudioStreamData(stream, ptr, static_cast<int>(guest_buffer_size))) {
|
||||
LOG_ERROR(Lib_AudioOut, "Failed to output to SDL audio stream: {}", SDL_GetError());
|
||||
}
|
||||
}
|
||||
@ -91,7 +84,7 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
[[nodiscard]] u32 CalculateQueueThreshold() const {
|
||||
void CalculateQueueThreshold() {
|
||||
SDL_AudioSpec discard;
|
||||
int sdl_buffer_frames;
|
||||
if (!SDL_GetAudioDeviceFormat(SDL_GetAudioStreamDevice(stream), &discard,
|
||||
@ -100,13 +93,22 @@ private:
|
||||
SDL_GetError());
|
||||
sdl_buffer_frames = 0;
|
||||
}
|
||||
return std::max<u32>(buffer_size, sdl_buffer_frames * frame_size) * 4;
|
||||
const auto sdl_buffer_size = sdl_buffer_frames * frame_size;
|
||||
const auto new_threshold = std::max(guest_buffer_size, sdl_buffer_size) * 4;
|
||||
if (host_buffer_size != sdl_buffer_size || queue_threshold != new_threshold) {
|
||||
host_buffer_size = sdl_buffer_size;
|
||||
queue_threshold = new_threshold;
|
||||
LOG_INFO(Lib_AudioOut,
|
||||
"SDL audio buffers: guest = {} bytes, host = {} bytes, threshold = {} bytes",
|
||||
guest_buffer_size, host_buffer_size, queue_threshold);
|
||||
}
|
||||
}
|
||||
|
||||
u32 frame_size;
|
||||
u32 buffer_size;
|
||||
u32 queue_threshold;
|
||||
SDL_AudioStream* stream;
|
||||
u32 guest_buffer_size;
|
||||
u32 host_buffer_size{};
|
||||
u32 queue_threshold{};
|
||||
SDL_AudioStream* stream{};
|
||||
};
|
||||
|
||||
std::unique_ptr<PortBackend> SDLAudioOut::Open(PortOut& port) {
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "core/address_space.h"
|
||||
#include "core/debug_state.h"
|
||||
#include "core/libraries/gnmdriver/gnm_error.h"
|
||||
#include "core/libraries/gnmdriver/gnmdriver_init.h"
|
||||
#include "core/libraries/kernel/orbis_error.h"
|
||||
#include "core/libraries/kernel/process.h"
|
||||
#include "core/libraries/libs.h"
|
||||
@ -54,244 +55,11 @@ enum ShaderStages : u32 {
|
||||
|
||||
static constexpr std::array indirect_sgpr_offsets{0u, 0u, 0x4cu, 0u, 0xccu, 0u, 0x14cu};
|
||||
|
||||
static constexpr auto HwInitPacketSize = 0x100u;
|
||||
|
||||
// clang-format off
|
||||
static constexpr std::array InitSequence{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1ffu,
|
||||
0xc0017600u, 0x46u, 0x1ffu,
|
||||
0xc0017600u, 0x87u, 0x1ffu,
|
||||
0xc0017600u, 0xc7u, 0x1ffu,
|
||||
0xc0017600u, 0x107u, 0u,
|
||||
0xc0017600u, 0x147u, 0x1ffu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6000000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
};
|
||||
static_assert(InitSequence.size() == 0x73 + 2);
|
||||
|
||||
static constexpr std::array InitSequence175{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1ffu,
|
||||
0xc0017600u, 0x46u, 0x1ffu,
|
||||
0xc0017600u, 0x87u, 0x1ffu,
|
||||
0xc0017600u, 0xc7u, 0x1ffu,
|
||||
0xc0017600u, 0x107u, 0u,
|
||||
0xc0017600u, 0x147u, 0x1ffu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
};
|
||||
static_assert(InitSequence175.size() == 0x73 + 2);
|
||||
|
||||
static constexpr std::array InitSequence200{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1701ffu,
|
||||
0xc0017600u, 0x46u, 0x1701fdu,
|
||||
0xc0017600u, 0x87u, 0x1701ffu,
|
||||
0xc0017600u, 0xc7u, 0x1701fdu,
|
||||
0xc0017600u, 0x107u, 0x17u,
|
||||
0xc0017600u, 0x147u, 0x1701fdu,
|
||||
0xc0017600u, 0x47u, 0x1cu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
};
|
||||
static_assert(InitSequence200.size() == 0x76 + 2);
|
||||
|
||||
static constexpr std::array InitSequence350{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1701ffu,
|
||||
0xc0017600u, 0x46u, 0x1701fdu,
|
||||
0xc0017600u, 0x87u, 0x1701ffu,
|
||||
0xc0017600u, 0xc7u, 0x1701fdu,
|
||||
0xc0017600u, 0x107u, 0x17u,
|
||||
0xc0017600u, 0x147u, 0x1701fdu,
|
||||
0xc0017600u, 0x47u, 0x1cu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x102u, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
0xc0016900u, 0x2aau, 0xffu,
|
||||
};
|
||||
static_assert(InitSequence350.size() == 0x7c + 2);
|
||||
|
||||
static constexpr std::array CtxInitSequence{
|
||||
0xc0012800u, 0x80000000u, 0x80000000u,
|
||||
0xc0001200u, 0u,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0016900u, 0x102u, 0u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0111000u, 0u
|
||||
};
|
||||
static_assert(CtxInitSequence.size() == 0x0f);
|
||||
|
||||
static constexpr std::array CtxInitSequence400{
|
||||
0xc0012800u, 0x80000000u, 0x80000000u,
|
||||
0xc0001200u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x102u, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0016900u, 0x2aau, 0xffu,
|
||||
0xc09e1000u,
|
||||
};
|
||||
static_assert(CtxInitSequence400.size() == 0x61);
|
||||
// clang-format on
|
||||
// Gates use of what appear to be the neo-mode init sequences but with the older
|
||||
// IA_MULTI_VGT_PARAM register address. No idea what this is for as the ioctl
|
||||
// that controls it is still a mystery, but leaving the sequences in gated behind
|
||||
// this flag in case we need it in the future.
|
||||
static constexpr bool UseNeoCompatSequences = false;
|
||||
|
||||
// In case if `submitDone` is issued we need to block submissions until GPU idle
|
||||
static u32 submission_lock{};
|
||||
@ -317,6 +85,14 @@ static void WaitGpuIdle() {
|
||||
cv_lock.wait(lock, [] { return submission_lock == 0; });
|
||||
}
|
||||
|
||||
// Write a special ending NOP packet with N DWs data block
|
||||
static inline u32* WriteTrailingNop(u32* cmdbuf, u32 data_block_size) {
|
||||
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
|
||||
nop->header = PM4Type3Header{PM4ItOpcode::Nop, data_block_size - 1};
|
||||
nop->data_block[0] = 0u; // only one out of `data_block_size` is initialized
|
||||
return cmdbuf + data_block_size + 1 /* header */;
|
||||
}
|
||||
|
||||
// Write a special ending NOP packet with N DWs data block
|
||||
template <u32 data_block_size>
|
||||
static inline u32* WriteTrailingNop(u32* cmdbuf) {
|
||||
@ -607,9 +383,16 @@ s32 PS4_SYSV_ABI sceGnmDispatchIndirect(u32* cmdbuf, u32 size, u32 data_offset,
|
||||
return -1;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec() {
|
||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
s32 PS4_SYSV_ABI sceGnmDispatchIndirectOnMec(u32* cmdbuf, u32 size, VAddr args, u32 modifier) {
|
||||
if (cmdbuf != nullptr && size == 8 && args != 0 && ((args & 3u) == 0)) {
|
||||
cmdbuf[0] = 0xc0021602 | (modifier & 1u);
|
||||
*(VAddr*)(&cmdbuf[1]) = args;
|
||||
cmdbuf[3] = (modifier & 0x18) | 1u;
|
||||
cmdbuf[4] = 0xc0021000;
|
||||
cmdbuf[5] = 0;
|
||||
return ORBIS_OK;
|
||||
}
|
||||
return ORBIS_FAIL;
|
||||
}
|
||||
|
||||
u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) {
|
||||
@ -619,17 +402,30 @@ u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x216u,
|
||||
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE0
|
||||
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x217u,
|
||||
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1
|
||||
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS
|
||||
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
|
||||
cmdbuf, 0x216u,
|
||||
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE0
|
||||
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
|
||||
cmdbuf, 0x217u,
|
||||
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1
|
||||
|
||||
if (sceKernelIsNeoMode()) {
|
||||
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
|
||||
cmdbuf, 0x219u,
|
||||
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE2
|
||||
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
|
||||
cmdbuf, 0x21au,
|
||||
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE3
|
||||
}
|
||||
|
||||
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
|
||||
cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS
|
||||
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::AcquireMem>(cmdbuf, 6);
|
||||
cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u, 0u);
|
||||
cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u, 0xau);
|
||||
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xef);
|
||||
cmdbuf = WriteBody(cmdbuf, 0xau, 0u);
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, sceKernelIsNeoMode() ? 0xe9 : 0xef);
|
||||
cmdbuf = WriteBody(cmdbuf, 0u);
|
||||
return HwInitPacketSize;
|
||||
}
|
||||
|
||||
@ -646,7 +442,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr
|
||||
draw_index->index_base_lo = u32(index_addr);
|
||||
draw_index->index_base_hi = u32(index_addr >> 32);
|
||||
draw_index->index_count = index_count;
|
||||
draw_index->draw_initiator = 0;
|
||||
draw_index->draw_initiator = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0;
|
||||
|
||||
WriteTrailingNop<3>(cmdbuf + 6);
|
||||
return ORBIS_OK;
|
||||
@ -659,8 +455,9 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32
|
||||
|
||||
if (cmdbuf && (size == 7) &&
|
||||
(flags & 0x1ffffffe) == 0) { // no predication will be set in the packet
|
||||
cmdbuf = WritePacket<PM4ItOpcode::DrawIndexAuto>(cmdbuf, PM4ShaderType::ShaderGraphics,
|
||||
index_count, 2u);
|
||||
cmdbuf = WritePacket<PM4ItOpcode::DrawIndexAuto>(
|
||||
cmdbuf, PM4ShaderType::ShaderGraphics, index_count,
|
||||
sceKernelIsNeoMode() ? flags & 0xe0000000u | 2u : 2u);
|
||||
WriteTrailingNop<3>(cmdbuf);
|
||||
return ORBIS_OK;
|
||||
}
|
||||
@ -684,7 +481,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirect(u32* cmdbuf, u32 size, u32 data_offset,
|
||||
cmdbuf[0] = data_offset;
|
||||
cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset;
|
||||
cmdbuf[2] = instance_sgpr_offset == 0 ? 0 : (instance_sgpr_offset & 0xffffu) + sgpr_offset;
|
||||
cmdbuf[3] = 0;
|
||||
cmdbuf[3] = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0u;
|
||||
|
||||
cmdbuf += 4;
|
||||
WriteTrailingNop<3>(cmdbuf);
|
||||
@ -699,8 +496,9 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da
|
||||
u32 flags) {
|
||||
LOG_TRACE(Lib_GnmDriver, "called");
|
||||
|
||||
if (cmdbuf && (size == 16) && (shader_stage < ShaderStages::Max) &&
|
||||
(vertex_sgpr_offset < 0x10u) && (instance_sgpr_offset < 0x10u)) {
|
||||
// The packet is written through `cmdbuf` right below, so the buffer must be non-null
// (the check was inverted, which rejected every valid buffer and let a null one through).
if ((!sceKernelIsNeoMode() || !UseNeoCompatSequences) && cmdbuf && (size == 16) &&
    (shader_stage < ShaderStages::Max) && (vertex_sgpr_offset < 0x10u) &&
    (instance_sgpr_offset < 0x10u)) {
|
||||
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 2);
|
||||
cmdbuf = WriteBody(cmdbuf, 0u);
|
||||
@ -719,7 +517,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da
|
||||
cmdbuf[4] = max_count;
|
||||
*(u64*)(&cmdbuf[5]) = count_addr;
|
||||
cmdbuf[7] = sizeof(DrawIndexedIndirectArgs);
|
||||
cmdbuf[8] = 0;
|
||||
cmdbuf[8] = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0;
|
||||
|
||||
cmdbuf += 9;
|
||||
WriteTrailingNop<2>(cmdbuf);
|
||||
@ -748,7 +546,8 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset,
|
||||
const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable;
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::DrawIndexOffset2>(
|
||||
cmdbuf, 4, PM4ShaderType::ShaderGraphics, predicate);
|
||||
cmdbuf = WriteBody(cmdbuf, index_count, index_offset, index_count, 0u);
|
||||
cmdbuf = WriteBody(cmdbuf, index_count, index_offset, index_count,
|
||||
sceKernelIsNeoMode() ? flags & 0xe0000000u : 0u);
|
||||
|
||||
WriteTrailingNop<3>(cmdbuf);
|
||||
return ORBIS_OK;
|
||||
@ -772,7 +571,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32
|
||||
cmdbuf[0] = data_offset;
|
||||
cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset;
|
||||
cmdbuf[2] = instance_sgpr_offset == 0 ? 0 : (instance_sgpr_offset & 0xffffu) + sgpr_offset;
|
||||
cmdbuf[3] = 2; // auto index
|
||||
cmdbuf[3] = sceKernelIsNeoMode() ? flags & 0xe0000000u | 2u : 2u; // auto index
|
||||
|
||||
cmdbuf += 4;
|
||||
WriteTrailingNop<3>(cmdbuf);
|
||||
@ -801,6 +600,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(u32* cmdbuf, u32 size) {
|
||||
}
|
||||
|
||||
const auto& SetupContext = [](u32* cmdbuf, u32 size, bool clear_state) {
|
||||
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
|
||||
if (clear_state) {
|
||||
cmdbuf = ClearContextState(cmdbuf);
|
||||
}
|
||||
@ -808,10 +608,8 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(u32* cmdbuf, u32 size) {
|
||||
std::memcpy(cmdbuf, &InitSequence[2], (InitSequence.size() - 2) * 4);
|
||||
cmdbuf += InitSequence.size() - 2;
|
||||
|
||||
const auto cmdbuf_left =
|
||||
HwInitPacketSize - (InitSequence.size() - 2) - (clear_state ? 0xc : 0) - 1;
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
|
||||
cmdbuf = WriteBody(cmdbuf, 0u);
|
||||
const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
|
||||
WriteTrailingNop(cmdbuf, cmdbuf_left);
|
||||
|
||||
return HwInitPacketSize;
|
||||
};
|
||||
@ -826,12 +624,13 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(u32* cmdbuf, u32 size) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
|
||||
cmdbuf = ClearContextState(cmdbuf);
|
||||
std::memcpy(cmdbuf, &InitSequence175[2], (InitSequence175.size() - 2) * 4);
|
||||
cmdbuf += InitSequence175.size() - 2;
|
||||
|
||||
constexpr auto cmdbuf_left = HwInitPacketSize - (InitSequence175.size() - 2) - 0xc - 1;
|
||||
WriteTrailingNop<cmdbuf_left>(cmdbuf);
|
||||
const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
|
||||
WriteTrailingNop(cmdbuf, cmdbuf_left);
|
||||
|
||||
return HwInitPacketSize;
|
||||
}
|
||||
@ -844,17 +643,27 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size) {
|
||||
}
|
||||
|
||||
const auto& SetupContext200 = [](u32* cmdbuf, u32 size, bool clear_state) {
|
||||
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
|
||||
if (clear_state) {
|
||||
cmdbuf = ClearContextState(cmdbuf);
|
||||
}
|
||||
|
||||
std::memcpy(cmdbuf, &InitSequence200[2], (InitSequence200.size() - 2) * 4);
|
||||
cmdbuf += InitSequence200.size() - 2;
|
||||
if (sceKernelIsNeoMode()) {
|
||||
if (!UseNeoCompatSequences) {
|
||||
std::memcpy(cmdbuf, &InitSequence200Neo[2], (InitSequence200Neo.size() - 2) * 4);
|
||||
cmdbuf += InitSequence200Neo.size() - 2;
|
||||
} else {
|
||||
std::memcpy(cmdbuf, &InitSequence200NeoCompat[2],
|
||||
(InitSequence200NeoCompat.size() - 2) * 4);
|
||||
cmdbuf += InitSequence200NeoCompat.size() - 2;
|
||||
}
|
||||
} else {
|
||||
std::memcpy(cmdbuf, &InitSequence200[2], (InitSequence200.size() - 2) * 4);
|
||||
cmdbuf += InitSequence200.size() - 2;
|
||||
}
|
||||
|
||||
const auto cmdbuf_left =
|
||||
HwInitPacketSize - (InitSequence200.size() - 2) - (clear_state ? 0xc : 0) - 1;
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
|
||||
cmdbuf = WriteBody(cmdbuf, 0u);
|
||||
const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
|
||||
WriteTrailingNop(cmdbuf, cmdbuf_left);
|
||||
|
||||
return HwInitPacketSize;
|
||||
};
|
||||
@ -870,17 +679,27 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) {
|
||||
}
|
||||
|
||||
const auto& SetupContext350 = [](u32* cmdbuf, u32 size, bool clear_state) {
|
||||
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
|
||||
if (clear_state) {
|
||||
cmdbuf = ClearContextState(cmdbuf);
|
||||
}
|
||||
|
||||
std::memcpy(cmdbuf, &InitSequence350[2], (InitSequence350.size() - 2) * 4);
|
||||
cmdbuf += InitSequence350.size() - 2;
|
||||
if (sceKernelIsNeoMode()) {
|
||||
if (!UseNeoCompatSequences) {
|
||||
std::memcpy(cmdbuf, &InitSequence350Neo[2], (InitSequence350Neo.size() - 2) * 4);
|
||||
cmdbuf += InitSequence350Neo.size() - 2;
|
||||
} else {
|
||||
std::memcpy(cmdbuf, &InitSequence350NeoCompat[2],
|
||||
(InitSequence350NeoCompat.size() - 2) * 4);
|
||||
cmdbuf += InitSequence350NeoCompat.size() - 2;
|
||||
}
|
||||
} else {
|
||||
std::memcpy(cmdbuf, &InitSequence350[2], (InitSequence350.size() - 2) * 4);
|
||||
cmdbuf += InitSequence350.size() - 2;
|
||||
}
|
||||
|
||||
const auto cmdbuf_left =
|
||||
HwInitPacketSize - (InitSequence350.size() - 2) - (clear_state ? 0xc : 0) - 1;
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
|
||||
cmdbuf = WriteBody(cmdbuf, 0u);
|
||||
const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
|
||||
WriteTrailingNop(cmdbuf, cmdbuf_left);
|
||||
|
||||
return HwInitPacketSize;
|
||||
};
|
||||
@ -896,7 +715,11 @@ u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(u32* cmdbuf, u32 size) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::memcpy(cmdbuf, CtxInitSequence.data(), CtxInitSequence.size() * 4);
|
||||
if (sceKernelIsNeoMode()) {
|
||||
std::memcpy(cmdbuf, CtxInitSequenceNeo.data(), CtxInitSequenceNeo.size() * 4);
|
||||
} else {
|
||||
std::memcpy(cmdbuf, CtxInitSequence.data(), CtxInitSequence.size() * 4);
|
||||
}
|
||||
return CtxInitPacketSize;
|
||||
}
|
||||
|
||||
@ -908,7 +731,16 @@ u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(u32* cmdbuf, u32 size) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::memcpy(cmdbuf, CtxInitSequence400.data(), CtxInitSequence400.size() * 4);
|
||||
if (sceKernelIsNeoMode()) {
|
||||
if (!UseNeoCompatSequences) {
|
||||
std::memcpy(cmdbuf, CtxInitSequence400Neo.data(), CtxInitSequence400Neo.size() * 4);
|
||||
} else {
|
||||
std::memcpy(cmdbuf, CtxInitSequence400NeoCompat.data(),
|
||||
CtxInitSequence400NeoCompat.size() * 4);
|
||||
}
|
||||
} else {
|
||||
std::memcpy(cmdbuf, CtxInitSequence400.data(), CtxInitSequence400.size() * 4);
|
||||
}
|
||||
return CtxInitPacketSize;
|
||||
}
|
||||
|
||||
@ -1030,7 +862,8 @@ int PS4_SYSV_ABI sceGnmGetGpuBlockStatus() {
|
||||
|
||||
// Reports the GPU core clock frequency (presumably in Hz): 911'000'000 in neo mode,
// 800'000'000 otherwise.
u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() {
    LOG_TRACE(Lib_GnmDriver, "called");
    // On console this uses an ioctl check, but we assume it is equal to just checking for neo mode.
    return sceKernelIsNeoMode() ? 911'000'000 : 800'000'000;
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmGetGpuInfoStatus() {
|
||||
@ -1369,7 +1202,15 @@ s32 PS4_SYSV_ABI sceGnmResetVgtControl(u32* cmdbuf, u32 size) {
|
||||
if (cmdbuf == nullptr || size != 3) {
|
||||
return -1;
|
||||
}
|
||||
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, 0xffu); // IA_MULTI_VGT_PARAM
|
||||
if (sceKernelIsNeoMode()) {
|
||||
if (!UseNeoCompatSequences) {
|
||||
PM4CmdSetData::SetUconfigReg(cmdbuf, 0x40000258u, 0x6d007fu); // IA_MULTI_VGT_PARAM
|
||||
} else {
|
||||
PM4CmdSetData::SetContextReg(cmdbuf, 0x100002aau, 0xd00ffu); // IA_MULTI_VGT_PARAM
|
||||
}
|
||||
} else {
|
||||
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, 0xffu); // IA_MULTI_VGT_PARAM
|
||||
}
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
@ -1830,9 +1671,25 @@ s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_mi
|
||||
return -1;
|
||||
}
|
||||
|
||||
const u32 reg_value =
|
||||
((partial_vs_wave_mode & 1) << 0x10) | (prim_group_sz_minus_one & 0xffffu);
|
||||
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, reg_value); // IA_MULTI_VGT_PARAM
|
||||
if (sceKernelIsNeoMode()) {
|
||||
const u32 wd_switch_on_eop = u32(wd_switch_only_on_eop_mode != 0) << 0x14;
|
||||
const u32 switch_on_eoi = u32(wd_switch_only_on_eop_mode == 0) << 0x13;
|
||||
const u32 reg_value =
|
||||
wd_switch_only_on_eop_mode != 0
|
||||
? (partial_vs_wave_mode & 1) << 0x10 | prim_group_sz_minus_one | wd_switch_on_eop |
|
||||
switch_on_eoi | 0x40000u
|
||||
: prim_group_sz_minus_one & 0x1cffffu | wd_switch_on_eop | switch_on_eoi | 0x50000u;
|
||||
if (!UseNeoCompatSequences) {
|
||||
PM4CmdSetData::SetUconfigReg(cmdbuf, 0x40000258u,
|
||||
reg_value | 0x600000u); // IA_MULTI_VGT_PARAM
|
||||
} else {
|
||||
PM4CmdSetData::SetContextReg(cmdbuf, 0x100002aau, reg_value); // IA_MULTI_VGT_PARAM
|
||||
}
|
||||
} else {
|
||||
const u32 reg_value =
|
||||
((partial_vs_wave_mode & 1) << 0x10) | (prim_group_sz_minus_one & 0xffffu);
|
||||
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, reg_value); // IA_MULTI_VGT_PARAM
|
||||
}
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
@ -2215,9 +2072,25 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count,
|
||||
if (sdk_version <= 0x1ffffffu) {
|
||||
liverpool->SubmitGfx(InitSequence, {});
|
||||
} else if (sdk_version <= 0x3ffffffu) {
|
||||
liverpool->SubmitGfx(InitSequence200, {});
|
||||
if (sceKernelIsNeoMode()) {
|
||||
if (!UseNeoCompatSequences) {
|
||||
liverpool->SubmitGfx(InitSequence200Neo, {});
|
||||
} else {
|
||||
liverpool->SubmitGfx(InitSequence200NeoCompat, {});
|
||||
}
|
||||
} else {
|
||||
liverpool->SubmitGfx(InitSequence200, {});
|
||||
}
|
||||
} else {
|
||||
liverpool->SubmitGfx(InitSequence350, {});
|
||||
if (sceKernelIsNeoMode()) {
|
||||
if (!UseNeoCompatSequences) {
|
||||
liverpool->SubmitGfx(InitSequence350Neo, {});
|
||||
} else {
|
||||
liverpool->SubmitGfx(InitSequence350NeoCompat, {});
|
||||
}
|
||||
} else {
|
||||
liverpool->SubmitGfx(InitSequence350, {});
|
||||
}
|
||||
}
|
||||
send_init_packet = false;
|
||||
}
|
||||
|
@ -39,7 +39,7 @@ int PS4_SYSV_ABI sceGnmDisableMipStatsReport();
|
||||
s32 PS4_SYSV_ABI sceGnmDispatchDirect(u32* cmdbuf, u32 size, u32 threads_x, u32 threads_y,
|
||||
u32 threads_z, u32 flags);
|
||||
s32 PS4_SYSV_ABI sceGnmDispatchIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 flags);
|
||||
int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec();
|
||||
s32 PS4_SYSV_ABI sceGnmDispatchIndirectOnMec(u32* cmdbuf, u32 size, VAddr args, u32 modifier);
|
||||
u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size);
|
||||
s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr_t index_addr,
|
||||
u32 flags, u32 type);
|
||||
|
542
src/core/libraries/gnmdriver/gnmdriver_init.h
Normal file
542
src/core/libraries/gnmdriver/gnmdriver_init.h
Normal file
@ -0,0 +1,542 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
namespace Libraries::GnmDriver {
|
||||
|
||||
// Size, in dwords, of the hardware-init packet: the init-state writers pad their output up
// to this length with a trailing NOP and return this value to the caller.
constexpr auto HwInitPacketSize = 0x100u;
|
||||
|
||||
// clang-format off
|
||||
// Default hardware-state init sequence, submitted as-is for the lowest SDK range
// (sdk_version <= 0x1ffffff). When copied into a guest command buffer the first two
// dwords (the fake preamble) are skipped, which is why the size check is written as
// "payload + 2".
// `inline` keeps a single copy of the table across all translation units that include
// this header.
inline constexpr std::array InitSequence{
    // A fake preamble to mimic context reset sent by FW
    0xc0001200u, 0u, // IT_CLEAR_STATE

    // Actual init state sequence
    0xc0017600u, 0x216u, 0xffffffffu,
    0xc0017600u, 0x217u, 0xffffffffu,
    0xc0017600u, 0x215u, 0u,
    0xc0016900u, 0x2f9u, 0x2du,
    0xc0016900u, 0x282u, 8u,
    0xc0016900u, 0x280u, 0x80008u,
    0xc0016900u, 0x281u, 0xffff0000u,
    0xc0016900u, 0x204u, 0u,
    0xc0016900u, 0x206u, 0x43fu,
    0xc0016900u, 0x83u, 0xffffu,
    0xc0016900u, 0x317u, 0x10u,
    0xc0016900u, 0x2fau, 0x3f800000u,
    0xc0016900u, 0x2fcu, 0x3f800000u,
    0xc0016900u, 0x2fbu, 0x3f800000u,
    0xc0016900u, 0x2fdu, 0x3f800000u,
    0xc0016900u, 0x202u, 0xcc0010u,
    0xc0016900u, 0x30eu, 0xffffffffu,
    0xc0016900u, 0x30fu, 0xffffffffu,
    0xc0002f00u, 1u,
    0xc0017600u, 7u, 0x1ffu,
    0xc0017600u, 0x46u, 0x1ffu,
    0xc0017600u, 0x87u, 0x1ffu,
    0xc0017600u, 0xc7u, 0x1ffu,
    0xc0017600u, 0x107u, 0u,
    0xc0017600u, 0x147u, 0x1ffu,
    0xc0016900u, 0x1b1u, 2u,
    0xc0016900u, 0x101u, 0u,
    0xc0016900u, 0x100u, 0xffffffffu,
    0xc0016900u, 0x103u, 0u,
    0xc0016900u, 0x284u, 0u,
    0xc0016900u, 0x290u, 0u,
    0xc0016900u, 0x2aeu, 0u,
    0xc0016900u, 0x292u, 0u,
    0xc0016900u, 0x293u, 0x6000000u,
    0xc0016900u, 0x2f8u, 0u,
    0xc0016900u, 0x2deu, 0x1e9u,
    0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
    0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence.size() == 0x73 + 2);
|
||||
|
||||
// Init sequence written by sceGnmDrawInitDefaultHardwareState175. It matches
// InitSequence except for the value programmed into context register 0x293
// (0x6020000 here). The first two dwords (the fake preamble) are skipped when the
// sequence is copied into a guest command buffer.
// `inline` keeps a single copy of the table across all translation units that include
// this header.
inline constexpr std::array InitSequence175{
    // A fake preamble to mimic context reset sent by FW
    0xc0001200u, 0u, // IT_CLEAR_STATE

    // Actual init state sequence
    0xc0017600u, 0x216u, 0xffffffffu,
    0xc0017600u, 0x217u, 0xffffffffu,
    0xc0017600u, 0x215u, 0u,
    0xc0016900u, 0x2f9u, 0x2du,
    0xc0016900u, 0x282u, 8u,
    0xc0016900u, 0x280u, 0x80008u,
    0xc0016900u, 0x281u, 0xffff0000u,
    0xc0016900u, 0x204u, 0u,
    0xc0016900u, 0x206u, 0x43fu,
    0xc0016900u, 0x83u, 0xffffu,
    0xc0016900u, 0x317u, 0x10u,
    0xc0016900u, 0x2fau, 0x3f800000u,
    0xc0016900u, 0x2fcu, 0x3f800000u,
    0xc0016900u, 0x2fbu, 0x3f800000u,
    0xc0016900u, 0x2fdu, 0x3f800000u,
    0xc0016900u, 0x202u, 0xcc0010u,
    0xc0016900u, 0x30eu, 0xffffffffu,
    0xc0016900u, 0x30fu, 0xffffffffu,
    0xc0002f00u, 1u,
    0xc0017600u, 7u, 0x1ffu,
    0xc0017600u, 0x46u, 0x1ffu,
    0xc0017600u, 0x87u, 0x1ffu,
    0xc0017600u, 0xc7u, 0x1ffu,
    0xc0017600u, 0x107u, 0u,
    0xc0017600u, 0x147u, 0x1ffu,
    0xc0016900u, 0x1b1u, 2u,
    0xc0016900u, 0x101u, 0u,
    0xc0016900u, 0x100u, 0xffffffffu,
    0xc0016900u, 0x103u, 0u,
    0xc0016900u, 0x284u, 0u,
    0xc0016900u, 0x290u, 0u,
    0xc0016900u, 0x2aeu, 0u,
    0xc0016900u, 0x292u, 0u,
    0xc0016900u, 0x293u, 0x6020000u,
    0xc0016900u, 0x2f8u, 0u,
    0xc0016900u, 0x2deu, 0x1e9u,
    0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
    0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence175.size() == 0x73 + 2);
|
||||
|
||||
// Base-mode init sequence submitted for sdk_version <= 0x3ffffff and written by
// sceGnmDrawInitDefaultHardwareState200 outside neo mode. The first two dwords (the fake
// preamble) are skipped when the sequence is copied into a guest command buffer.
// `inline` keeps a single copy of the table across all translation units that include
// this header.
inline constexpr std::array InitSequence200{
    // A fake preamble to mimic context reset sent by FW
    0xc0001200u, 0u, // IT_CLEAR_STATE

    // Actual init state sequence
    0xc0017600u, 0x216u, 0xffffffffu,
    0xc0017600u, 0x217u, 0xffffffffu,
    0xc0017600u, 0x215u, 0u,
    0xc0016900u, 0x2f9u, 0x2du,
    0xc0016900u, 0x282u, 8u,
    0xc0016900u, 0x280u, 0x80008u,
    0xc0016900u, 0x281u, 0xffff0000u,
    0xc0016900u, 0x204u, 0u,
    0xc0016900u, 0x206u, 0x43fu,
    0xc0016900u, 0x83u, 0xffffu,
    0xc0016900u, 0x317u, 0x10u,
    0xc0016900u, 0x2fau, 0x3f800000u,
    0xc0016900u, 0x2fcu, 0x3f800000u,
    0xc0016900u, 0x2fbu, 0x3f800000u,
    0xc0016900u, 0x2fdu, 0x3f800000u,
    0xc0016900u, 0x202u, 0xcc0010u,
    0xc0016900u, 0x30eu, 0xffffffffu,
    0xc0016900u, 0x30fu, 0xffffffffu,
    0xc0002f00u, 1u,
    0xc0017600u, 7u, 0x1701ffu,
    0xc0017600u, 0x46u, 0x1701fdu,
    0xc0017600u, 0x87u, 0x1701ffu,
    0xc0017600u, 0xc7u, 0x1701fdu,
    0xc0017600u, 0x107u, 0x17u,
    0xc0017600u, 0x147u, 0x1701fdu,
    0xc0017600u, 0x47u, 0x1cu,
    0xc0016900u, 0x1b1u, 2u,
    0xc0016900u, 0x101u, 0u,
    0xc0016900u, 0x100u, 0xffffffffu,
    0xc0016900u, 0x103u, 0u,
    0xc0016900u, 0x284u, 0u,
    0xc0016900u, 0x290u, 0u,
    0xc0016900u, 0x2aeu, 0u,
    0xc0016900u, 0x292u, 0u,
    0xc0016900u, 0x293u, 0x6020000u,
    0xc0016900u, 0x2f8u, 0u,
    0xc0016900u, 0x2deu, 0x1e9u,
    0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
    0xc0017900u, 0x200u, 0xe0000000u,
};
static_assert(InitSequence200.size() == 0x76 + 2);
|
||||
|
||||
// Neo-mode counterpart of InitSequence200, used when UseNeoCompatSequences is false.
// Compared with the base sequence it additionally programs SH registers 0x219/0x21a
// (COMPUTE_STATIC_THREAD_MGMT_SE2/SE3), a two-value context write at 0xeb, and ends with
// the uconfig IA_MULTI_VGT_PARAM write (0x40000258). The first two dwords (the fake
// preamble) are skipped when the sequence is copied into a guest command buffer.
// `inline` keeps a single copy of the table across all translation units that include
// this header.
inline constexpr std::array InitSequence200Neo{
    // A fake preamble to mimic context reset sent by FW
    0xc0001200u, 0u, // IT_CLEAR_STATE

    // Actual init state sequence
    0xc0017600u, 0x216u, 0xffffffffu,
    0xc0017600u, 0x217u, 0xffffffffu,
    0xc0017600u, 0x219u, 0xffffffffu,
    0xc0017600u, 0x21au, 0xffffffffu,
    0xc0017600u, 0x215u, 0u,
    0xc0016900u, 0x2f9u, 0x2du,
    0xc0016900u, 0x282u, 8u,
    0xc0016900u, 0x280u, 0x80008u,
    0xc0016900u, 0x281u, 0xffff0000u,
    0xc0016900u, 0x204u, 0u,
    0xc0016900u, 0x206u, 0x43fu,
    0xc0016900u, 0x83u, 0xffffu,
    0xc0016900u, 0x317u, 0x10u,
    0xc0016900u, 0x2fau, 0x3f800000u,
    0xc0016900u, 0x2fcu, 0x3f800000u,
    0xc0016900u, 0x2fbu, 0x3f800000u,
    0xc0016900u, 0x2fdu, 0x3f800000u,
    0xc0016900u, 0x202u, 0xcc0010u,
    0xc0016900u, 0x30eu, 0xffffffffu,
    0xc0016900u, 0x30fu, 0xffffffffu,
    0xc0002f00u, 1u,
    0xc0017600u, 7u, 0x1701ffu,
    0xc0017600u, 0x46u, 0x1701fdu,
    0xc0017600u, 0x87u, 0x1701ffu,
    0xc0017600u, 0xc7u, 0x1701fdu,
    0xc0017600u, 0x107u, 0x17u,
    0xc0017600u, 0x147u, 0x1701fdu,
    0xc0017600u, 0x47u, 0x1cu,
    0xc0016900u, 0x1b1u, 2u,
    0xc0016900u, 0x101u, 0u,
    0xc0016900u, 0x100u, 0xffffffffu,
    0xc0016900u, 0x103u, 0u,
    0xc0016900u, 0x284u, 0u,
    0xc0016900u, 0x290u, 0u,
    0xc0016900u, 0x2aeu, 0u,
    0xc0016900u, 0x292u, 0u,
    0xc0016900u, 0x293u, 0x6020000u,
    0xc0016900u, 0x2f8u, 0u,
    0xc0016900u, 0x2deu, 0x1e9u,
    0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
    0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
    0xc0017900u, 0x200u, 0xe0000000u,
    0xc0017900u, 0x40000258u, 0x6d007fu,
};
static_assert(InitSequence200Neo.size() == 0x83 + 2);
|
||||
|
||||
// Variant of InitSequence200Neo selected when UseNeoCompatSequences is true. It is
// identical except for the final packet, which writes IA_MULTI_VGT_PARAM through the
// context-register path (0x100002aa, value 0xd00ff) instead of the uconfig one. The first
// two dwords (the fake preamble) are skipped when the sequence is copied into a guest
// command buffer.
// `inline` keeps a single copy of the table across all translation units that include
// this header.
inline constexpr std::array InitSequence200NeoCompat{
    // A fake preamble to mimic context reset sent by FW
    0xc0001200u, 0u, // IT_CLEAR_STATE

    // Actual init state sequence
    0xc0017600u, 0x216u, 0xffffffffu,
    0xc0017600u, 0x217u, 0xffffffffu,
    0xc0017600u, 0x219u, 0xffffffffu,
    0xc0017600u, 0x21au, 0xffffffffu,
    0xc0017600u, 0x215u, 0u,
    0xc0016900u, 0x2f9u, 0x2du,
    0xc0016900u, 0x282u, 8u,
    0xc0016900u, 0x280u, 0x80008u,
    0xc0016900u, 0x281u, 0xffff0000u,
    0xc0016900u, 0x204u, 0u,
    0xc0016900u, 0x206u, 0x43fu,
    0xc0016900u, 0x83u, 0xffffu,
    0xc0016900u, 0x317u, 0x10u,
    0xc0016900u, 0x2fau, 0x3f800000u,
    0xc0016900u, 0x2fcu, 0x3f800000u,
    0xc0016900u, 0x2fbu, 0x3f800000u,
    0xc0016900u, 0x2fdu, 0x3f800000u,
    0xc0016900u, 0x202u, 0xcc0010u,
    0xc0016900u, 0x30eu, 0xffffffffu,
    0xc0016900u, 0x30fu, 0xffffffffu,
    0xc0002f00u, 1u,
    0xc0017600u, 7u, 0x1701ffu,
    0xc0017600u, 0x46u, 0x1701fdu,
    0xc0017600u, 0x87u, 0x1701ffu,
    0xc0017600u, 0xc7u, 0x1701fdu,
    0xc0017600u, 0x107u, 0x17u,
    0xc0017600u, 0x147u, 0x1701fdu,
    0xc0017600u, 0x47u, 0x1cu,
    0xc0016900u, 0x1b1u, 2u,
    0xc0016900u, 0x101u, 0u,
    0xc0016900u, 0x100u, 0xffffffffu,
    0xc0016900u, 0x103u, 0u,
    0xc0016900u, 0x284u, 0u,
    0xc0016900u, 0x290u, 0u,
    0xc0016900u, 0x2aeu, 0u,
    0xc0016900u, 0x292u, 0u,
    0xc0016900u, 0x293u, 0x6020000u,
    0xc0016900u, 0x2f8u, 0u,
    0xc0016900u, 0x2deu, 0x1e9u,
    0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
    0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
    0xc0017900u, 0x200u, 0xe0000000u,
    0xc0016900u, 0x100002aau, 0xd00ffu,
};
static_assert(InitSequence200NeoCompat.size() == 0x83 + 2);
|
||||
|
||||
// Base-mode init sequence submitted for sdk_version above 0x3ffffff and written by
// sceGnmDrawInitDefaultHardwareState350 outside neo mode. Relative to InitSequence200 it
// adds a write to context register 0x102 and a final IA_MULTI_VGT_PARAM (0x2aa) write.
// The first two dwords (the fake preamble) are skipped when the sequence is copied into a
// guest command buffer.
// `inline` keeps a single copy of the table across all translation units that include
// this header.
inline constexpr std::array InitSequence350{
    // A fake preamble to mimic context reset sent by FW
    0xc0001200u, 0u, // IT_CLEAR_STATE

    // Actual init state sequence
    0xc0017600u, 0x216u, 0xffffffffu,
    0xc0017600u, 0x217u, 0xffffffffu,
    0xc0017600u, 0x215u, 0u,
    0xc0016900u, 0x2f9u, 0x2du,
    0xc0016900u, 0x282u, 8u,
    0xc0016900u, 0x280u, 0x80008u,
    0xc0016900u, 0x281u, 0xffff0000u,
    0xc0016900u, 0x204u, 0u,
    0xc0016900u, 0x206u, 0x43fu,
    0xc0016900u, 0x83u, 0xffffu,
    0xc0016900u, 0x317u, 0x10u,
    0xc0016900u, 0x2fau, 0x3f800000u,
    0xc0016900u, 0x2fcu, 0x3f800000u,
    0xc0016900u, 0x2fbu, 0x3f800000u,
    0xc0016900u, 0x2fdu, 0x3f800000u,
    0xc0016900u, 0x202u, 0xcc0010u,
    0xc0016900u, 0x30eu, 0xffffffffu,
    0xc0016900u, 0x30fu, 0xffffffffu,
    0xc0002f00u, 1u,
    0xc0017600u, 7u, 0x1701ffu,
    0xc0017600u, 0x46u, 0x1701fdu,
    0xc0017600u, 0x87u, 0x1701ffu,
    0xc0017600u, 0xc7u, 0x1701fdu,
    0xc0017600u, 0x107u, 0x17u,
    0xc0017600u, 0x147u, 0x1701fdu,
    0xc0017600u, 0x47u, 0x1cu,
    0xc0016900u, 0x1b1u, 2u,
    0xc0016900u, 0x101u, 0u,
    0xc0016900u, 0x100u, 0xffffffffu,
    0xc0016900u, 0x103u, 0u,
    0xc0016900u, 0x284u, 0u,
    0xc0016900u, 0x290u, 0u,
    0xc0016900u, 0x2aeu, 0u,
    0xc0016900u, 0x102u, 0u,
    0xc0016900u, 0x292u, 0u,
    0xc0016900u, 0x293u, 0x6020000u,
    0xc0016900u, 0x2f8u, 0u,
    0xc0016900u, 0x2deu, 0x1e9u,
    0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
    0xc0017900u, 0x200u, 0xe0000000u,
    0xc0016900u, 0x2aau, 0xffu,
};
static_assert(InitSequence350.size() == 0x7c + 2);
|
||||
|
||||
// Neo-mode counterpart of InitSequence350, used when UseNeoCompatSequences is false.
// It additionally programs SH registers 0x219/0x21a (COMPUTE_STATIC_THREAD_MGMT_SE2/SE3)
// and a two-value context write at 0xeb, and replaces the trailing 0x2aa write with the
// uconfig IA_MULTI_VGT_PARAM write (0x40000258). The first two dwords (the fake preamble)
// are skipped when the sequence is copied into a guest command buffer.
// `inline` keeps a single copy of the table across all translation units that include
// this header.
inline constexpr std::array InitSequence350Neo{
    // A fake preamble to mimic context reset sent by FW
    0xc0001200u, 0u, // IT_CLEAR_STATE

    // Actual init state sequence
    0xc0017600u, 0x216u, 0xffffffffu,
    0xc0017600u, 0x217u, 0xffffffffu,
    0xc0017600u, 0x219u, 0xffffffffu,
    0xc0017600u, 0x21au, 0xffffffffu,
    0xc0017600u, 0x215u, 0u,
    0xc0016900u, 0x2f9u, 0x2du,
    0xc0016900u, 0x282u, 8u,
    0xc0016900u, 0x280u, 0x80008u,
    0xc0016900u, 0x281u, 0xffff0000u,
    0xc0016900u, 0x204u, 0u,
    0xc0016900u, 0x206u, 0x43fu,
    0xc0016900u, 0x83u, 0xffffu,
    0xc0016900u, 0x317u, 0x10u,
    0xc0016900u, 0x2fau, 0x3f800000u,
    0xc0016900u, 0x2fcu, 0x3f800000u,
    0xc0016900u, 0x2fbu, 0x3f800000u,
    0xc0016900u, 0x2fdu, 0x3f800000u,
    0xc0016900u, 0x202u, 0xcc0010u,
    0xc0016900u, 0x30eu, 0xffffffffu,
    0xc0016900u, 0x30fu, 0xffffffffu,
    0xc0002f00u, 1u,
    0xc0017600u, 7u, 0x1701ffu,
    0xc0017600u, 0x46u, 0x1701fdu,
    0xc0017600u, 0x87u, 0x1701ffu,
    0xc0017600u, 0xc7u, 0x1701fdu,
    0xc0017600u, 0x107u, 0x17u,
    0xc0017600u, 0x147u, 0x1701fdu,
    0xc0017600u, 0x47u, 0x1cu,
    0xc0016900u, 0x1b1u, 2u,
    0xc0016900u, 0x101u, 0u,
    0xc0016900u, 0x100u, 0xffffffffu,
    0xc0016900u, 0x103u, 0u,
    0xc0016900u, 0x284u, 0u,
    0xc0016900u, 0x290u, 0u,
    0xc0016900u, 0x2aeu, 0u,
    0xc0016900u, 0x102u, 0u,
    0xc0016900u, 0x292u, 0u,
    0xc0016900u, 0x293u, 0x6020000u,
    0xc0016900u, 0x2f8u, 0u,
    0xc0016900u, 0x2deu, 0x1e9u,
    0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
    0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
    0xc0017900u, 0x200u, 0xe0000000u,
    0xc0017900u, 0x40000258u, 0x6d007fu,
};
static_assert(InitSequence350Neo.size() == 0x86 + 2);
|
||||
|
||||
// Variant of InitSequence350Neo selected when UseNeoCompatSequences is true. It is
// identical except for the final packet, which writes IA_MULTI_VGT_PARAM through the
// context-register path (0x100002aa, value 0xd00ff) instead of the uconfig one. The first
// two dwords (the fake preamble) are skipped when the sequence is copied into a guest
// command buffer.
// `inline` keeps a single copy of the table across all translation units that include
// this header.
inline constexpr std::array InitSequence350NeoCompat{
    // A fake preamble to mimic context reset sent by FW
    0xc0001200u, 0u, // IT_CLEAR_STATE

    // Actual init state sequence
    0xc0017600u, 0x216u, 0xffffffffu,
    0xc0017600u, 0x217u, 0xffffffffu,
    0xc0017600u, 0x219u, 0xffffffffu,
    0xc0017600u, 0x21au, 0xffffffffu,
    0xc0017600u, 0x215u, 0u,
    0xc0016900u, 0x2f9u, 0x2du,
    0xc0016900u, 0x282u, 8u,
    0xc0016900u, 0x280u, 0x80008u,
    0xc0016900u, 0x281u, 0xffff0000u,
    0xc0016900u, 0x204u, 0u,
    0xc0016900u, 0x206u, 0x43fu,
    0xc0016900u, 0x83u, 0xffffu,
    0xc0016900u, 0x317u, 0x10u,
    0xc0016900u, 0x2fau, 0x3f800000u,
    0xc0016900u, 0x2fcu, 0x3f800000u,
    0xc0016900u, 0x2fbu, 0x3f800000u,
    0xc0016900u, 0x2fdu, 0x3f800000u,
    0xc0016900u, 0x202u, 0xcc0010u,
    0xc0016900u, 0x30eu, 0xffffffffu,
    0xc0016900u, 0x30fu, 0xffffffffu,
    0xc0002f00u, 1u,
    0xc0017600u, 7u, 0x1701ffu,
    0xc0017600u, 0x46u, 0x1701fdu,
    0xc0017600u, 0x87u, 0x1701ffu,
    0xc0017600u, 0xc7u, 0x1701fdu,
    0xc0017600u, 0x107u, 0x17u,
    0xc0017600u, 0x147u, 0x1701fdu,
    0xc0017600u, 0x47u, 0x1cu,
    0xc0016900u, 0x1b1u, 2u,
    0xc0016900u, 0x101u, 0u,
    0xc0016900u, 0x100u, 0xffffffffu,
    0xc0016900u, 0x103u, 0u,
    0xc0016900u, 0x284u, 0u,
    0xc0016900u, 0x290u, 0u,
    0xc0016900u, 0x2aeu, 0u,
    0xc0016900u, 0x102u, 0u,
    0xc0016900u, 0x292u, 0u,
    0xc0016900u, 0x293u, 0x6020000u,
    0xc0016900u, 0x2f8u, 0u,
    0xc0016900u, 0x2deu, 0x1e9u,
    0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
    0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
    0xc0017900u, 0x200u, 0xe0000000u,
    0xc0016900u, 0x100002aau, 0xd00ffu,
};
static_assert(InitSequence350NeoCompat.size() == 0x86 + 2);
|
||||
|
||||
// Short context-init command-word table (15 words, checked by the static_assert).
//
// Each row is one header word followed by its payload words.
// NOTE(review): the final row (0xc0111000u, 0u) looks like a padding/NOP packet that
// rounds the sequence up to a fixed size - confirm against the PM4 opcode list.
constexpr std::array CtxInitSequence{
    0xc0012800u, 0x80000000u, 0x80000000u,
    0xc0001200u, 0u,
    0xc0002f00u, 1u,
    0xc0016900u, 0x102u, 0u,
    0xc0016900u, 0x202u, 0xcc0010u,
    0xc0111000u, 0u
};
static_assert(CtxInitSequence.size() == 0x0f);
|
||||
|
||||
// Neo variant of the short context-init command-word table (19 words, checked by
// the static_assert).
//
// Each row is one header word followed by its payload words.
// NOTE(review): the final row (0xc00d1000u, 0u) looks like a padding/NOP packet -
// confirm against the PM4 opcode list.
constexpr std::array CtxInitSequenceNeo{
    0xc0012800u, 0x80000000u, 0x80000000u,
    0xc0001200u, 0u,
    0xc0002f00u, 1u,
    0xc0016900u, 0x102u, 0u,
    0xc0016900u, 0x202u, 0xcc0010u,
    0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
    // Explicit `u` suffix so every element is spelled as unsigned, like its neighbours.
    0xc00d1000u, 0u
};
static_assert(CtxInitSequenceNeo.size() == 0x13);
|
||||
|
||||
// Context-init command-word table, "400" variant (0x61 words, checked by the
// static_assert).
//
// Each row is one header word followed by its payload words; the last row is a
// single header word with no payload in this table.
// NOTE(review): that trailing 0xc09e1000u looks like the header of a padding/NOP
// packet whose body is supplied elsewhere - confirm with the code that emits it.
constexpr std::array CtxInitSequence400{
    0xc0012800u, 0x80000000u, 0x80000000u,
    0xc0001200u, 0u,
    0xc0016900u, 0x2f9u, 0x2du,
    0xc0016900u, 0x282u, 8u,
    0xc0016900u, 0x280u, 0x80008u,
    0xc0016900u, 0x281u, 0xffff0000u,
    0xc0016900u, 0x204u, 0u,
    0xc0016900u, 0x206u, 0x43fu,
    0xc0016900u, 0x83u, 0xffffu,
    0xc0016900u, 0x317u, 0x10u,
    0xc0016900u, 0x2fau, 0x3f800000u,
    0xc0016900u, 0x2fcu, 0x3f800000u,
    0xc0016900u, 0x2fbu, 0x3f800000u,
    0xc0016900u, 0x2fdu, 0x3f800000u,
    0xc0016900u, 0x202u, 0xcc0010u,
    0xc0016900u, 0x30eu, 0xffffffffu,
    0xc0016900u, 0x30fu, 0xffffffffu,
    0xc0002f00u, 1u,
    0xc0016900u, 0x1b1u, 2u,
    0xc0016900u, 0x101u, 0u,
    0xc0016900u, 0x100u, 0xffffffffu,
    0xc0016900u, 0x103u, 0u,
    0xc0016900u, 0x284u, 0u,
    0xc0016900u, 0x290u, 0u,
    0xc0016900u, 0x2aeu, 0u,
    0xc0016900u, 0x102u, 0u,
    0xc0016900u, 0x292u, 0u,
    0xc0016900u, 0x293u, 0x6020000u,
    0xc0016900u, 0x2f8u, 0u,
    0xc0016900u, 0x2deu, 0x1e9u,
    0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
    0xc0016900u, 0x2aau, 0xffu,
    0xc09e1000u,
};
static_assert(CtxInitSequence400.size() == 0x61);
|
||||
|
||||
// Context-init command-word table, "400 Neo" variant (0x65 words, checked by the
// static_assert).
//
// Each row is one header word followed by its payload words; the last row is a
// single header word with no payload in this table.
// NOTE(review): that trailing 0xc09a1000u looks like the header of a padding/NOP
// packet whose body is supplied elsewhere - confirm with the code that emits it.
constexpr std::array CtxInitSequence400Neo{
    0xc0012800u, 0x80000000u, 0x80000000u,
    0xc0001200u, 0u,
    0xc0016900u, 0x2f9u, 0x2du,
    0xc0016900u, 0x282u, 8u,
    0xc0016900u, 0x280u, 0x80008u,
    0xc0016900u, 0x281u, 0xffff0000u,
    0xc0016900u, 0x204u, 0u,
    0xc0016900u, 0x206u, 0x43fu,
    0xc0016900u, 0x83u, 0xffffu,
    0xc0016900u, 0x317u, 0x10u,
    0xc0016900u, 0x2fau, 0x3f800000u,
    0xc0016900u, 0x2fcu, 0x3f800000u,
    0xc0016900u, 0x2fbu, 0x3f800000u,
    0xc0016900u, 0x2fdu, 0x3f800000u,
    0xc0016900u, 0x202u, 0xcc0010u,
    0xc0016900u, 0x30eu, 0xffffffffu,
    0xc0016900u, 0x30fu, 0xffffffffu,
    0xc0002f00u, 1u,
    0xc0016900u, 0x1b1u, 2u,
    0xc0016900u, 0x101u, 0u,
    0xc0016900u, 0x100u, 0xffffffffu,
    0xc0016900u, 0x103u, 0u,
    0xc0016900u, 0x284u, 0u,
    0xc0016900u, 0x290u, 0u,
    0xc0016900u, 0x2aeu, 0u,
    0xc0016900u, 0x102u, 0u,
    0xc0016900u, 0x292u, 0u,
    0xc0016900u, 0x293u, 0x6020000u,
    0xc0016900u, 0x2f8u, 0u,
    0xc0016900u, 0x2deu, 0x1e9u,
    0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
    0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
    0xc0017900u, 0x40000258u, 0x6d007fu,
    0xc09a1000u,
};
static_assert(CtxInitSequence400Neo.size() == 0x65);
|
||||
|
||||
// Context-init command-word table, "400 Neo compat" variant (0x65 words, checked
// by the static_assert).
//
// Each row is one header word followed by its payload words; the last row is a
// single header word with no payload in this table.
// NOTE(review): that trailing 0xc09a1000u looks like the header of a padding/NOP
// packet whose body is supplied elsewhere - confirm with the code that emits it.
constexpr std::array CtxInitSequence400NeoCompat{
    0xc0012800u, 0x80000000u, 0x80000000u,
    0xc0001200u, 0u,
    0xc0016900u, 0x2f9u, 0x2du,
    0xc0016900u, 0x282u, 8u,
    0xc0016900u, 0x280u, 0x80008u,
    0xc0016900u, 0x281u, 0xffff0000u,
    0xc0016900u, 0x204u, 0u,
    0xc0016900u, 0x206u, 0x43fu,
    0xc0016900u, 0x83u, 0xffffu,
    0xc0016900u, 0x317u, 0x10u,
    0xc0016900u, 0x2fau, 0x3f800000u,
    0xc0016900u, 0x2fcu, 0x3f800000u,
    0xc0016900u, 0x2fbu, 0x3f800000u,
    0xc0016900u, 0x2fdu, 0x3f800000u,
    0xc0016900u, 0x202u, 0xcc0010u,
    0xc0016900u, 0x30eu, 0xffffffffu,
    0xc0016900u, 0x30fu, 0xffffffffu,
    0xc0002f00u, 1u,
    0xc0016900u, 0x1b1u, 2u,
    0xc0016900u, 0x101u, 0u,
    0xc0016900u, 0x100u, 0xffffffffu,
    0xc0016900u, 0x103u, 0u,
    0xc0016900u, 0x284u, 0u,
    0xc0016900u, 0x290u, 0u,
    0xc0016900u, 0x2aeu, 0u,
    0xc0016900u, 0x102u, 0u,
    0xc0016900u, 0x292u, 0u,
    0xc0016900u, 0x293u, 0x6020000u,
    0xc0016900u, 0x2f8u, 0u,
    0xc0016900u, 0x2deu, 0x1e9u,
    0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
    0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
    0xc0016900u, 0x100002aau, 0xd00ffu,
    0xc09a1000u,
};
// Checks this table's own length. The previous assertion tested
// CtxInitSequence400Neo a second time, so edits to this table were never verified.
static_assert(CtxInitSequence400NeoCompat.size() == 0x65);
|
||||
// clang-format on
|
||||
|
||||
} // namespace Libraries::GnmDriver
|
@ -505,13 +505,13 @@ int PS4_SYSV_ABI posix_munmap(void* addr, size_t len) {
|
||||
return result;
|
||||
}
|
||||
|
||||
static constexpr int MAX_PTR_APERTURES = 3;
|
||||
static constexpr int MAX_PRT_APERTURES = 3;
|
||||
static constexpr VAddr PRT_AREA_START_ADDR = 0x1000000000;
|
||||
static constexpr size_t PRT_AREA_SIZE = 0xec00000000;
|
||||
static std::array<std::pair<VAddr, size_t>, MAX_PTR_APERTURES> PrtApertures{};
|
||||
static std::array<std::pair<VAddr, size_t>, MAX_PRT_APERTURES> PrtApertures{};
|
||||
|
||||
int PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) {
|
||||
if (id < 0 || id >= MAX_PTR_APERTURES) {
|
||||
if (id < 0 || id >= MAX_PRT_APERTURES) {
|
||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
|
||||
@ -531,12 +531,12 @@ int PS4_SYSV_ABI sceKernelSetPrtAperture(int id, VAddr address, size_t size) {
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceKernelGetPrtAperture(int id, VAddr* addres, size_t* size) {
|
||||
if (id < 0 || id >= MAX_PTR_APERTURES) {
|
||||
int PS4_SYSV_ABI sceKernelGetPrtAperture(int id, VAddr* address, size_t* size) {
|
||||
if (id < 0 || id >= MAX_PRT_APERTURES) {
|
||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
|
||||
std::tie(*addres, *size) = PrtApertures[id];
|
||||
std::tie(*address, *size) = PrtApertures[id];
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
@ -14,7 +14,8 @@ namespace Libraries::Kernel {
|
||||
|
||||
int PS4_SYSV_ABI sceKernelIsNeoMode() {
|
||||
LOG_DEBUG(Kernel_Sce, "called");
|
||||
return Config::isNeoMode();
|
||||
return Config::isNeoModeConsole() &&
|
||||
Common::ElfInfo::Instance().PSFAttributes().support_neo_mode;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver) {
|
||||
|
@ -55,6 +55,9 @@ public:
|
||||
stop.request_stop();
|
||||
Join();
|
||||
}
|
||||
thread = nullptr;
|
||||
func = nullptr;
|
||||
stop = std::stop_source{};
|
||||
}
|
||||
|
||||
static void* PS4_SYSV_ABI RunWrapper(void* arg) {
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "core/libraries/libc_internal/libc_internal.h"
|
||||
#include "core/libraries/libpng/pngdec.h"
|
||||
#include "core/libraries/libs.h"
|
||||
#include "core/libraries/mouse/mouse.h"
|
||||
#include "core/libraries/move/move.h"
|
||||
#include "core/libraries/network/http.h"
|
||||
#include "core/libraries/network/net.h"
|
||||
@ -97,6 +98,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) {
|
||||
Libraries::Move::RegisterlibSceMove(sym);
|
||||
Libraries::Fiber::RegisterlibSceFiber(sym);
|
||||
Libraries::JpegEnc::RegisterlibSceJpegEnc(sym);
|
||||
Libraries::Mouse::RegisterlibSceMouse(sym);
|
||||
}
|
||||
|
||||
} // namespace Libraries
|
||||
|
99
src/core/libraries/mouse/mouse.cpp
Normal file
99
src/core/libraries/mouse/mouse.cpp
Normal file
@ -0,0 +1,99 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
// Generated By moduleGenerator
|
||||
#include "common/logging/log.h"
|
||||
#include "core/libraries/error_codes.h"
|
||||
#include "core/libraries/libs.h"
|
||||
#include "mouse.h"
|
||||
|
||||
namespace Libraries::Mouse {
|
||||
|
||||
int PS4_SYSV_ABI sceMouseClose() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseConnectPort() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseDebugGetDeviceId() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseDeviceOpen() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseDisconnectDevice() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseDisconnectPort() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseGetDeviceInfo() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseInit() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseMbusInit() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseOpen() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseRead() {
|
||||
LOG_DEBUG(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseSetHandType() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseSetPointerSpeed() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseSetProcessPrivilege() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
void RegisterlibSceMouse(Core::Loader::SymbolsResolver* sym) {
|
||||
LIB_FUNCTION("cAnT0Rw-IwU", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseClose);
|
||||
LIB_FUNCTION("Ymyy1HSSJLQ", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseConnectPort);
|
||||
LIB_FUNCTION("BRXOoXQtb+k", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDebugGetDeviceId);
|
||||
LIB_FUNCTION("WiGKINCZWkc", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDeviceOpen);
|
||||
LIB_FUNCTION("eDQTFHbgeTU", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDisconnectDevice);
|
||||
LIB_FUNCTION("jJP1vYMEPd4", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDisconnectPort);
|
||||
LIB_FUNCTION("QA9Qupz3Zjw", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseGetDeviceInfo);
|
||||
LIB_FUNCTION("Qs0wWulgl7U", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseInit);
|
||||
LIB_FUNCTION("1FeceR5YhAo", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseMbusInit);
|
||||
LIB_FUNCTION("RaqxZIf6DvE", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseOpen);
|
||||
LIB_FUNCTION("x8qnXqh-tiM", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseRead);
|
||||
LIB_FUNCTION("crkFfp-cmFo", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseSetHandType);
|
||||
LIB_FUNCTION("ghLUU2Z5Lcg", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseSetPointerSpeed);
|
||||
LIB_FUNCTION("6aANndpS0Wo", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseSetProcessPrivilege);
|
||||
};
|
||||
|
||||
} // namespace Libraries::Mouse
|
29
src/core/libraries/mouse/mouse.h
Normal file
29
src/core/libraries/mouse/mouse.h
Normal file
@ -0,0 +1,29 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
#include "common/types.h"
|
||||
|
||||
// Forward declaration only: this header uses SymbolsResolver solely through a
// pointer parameter, so the full definition is not needed here.
namespace Core::Loader {
class SymbolsResolver;
}
|
||||
|
||||
namespace Libraries::Mouse {
|
||||
|
||||
int PS4_SYSV_ABI sceMouseClose();
|
||||
int PS4_SYSV_ABI sceMouseConnectPort();
|
||||
int PS4_SYSV_ABI sceMouseDebugGetDeviceId();
|
||||
int PS4_SYSV_ABI sceMouseDeviceOpen();
|
||||
int PS4_SYSV_ABI sceMouseDisconnectDevice();
|
||||
int PS4_SYSV_ABI sceMouseDisconnectPort();
|
||||
int PS4_SYSV_ABI sceMouseGetDeviceInfo();
|
||||
int PS4_SYSV_ABI sceMouseInit();
|
||||
int PS4_SYSV_ABI sceMouseMbusInit();
|
||||
int PS4_SYSV_ABI sceMouseOpen();
|
||||
int PS4_SYSV_ABI sceMouseRead();
|
||||
int PS4_SYSV_ABI sceMouseSetHandType();
|
||||
int PS4_SYSV_ABI sceMouseSetPointerSpeed();
|
||||
int PS4_SYSV_ABI sceMouseSetProcessPrivilege();
|
||||
|
||||
void RegisterlibSceMouse(Core::Loader::SymbolsResolver* sym);
|
||||
} // namespace Libraries::Mouse
|
@ -972,11 +972,8 @@ int PS4_SYSV_ABI sceNpGetGamePresenceStatusA() {
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceNpGetNpId(OrbisUserServiceUserId user_id, OrbisNpId* np_id) {
|
||||
LOG_INFO(Lib_NpManager, "user_id {}", user_id);
|
||||
const auto name = Config::getUserName();
|
||||
std::memset(np_id, 0, sizeof(OrbisNpId));
|
||||
name.copy(np_id->handle.data, sizeof(np_id->handle.data));
|
||||
return ORBIS_OK;
|
||||
LOG_DEBUG(Lib_NpManager, "user_id {}", user_id);
|
||||
return ORBIS_NP_ERROR_SIGNED_OUT;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceNpGetNpReachabilityState() {
|
||||
@ -986,10 +983,7 @@ int PS4_SYSV_ABI sceNpGetNpReachabilityState() {
|
||||
|
||||
int PS4_SYSV_ABI sceNpGetOnlineId(s32 user_id, OrbisNpOnlineId* online_id) {
|
||||
LOG_DEBUG(Lib_NpManager, "user_id {}", user_id);
|
||||
const auto name = Config::getUserName();
|
||||
std::memset(online_id, 0, sizeof(OrbisNpOnlineId));
|
||||
name.copy(online_id->data, sizeof(online_id->data));
|
||||
return ORBIS_OK;
|
||||
return ORBIS_NP_ERROR_SIGNED_OUT;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceNpGetParentalControlInfo() {
|
||||
|
@ -104,8 +104,8 @@ int PS4_SYSV_ABI scePadGetControllerInformation(s32 handle, OrbisPadControllerIn
|
||||
pInfo->touchPadInfo.pixelDensity = 1;
|
||||
pInfo->touchPadInfo.resolution.x = 1920;
|
||||
pInfo->touchPadInfo.resolution.y = 950;
|
||||
pInfo->stickInfo.deadZoneLeft = 20;
|
||||
pInfo->stickInfo.deadZoneRight = 20;
|
||||
pInfo->stickInfo.deadZoneLeft = 2;
|
||||
pInfo->stickInfo.deadZoneRight = 2;
|
||||
pInfo->connectionType = ORBIS_PAD_PORT_TYPE_STANDARD;
|
||||
pInfo->connectedCount = 1;
|
||||
pInfo->connected = true;
|
||||
|
@ -157,7 +157,7 @@ s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoCh
|
||||
}
|
||||
|
||||
for (int i = 0; i < numberOfEntries; i++) {
|
||||
if (chunkIds[i] <= playgo->chunks.size()) {
|
||||
if (chunkIds[i] < playgo->chunks.size()) {
|
||||
outLoci[i] = OrbisPlayGoLocus::LocalFast;
|
||||
} else {
|
||||
outLoci[i] = OrbisPlayGoLocus::NotDownloaded;
|
||||
|
@ -2,6 +2,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <span>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include <core/libraries/system/msgdialog_ui.h>
|
||||
@ -1139,10 +1140,6 @@ Error PS4_SYSV_ABI sceSaveDataGetSaveDataMemory2(OrbisSaveDataMemoryGet2* getPar
|
||||
LOG_INFO(Lib_SaveData, "called without save memory initialized");
|
||||
return Error::MEMORY_NOT_READY;
|
||||
}
|
||||
if (SaveMemory::IsSaving()) {
|
||||
LOG_TRACE(Lib_SaveData, "called while saving");
|
||||
return Error::BUSY_FOR_SAVING;
|
||||
}
|
||||
LOG_DEBUG(Lib_SaveData, "called");
|
||||
auto data = getParam->data;
|
||||
if (data != nullptr) {
|
||||
@ -1502,8 +1499,14 @@ Error PS4_SYSV_ABI sceSaveDataSetSaveDataMemory2(const OrbisSaveDataMemorySet2*
|
||||
return Error::MEMORY_NOT_READY;
|
||||
}
|
||||
if (SaveMemory::IsSaving()) {
|
||||
LOG_TRACE(Lib_SaveData, "called while saving");
|
||||
return Error::BUSY_FOR_SAVING;
|
||||
int count = 0;
|
||||
while (++count < 100 && SaveMemory::IsSaving()) { // try for more 10 seconds
|
||||
std::this_thread::sleep_for(chrono::milliseconds(100));
|
||||
}
|
||||
if (SaveMemory::IsSaving()) {
|
||||
LOG_TRACE(Lib_SaveData, "called while saving");
|
||||
return Error::BUSY_FOR_SAVING;
|
||||
}
|
||||
}
|
||||
LOG_DEBUG(Lib_SaveData, "called");
|
||||
auto data = setParam->data;
|
||||
@ -1584,8 +1587,8 @@ Error PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory2(const OrbisSaveDataMemorySetu
|
||||
} else {
|
||||
SaveMemory::SetIcon(nullptr, 0);
|
||||
}
|
||||
SaveMemory::TriggerSaveWithoutEvent();
|
||||
}
|
||||
SaveMemory::TriggerSaveWithoutEvent();
|
||||
if (g_fw_ver >= ElfInfo::FW_45 && result != nullptr) {
|
||||
result->existedMemorySize = existed_size;
|
||||
}
|
||||
|
@ -10,327 +10,327 @@
|
||||
namespace Libraries::Usbd {
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdAllocTransfer() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdAttachKernelDriver() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdBulkTransfer() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdCancelTransfer() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdCheckConnected() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdClaimInterface() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdClearHalt() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdClose() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdControlTransfer() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdControlTransferGetData() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdControlTransferGetSetup() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdDetachKernelDriver() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdEventHandlerActive() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdEventHandlingOk() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdExit() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdFillBulkTransfer() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdFillControlSetup() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdFillControlTransfer() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdFillInterruptTransfer() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdFillIsoTransfer() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdFreeConfigDescriptor() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdFreeDeviceList() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdFreeTransfer() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetActiveConfigDescriptor() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetBusNumber() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetConfigDescriptor() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetConfigDescriptorByValue() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetConfiguration() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetDescriptor() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetDevice() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetDeviceAddress() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetDeviceDescriptor() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetDeviceList() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetDeviceSpeed() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetIsoPacketBuffer() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetMaxIsoPacketSize() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetMaxPacketSize() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetStringDescriptor() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdGetStringDescriptorAscii() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdHandleEvents() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdHandleEventsLocked() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdHandleEventsTimeout() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_DEBUG(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdInit() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return 0x80240005; // Skip
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdInterruptTransfer() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdKernelDriverActive() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdLockEvents() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdLockEventWaiters() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdOpen() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdOpenDeviceWithVidPid() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdRefDevice() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdReleaseInterface() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdResetDevice() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdSetConfiguration() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdSetInterfaceAltSetting() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdSetIsoPacketLengths() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdSubmitTransfer() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdTryLockEvents() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdUnlockEvents() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdUnlockEventWaiters() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdUnrefDevice() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceUsbdWaitForEvent() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI Func_65F6EF33E38FFF50() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI Func_97F056BAD90AADE7() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI Func_C55104A33B35B264() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI Func_D56B43060720B1E0() {
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED)called");
|
||||
LOG_ERROR(Lib_Usbd, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "common/debug.h"
|
||||
#include "core/libraries/kernel/memory.h"
|
||||
#include "core/libraries/kernel/orbis_error.h"
|
||||
#include "core/libraries/kernel/process.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
|
||||
@ -35,7 +36,7 @@ MemoryManager::~MemoryManager() = default;
|
||||
|
||||
void MemoryManager::SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1,
|
||||
bool use_extended_mem2) {
|
||||
const bool is_neo = Config::isNeoMode();
|
||||
const bool is_neo = ::Libraries::Kernel::sceKernelIsNeoMode();
|
||||
auto total_size = is_neo ? SCE_KERNEL_TOTAL_MEM_PRO : SCE_KERNEL_TOTAL_MEM;
|
||||
if (!use_extended_mem1 && is_neo) {
|
||||
total_size -= 256_MB;
|
||||
|
105
src/emulator.cpp
105
src/emulator.cpp
@ -28,8 +28,6 @@
|
||||
#include "core/file_format/trp.h"
|
||||
#include "core/file_sys/fs.h"
|
||||
#include "core/libraries/disc_map/disc_map.h"
|
||||
#include "core/libraries/fiber/fiber.h"
|
||||
#include "core/libraries/jpeg/jpegenc.h"
|
||||
#include "core/libraries/libc_internal/libc_internal.h"
|
||||
#include "core/libraries/libs.h"
|
||||
#include "core/libraries/ngs2/ngs2.h"
|
||||
@ -59,8 +57,8 @@ Emulator::Emulator() {
|
||||
LOG_INFO(Loader, "Branch {}", Common::g_scm_branch);
|
||||
LOG_INFO(Loader, "Description {}", Common::g_scm_desc);
|
||||
|
||||
LOG_INFO(Config, "General Logtype: {}", Config::getLogType());
|
||||
LOG_INFO(Config, "General isNeo: {}", Config::isNeoMode());
|
||||
LOG_INFO(Config, "General LogType: {}", Config::getLogType());
|
||||
LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole());
|
||||
LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu());
|
||||
LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders());
|
||||
LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv());
|
||||
@ -101,19 +99,12 @@ Emulator::~Emulator() {
|
||||
}
|
||||
|
||||
void Emulator::Run(const std::filesystem::path& file) {
|
||||
|
||||
// Use the eboot from the separated updates folder if it's there
|
||||
std::filesystem::path game_patch_folder = file.parent_path();
|
||||
game_patch_folder += "-UPDATE";
|
||||
std::filesystem::path eboot_path = std::filesystem::exists(game_patch_folder / file.filename())
|
||||
? game_patch_folder / file.filename()
|
||||
: file;
|
||||
|
||||
// Applications expect to be run from /app0 so mount the file's parent path as app0.
|
||||
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
|
||||
mnt->Mount(file.parent_path(), "/app0");
|
||||
const auto game_folder = file.parent_path();
|
||||
mnt->Mount(game_folder, "/app0");
|
||||
// Certain games may use /hostapp as well such as CUSA001100
|
||||
mnt->Mount(file.parent_path(), "/hostapp");
|
||||
mnt->Mount(game_folder, "/hostapp");
|
||||
|
||||
auto& game_info = Common::ElfInfo::Instance();
|
||||
|
||||
@ -122,50 +113,52 @@ void Emulator::Run(const std::filesystem::path& file) {
|
||||
std::string title;
|
||||
std::string app_version;
|
||||
u32 fw_version;
|
||||
Common::PSFAttributes psf_attributes{};
|
||||
|
||||
std::filesystem::path sce_sys_folder = eboot_path.parent_path() / "sce_sys";
|
||||
if (std::filesystem::is_directory(sce_sys_folder)) {
|
||||
for (const auto& entry : std::filesystem::directory_iterator(sce_sys_folder)) {
|
||||
if (entry.path().filename() == "param.sfo") {
|
||||
auto* param_sfo = Common::Singleton<PSF>::Instance();
|
||||
const bool success = param_sfo->Open(sce_sys_folder / "param.sfo");
|
||||
ASSERT_MSG(success, "Failed to open param.sfo");
|
||||
const auto content_id = param_sfo->GetString("CONTENT_ID");
|
||||
ASSERT_MSG(content_id.has_value(), "Failed to get CONTENT_ID");
|
||||
id = std::string(*content_id, 7, 9);
|
||||
Libraries::NpTrophy::game_serial = id;
|
||||
const auto trophyDir =
|
||||
Common::FS::GetUserPath(Common::FS::PathType::MetaDataDir) / id / "TrophyFiles";
|
||||
if (!std::filesystem::exists(trophyDir)) {
|
||||
TRP trp;
|
||||
if (!trp.Extract(eboot_path.parent_path(), id)) {
|
||||
LOG_ERROR(Loader, "Couldn't extract trophies");
|
||||
}
|
||||
}
|
||||
const auto param_sfo_path = mnt->GetHostPath("/app0/sce_sys/param.sfo");
|
||||
if (std::filesystem::exists(param_sfo_path)) {
|
||||
auto* param_sfo = Common::Singleton<PSF>::Instance();
|
||||
const bool success = param_sfo->Open(param_sfo_path);
|
||||
ASSERT_MSG(success, "Failed to open param.sfo");
|
||||
const auto content_id = param_sfo->GetString("CONTENT_ID");
|
||||
ASSERT_MSG(content_id.has_value(), "Failed to get CONTENT_ID");
|
||||
id = std::string(*content_id, 7, 9);
|
||||
Libraries::NpTrophy::game_serial = id;
|
||||
const auto trophyDir =
|
||||
Common::FS::GetUserPath(Common::FS::PathType::MetaDataDir) / id / "TrophyFiles";
|
||||
if (!std::filesystem::exists(trophyDir)) {
|
||||
TRP trp;
|
||||
if (!trp.Extract(game_folder, id)) {
|
||||
LOG_ERROR(Loader, "Couldn't extract trophies");
|
||||
}
|
||||
}
|
||||
#ifdef ENABLE_QT_GUI
|
||||
MemoryPatcher::g_game_serial = id;
|
||||
MemoryPatcher::g_game_serial = id;
|
||||
|
||||
// Timer for 'Play Time'
|
||||
QTimer* timer = new QTimer();
|
||||
QObject::connect(timer, &QTimer::timeout, [this, id]() {
|
||||
UpdatePlayTime(id);
|
||||
start_time = std::chrono::steady_clock::now();
|
||||
});
|
||||
timer->start(60000); // 60000 ms = 1 minute
|
||||
// Timer for 'Play Time'
|
||||
QTimer* timer = new QTimer();
|
||||
QObject::connect(timer, &QTimer::timeout, [this, id]() {
|
||||
UpdatePlayTime(id);
|
||||
start_time = std::chrono::steady_clock::now();
|
||||
});
|
||||
timer->start(60000); // 60000 ms = 1 minute
|
||||
#endif
|
||||
title = param_sfo->GetString("TITLE").value_or("Unknown title");
|
||||
LOG_INFO(Loader, "Game id: {} Title: {}", id, title);
|
||||
fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000);
|
||||
app_version = param_sfo->GetString("APP_VER").value_or("Unknown version");
|
||||
LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version);
|
||||
} else if (entry.path().filename() == "pic1.png") {
|
||||
auto* splash = Common::Singleton<Splash>::Instance();
|
||||
if (splash->IsLoaded()) {
|
||||
continue;
|
||||
}
|
||||
if (!splash->Open(entry.path())) {
|
||||
LOG_ERROR(Loader, "Game splash: unable to open file");
|
||||
}
|
||||
title = param_sfo->GetString("TITLE").value_or("Unknown title");
|
||||
LOG_INFO(Loader, "Game id: {} Title: {}", id, title);
|
||||
fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000);
|
||||
app_version = param_sfo->GetString("APP_VER").value_or("Unknown version");
|
||||
LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version);
|
||||
if (const auto raw_attributes = param_sfo->GetInteger("ATTRIBUTE")) {
|
||||
psf_attributes.raw = *raw_attributes;
|
||||
}
|
||||
}
|
||||
|
||||
const auto pic1_path = mnt->GetHostPath("/app0/sce_sys/pic1.png");
|
||||
if (std::filesystem::exists(pic1_path)) {
|
||||
auto* splash = Common::Singleton<Splash>::Instance();
|
||||
if (!splash->IsLoaded()) {
|
||||
if (!splash->Open(pic1_path)) {
|
||||
LOG_ERROR(Loader, "Game splash: unable to open file");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -176,6 +169,7 @@ void Emulator::Run(const std::filesystem::path& file) {
|
||||
game_info.app_ver = app_version;
|
||||
game_info.firmware_ver = fw_version & 0xFFF00000;
|
||||
game_info.raw_firmware_ver = fw_version;
|
||||
game_info.psf_attributes = psf_attributes;
|
||||
|
||||
std::string game_title = fmt::format("{} - {} <{}>", id, title, app_version);
|
||||
std::string window_title = "";
|
||||
@ -219,6 +213,7 @@ void Emulator::Run(const std::filesystem::path& file) {
|
||||
Libraries::InitHLELibs(&linker->GetHLESymbols());
|
||||
|
||||
// Load the module with the linker
|
||||
const auto eboot_path = mnt->GetHostPath("/app0/" + file.filename().string());
|
||||
linker->LoadModule(eboot_path);
|
||||
|
||||
// check if we have system modules to load
|
||||
@ -236,6 +231,8 @@ void Emulator::Run(const std::filesystem::path& file) {
|
||||
}
|
||||
|
||||
// Load all prx from separate update's sce_module folder
|
||||
std::filesystem::path game_patch_folder = game_folder;
|
||||
game_patch_folder += "-UPDATE";
|
||||
std::filesystem::path update_module_folder = game_patch_folder / "sce_module";
|
||||
if (std::filesystem::is_directory(update_module_folder)) {
|
||||
for (const auto& entry : std::filesystem::directory_iterator(update_module_folder)) {
|
||||
|
@ -2,6 +2,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <SDL3/SDL.h>
|
||||
#include "common/config.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/libraries/kernel/time.h"
|
||||
#include "core/libraries/pad/pad.h"
|
||||
@ -189,11 +190,6 @@ void GameController::CalculateOrientation(Libraries::Pad::OrbisFVector3& acceler
|
||||
gz += Kp * ez + Ki * eInt[2];
|
||||
|
||||
//// Integrate rate of change of quaternion
|
||||
// float pa = q2, pb = q3, pc = q4;
|
||||
// q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime);
|
||||
// q2 += (pa * gx + pb * gz - pc * gy) * (0.5f * deltaTime);
|
||||
// q3 += (pb * gy - pa * gz + pc * gx) * (0.5f * deltaTime);
|
||||
// q4 += (pc * gz + pa * gy - pb * gx) * (0.5f * deltaTime);
|
||||
q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime);
|
||||
q2 += (q1 * gx + q3 * gz - q4 * gy) * (0.5f * deltaTime);
|
||||
q3 += (q1 * gy - q2 * gz + q4 * gx) * (0.5f * deltaTime);
|
||||
@ -247,18 +243,21 @@ void GameController::TryOpenSDLController() {
|
||||
int gamepad_count;
|
||||
SDL_JoystickID* gamepads = SDL_GetGamepads(&gamepad_count);
|
||||
m_sdl_gamepad = gamepad_count > 0 ? SDL_OpenGamepad(gamepads[0]) : nullptr;
|
||||
if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_GYRO, true)) {
|
||||
gyro_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_GYRO);
|
||||
LOG_INFO(Input, "Gyro initialized, poll rate: {}", gyro_poll_rate);
|
||||
} else {
|
||||
LOG_ERROR(Input, "Failed to initialize gyro controls for gamepad");
|
||||
}
|
||||
if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_ACCEL, true)) {
|
||||
accel_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_ACCEL);
|
||||
LOG_INFO(Input, "Accel initialized, poll rate: {}", accel_poll_rate);
|
||||
} else {
|
||||
LOG_ERROR(Input, "Failed to initialize accel controls for gamepad");
|
||||
if (Config::getIsMotionControlsEnabled()) {
|
||||
if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_GYRO, true)) {
|
||||
gyro_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_GYRO);
|
||||
LOG_INFO(Input, "Gyro initialized, poll rate: {}", gyro_poll_rate);
|
||||
} else {
|
||||
LOG_ERROR(Input, "Failed to initialize gyro controls for gamepad");
|
||||
}
|
||||
if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_ACCEL, true)) {
|
||||
accel_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_ACCEL);
|
||||
LOG_INFO(Input, "Accel initialized, poll rate: {}", accel_poll_rate);
|
||||
} else {
|
||||
LOG_ERROR(Input, "Failed to initialize accel controls for gamepad");
|
||||
}
|
||||
}
|
||||
|
||||
SDL_free(gamepads);
|
||||
|
||||
SetLightBarRGB(0, 0, 255);
|
||||
@ -266,6 +265,7 @@ void GameController::TryOpenSDLController() {
|
||||
}
|
||||
|
||||
u32 GameController::Poll() {
|
||||
std::scoped_lock lock{m_mutex};
|
||||
if (m_connected) {
|
||||
auto time = Libraries::Kernel::sceKernelGetProcessTime();
|
||||
if (m_states_num == 0) {
|
||||
|
@ -341,6 +341,8 @@ void SettingsDialog::LoadValuesFromConfig() {
|
||||
toml::find_or<std::string>(data, "Input", "backButtonBehavior", "left"));
|
||||
int index = ui->backButtonBehaviorComboBox->findData(backButtonBehavior);
|
||||
ui->backButtonBehaviorComboBox->setCurrentIndex(index != -1 ? index : 0);
|
||||
ui->motionControlsCheckBox->setChecked(
|
||||
toml::find_or<bool>(data, "Input", "isMotionControlsEnabled", true));
|
||||
|
||||
ui->removeFolderButton->setEnabled(!ui->gameFoldersListWidget->selectedItems().isEmpty());
|
||||
ResetInstallFolders();
|
||||
@ -536,6 +538,7 @@ void SettingsDialog::UpdateSettings() {
|
||||
Config::setBackButtonBehavior(TouchPadIndex[ui->backButtonBehaviorComboBox->currentIndex()]);
|
||||
Config::setIsFullscreen(ui->fullscreenCheckBox->isChecked());
|
||||
Config::setFullscreenMode(ui->fullscreenModeComboBox->currentText().toStdString());
|
||||
Config::setIsMotionControlsEnabled(ui->motionControlsCheckBox->isChecked());
|
||||
Config::setisTrophyPopupDisabled(ui->disableTrophycheckBox->isChecked());
|
||||
Config::setPlayBGM(ui->playBGMCheckBox->isChecked());
|
||||
Config::setLogType(ui->logTypeComboBox->currentText().toStdString());
|
||||
|
@ -853,6 +853,13 @@
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="motionControlsCheckBox">
|
||||
<property name="text">
|
||||
<string>Enable Motion Controls</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QWidget" name="controllerWidgetSpacer" native="true">
|
||||
<property name="enabled">
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
1415
src/qt_gui/translations/sv.ts
Normal file
1415
src/qt_gui/translations/sv.ts
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -205,7 +205,6 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
|
||||
addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)));
|
||||
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
|
||||
}
|
||||
emit_ds_read_barrier = true;
|
||||
}
|
||||
|
||||
void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
|
||||
@ -222,11 +221,6 @@ void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
|
||||
|
||||
void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64,
|
||||
const GcnInst& inst) {
|
||||
if (emit_ds_read_barrier && profile.needs_lds_barriers) {
|
||||
ir.Barrier();
|
||||
emit_ds_read_barrier = false;
|
||||
}
|
||||
|
||||
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
|
||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
if (is_pair) {
|
||||
|
@ -2,6 +2,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/ir/reinterpret.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
@ -31,14 +32,16 @@ void Translator::EmitExport(const GcnInst& inst) {
|
||||
return;
|
||||
}
|
||||
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
|
||||
const auto [r, g, b, a] = runtime_info.fs_info.color_buffers[index].swizzle;
|
||||
const auto col_buf = runtime_info.fs_info.color_buffers[index];
|
||||
const auto converted = IR::ApplyWriteNumberConversion(ir, value, col_buf.num_conversion);
|
||||
const auto [r, g, b, a] = col_buf.swizzle;
|
||||
const std::array swizzle_array = {r, g, b, a};
|
||||
const auto swizzled_comp = swizzle_array[comp];
|
||||
if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) {
|
||||
ir.SetAttribute(attrib, value, comp);
|
||||
ir.SetAttribute(attrib, converted, comp);
|
||||
return;
|
||||
}
|
||||
ir.SetAttribute(attrib, value, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red));
|
||||
ir.SetAttribute(attrib, converted, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red));
|
||||
};
|
||||
|
||||
const auto unpack = [&](u32 idx) {
|
||||
|
@ -106,6 +106,10 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
|
||||
return S_FF1_I32_B32(inst);
|
||||
case Opcode::S_FF1_I32_B64:
|
||||
return S_FF1_I32_B64(inst);
|
||||
case Opcode::S_BITSET0_B32:
|
||||
return S_BITSET_B32(inst, 0);
|
||||
case Opcode::S_BITSET1_B32:
|
||||
return S_BITSET_B32(inst, 1);
|
||||
case Opcode::S_AND_SAVEEXEC_B64:
|
||||
return S_SAVEEXEC_B64(NegateMode::None, false, inst);
|
||||
case Opcode::S_ORN2_SAVEEXEC_B64:
|
||||
@ -607,6 +611,13 @@ void Translator::S_FF1_I32_B64(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], result);
|
||||
}
|
||||
|
||||
void Translator::S_BITSET_B32(const GcnInst& inst, u32 bit_value) {
|
||||
const IR::U32 old_value{GetSrc(inst.dst[0])};
|
||||
const IR::U32 offset{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0U), ir.Imm32(5U))};
|
||||
const IR::U32 result{ir.BitFieldInsert(old_value, ir.Imm32(bit_value), offset, ir.Imm32(1U))};
|
||||
SetDst(inst.dst[0], result);
|
||||
}
|
||||
|
||||
void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst) {
|
||||
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
|
||||
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
|
||||
|
@ -114,6 +114,7 @@ public:
|
||||
void S_BCNT1_I32_B64(const GcnInst& inst);
|
||||
void S_FF1_I32_B32(const GcnInst& inst);
|
||||
void S_FF1_I32_B64(const GcnInst& inst);
|
||||
void S_BITSET_B32(const GcnInst& inst, u32 bit_value);
|
||||
void S_GETPC_B64(u32 pc, const GcnInst& inst);
|
||||
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
|
||||
void S_ABS_I32(const GcnInst& inst);
|
||||
@ -308,7 +309,6 @@ private:
|
||||
const RuntimeInfo& runtime_info;
|
||||
const Profile& profile;
|
||||
bool opcode_missing = false;
|
||||
bool emit_ds_read_barrier = false;
|
||||
};
|
||||
|
||||
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info,
|
||||
|
@ -904,7 +904,7 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
|
||||
case ConditionOp::GE:
|
||||
return ir.FPGreaterThanEqual(src0, src1);
|
||||
case ConditionOp::U:
|
||||
return ir.LogicalNot(ir.LogicalAnd(ir.FPIsNan(src0), ir.FPIsNan(src1)));
|
||||
return ir.LogicalOr(ir.FPIsNan(src0), ir.FPIsNan(src1));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -301,8 +301,7 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
|
||||
});
|
||||
}
|
||||
|
||||
void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
s32 binding{};
|
||||
AmdGpu::Buffer buffer;
|
||||
if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) {
|
||||
@ -317,19 +316,191 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
});
|
||||
}
|
||||
|
||||
// Update buffer descriptor format.
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
|
||||
// Replace handle with binding index in buffer resource list.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(binding));
|
||||
}
|
||||
|
||||
void PatchTextureBufferSharp(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
const IR::Inst* handle = inst.Arg(0).InstRecursive();
|
||||
const IR::Inst* producer = handle->Arg(0).InstRecursive();
|
||||
const auto sharp = TrackSharp(producer, info);
|
||||
const s32 binding = descriptors.Add(TextureBufferResource{
|
||||
.sharp_idx = sharp,
|
||||
.is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
|
||||
});
|
||||
|
||||
// Replace handle with binding index in texture buffer resource list.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(binding));
|
||||
}
|
||||
|
||||
void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
const auto opcode = inst->GetOpcode();
|
||||
if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||
opcode == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to find image sharp source");
|
||||
const IR::Inst* producer = result.value();
|
||||
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
|
||||
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
|
||||
|
||||
// Read image sharp.
|
||||
const auto tsharp = TrackSharp(tsharp_handle, info);
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
auto image = info.ReadUdSharp<AmdGpu::Image>(tsharp);
|
||||
if (!image.Valid()) {
|
||||
LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!");
|
||||
image = AmdGpu::Image::Null();
|
||||
}
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
|
||||
const bool is_read = inst.GetOpcode() == IR::Opcode::ImageRead;
|
||||
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite;
|
||||
|
||||
// Patch image instruction if image is FMask.
|
||||
if (image.IsFmask()) {
|
||||
ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ImageRead:
|
||||
case IR::Opcode::ImageSampleRaw: {
|
||||
IR::F32 fmaskx = ir.BitCast<IR::F32>(ir.Imm32(0x76543210));
|
||||
IR::F32 fmasky = ir.BitCast<IR::F32>(ir.Imm32(0xfedcba98));
|
||||
inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky));
|
||||
return;
|
||||
}
|
||||
case IR::Opcode::ImageQueryLod:
|
||||
inst.ReplaceUsesWith(ir.Imm32(1));
|
||||
return;
|
||||
case IR::Opcode::ImageQueryDimensions: {
|
||||
IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast<u32>(image.width)), // x
|
||||
ir.Imm32(static_cast<u32>(image.width)), // y
|
||||
ir.Imm32(1), ir.Imm32(1)); // depth, mip
|
||||
inst.ReplaceUsesWith(dims);
|
||||
|
||||
// Track FMask resource to do specialization.
|
||||
descriptors.Add(FMaskResource{
|
||||
.sharp_idx = tsharp,
|
||||
});
|
||||
return;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode());
|
||||
}
|
||||
}
|
||||
|
||||
u32 image_binding = descriptors.Add(ImageResource{
|
||||
.sharp_idx = tsharp,
|
||||
.is_depth = bool(inst_info.is_depth),
|
||||
.is_atomic = IsImageAtomicInstruction(inst),
|
||||
.is_array = bool(inst_info.is_array),
|
||||
.is_read = is_read,
|
||||
.is_written = is_written,
|
||||
});
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
|
||||
// Read sampler sharp.
|
||||
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
|
||||
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
|
||||
const IR::Value& handle = producer->Arg(1);
|
||||
// Inline sampler resource.
|
||||
if (handle.IsImmediate()) {
|
||||
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
|
||||
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
|
||||
const auto binding = descriptors.Add(SamplerResource{
|
||||
.sharp_idx = std::numeric_limits<u32>::max(),
|
||||
.inline_sampler = inline_sampler,
|
||||
});
|
||||
return {binding, inline_sampler};
|
||||
}
|
||||
// Normal sampler resource.
|
||||
const auto ssharp_handle = handle.InstRecursive();
|
||||
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
||||
const auto ssharp = TrackSharp(ssharp_ud, info);
|
||||
const auto binding = descriptors.Add(SamplerResource{
|
||||
.sharp_idx = ssharp,
|
||||
.associated_image = image_binding,
|
||||
.disable_aniso = disable_aniso,
|
||||
});
|
||||
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
|
||||
}();
|
||||
// Patch image and sampler handle.
|
||||
inst.SetArg(0, ir.Imm32(image_binding | sampler_binding << 16));
|
||||
} else {
|
||||
// Patch image handle.
|
||||
inst.SetArg(0, ir.Imm32(image_binding));
|
||||
}
|
||||
}
|
||||
|
||||
void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
// Insert gds binding in the shader if it doesn't exist already.
|
||||
// The buffer is used for append/consume counters.
|
||||
constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
|
||||
const u32 binding = descriptors.Add(BufferResource{
|
||||
.used_types = IR::Type::U32,
|
||||
.inline_cbuf = GdsSharp,
|
||||
.is_gds_buffer = true,
|
||||
.is_written = true,
|
||||
});
|
||||
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
// Attempt to deduce the GDS address of counter at compile time.
|
||||
const u32 gds_addr = [&] {
|
||||
const IR::Value& gds_offset = inst.Arg(0);
|
||||
if (gds_offset.IsImmediate()) {
|
||||
// Nothing to do, offset is known.
|
||||
return gds_offset.U32() & 0xFFFF;
|
||||
}
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to track M0 source");
|
||||
|
||||
// M0 must be set by some user data register.
|
||||
const IR::Inst* prod = gds_offset.InstRecursive();
|
||||
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
|
||||
u32 m0_val = info.user_data[ud_reg] >> 16;
|
||||
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
m0_val += prod->Arg(1).U32();
|
||||
}
|
||||
return m0_val & 0xFFFF;
|
||||
}();
|
||||
|
||||
// Patch instruction.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
|
||||
inst.SetArg(1, ir.Imm32(binding));
|
||||
}
|
||||
|
||||
void PatchBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto buffer_res = info.buffers[handle.U32()];
|
||||
const auto buffer = buffer_res.GetSharp(info);
|
||||
|
||||
ASSERT(!buffer.add_tid_enable);
|
||||
|
||||
// Address of constant buffer reads can be calculated at IR emittion time.
|
||||
// Address of constant buffer reads can be calculated at IR emission time.
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
return;
|
||||
}
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
|
||||
const IR::U32 index_stride = ir.Imm32(buffer.index_stride);
|
||||
const IR::U32 element_size = ir.Imm32(buffer.element_size);
|
||||
|
||||
@ -366,21 +537,27 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
inst.SetArg(1, address);
|
||||
}
|
||||
|
||||
void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
const IR::Inst* handle = inst.Arg(0).InstRecursive();
|
||||
const IR::Inst* producer = handle->Arg(0).InstRecursive();
|
||||
const auto sharp = TrackSharp(producer, info);
|
||||
const auto buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp);
|
||||
const s32 binding = descriptors.Add(TextureBufferResource{
|
||||
.sharp_idx = sharp,
|
||||
.is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
|
||||
});
|
||||
void PatchTextureBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto buffer_res = info.texture_buffers[handle.U32()];
|
||||
const auto buffer = buffer_res.GetSharp(info);
|
||||
|
||||
// Replace handle with binding index in texture buffer resource list.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(binding));
|
||||
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
||||
if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
|
||||
const auto swizzled = ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect());
|
||||
const auto converted =
|
||||
ApplyWriteNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
|
||||
inst.SetArg(2, converted);
|
||||
} else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) {
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
|
||||
const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect());
|
||||
const auto converted =
|
||||
ApplyReadNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
|
||||
inst.ReplaceUsesWith(converted);
|
||||
}
|
||||
}
|
||||
|
||||
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
|
||||
@ -409,39 +586,14 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
|
||||
}
|
||||
}
|
||||
|
||||
void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors, const IR::Inst* producer,
|
||||
const u32 image_binding, const AmdGpu::Image& image) {
|
||||
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
|
||||
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
|
||||
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
|
||||
const IR::Value& handle = producer->Arg(1);
|
||||
// Inline sampler resource.
|
||||
if (handle.IsImmediate()) {
|
||||
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
|
||||
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
|
||||
const auto binding = descriptors.Add(SamplerResource{
|
||||
.sharp_idx = std::numeric_limits<u32>::max(),
|
||||
.inline_sampler = inline_sampler,
|
||||
});
|
||||
return {binding, inline_sampler};
|
||||
}
|
||||
// Normal sampler resource.
|
||||
const auto ssharp_handle = handle.InstRecursive();
|
||||
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
||||
const auto ssharp = TrackSharp(ssharp_ud, info);
|
||||
const auto binding = descriptors.Add(SamplerResource{
|
||||
.sharp_idx = ssharp,
|
||||
.associated_image = image_binding,
|
||||
.disable_aniso = disable_aniso,
|
||||
});
|
||||
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
|
||||
}();
|
||||
void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
const AmdGpu::Image& image) {
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto sampler_res = info.samplers[(handle.U32() >> 16) & 0xFFFF];
|
||||
auto sampler = sampler_res.GetSharp(info);
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
const IR::U32 handle = ir.Imm32(image_binding | sampler_binding << 16);
|
||||
|
||||
IR::Inst* body1 = inst.Arg(1).InstRecursive();
|
||||
IR::Inst* body2 = inst.Arg(2).InstRecursive();
|
||||
@ -539,8 +691,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
// Query dimensions of image if needed for normalization.
|
||||
// We can't use the image sharp because it could be bound to a different image later.
|
||||
const auto dimensions =
|
||||
unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false))
|
||||
: IR::Value{};
|
||||
unnormalized ? ir.ImageQueryDimension(handle, ir.Imm32(0u), ir.Imm1(false)) : IR::Value{};
|
||||
const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value {
|
||||
const auto coord = get_addr_reg(coord_idx);
|
||||
if (unnormalized) {
|
||||
@ -589,7 +740,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
: IR::F32{};
|
||||
const IR::F32 lod_clamp = inst_info.has_lod_clamp ? get_addr_reg(addr_reg++) : IR::F32{};
|
||||
|
||||
auto new_inst = [&] -> IR::Value {
|
||||
auto texel = [&] -> IR::Value {
|
||||
if (inst_info.is_gather) {
|
||||
if (inst_info.is_depth) {
|
||||
return ir.ImageGatherDref(handle, coords, offset, dref, inst_info);
|
||||
@ -611,94 +762,30 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
}
|
||||
return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info);
|
||||
}();
|
||||
inst.ReplaceUsesWithAndRemove(new_inst);
|
||||
|
||||
const auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
|
||||
inst.ReplaceUsesWith(converted);
|
||||
}
|
||||
|
||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
const auto opcode = inst->GetOpcode();
|
||||
if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||
opcode == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to find image sharp source");
|
||||
const IR::Inst* producer = result.value();
|
||||
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
|
||||
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
|
||||
|
||||
// Read image sharp.
|
||||
const auto tsharp = TrackSharp(tsharp_handle, info);
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
auto image = info.ReadUdSharp<AmdGpu::Image>(tsharp);
|
||||
if (!image.Valid()) {
|
||||
LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!");
|
||||
image = AmdGpu::Image::Null();
|
||||
}
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
|
||||
const bool is_read = inst.GetOpcode() == IR::Opcode::ImageRead;
|
||||
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite;
|
||||
|
||||
// Patch image instruction if image is FMask.
|
||||
if (image.IsFmask()) {
|
||||
ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ImageRead:
|
||||
case IR::Opcode::ImageSampleRaw: {
|
||||
IR::F32 fmaskx = ir.BitCast<IR::F32>(ir.Imm32(0x76543210));
|
||||
IR::F32 fmasky = ir.BitCast<IR::F32>(ir.Imm32(0xfedcba98));
|
||||
inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky));
|
||||
return;
|
||||
}
|
||||
case IR::Opcode::ImageQueryLod:
|
||||
inst.ReplaceUsesWith(ir.Imm32(1));
|
||||
return;
|
||||
case IR::Opcode::ImageQueryDimensions: {
|
||||
IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast<u32>(image.width)), // x
|
||||
ir.Imm32(static_cast<u32>(image.width)), // y
|
||||
ir.Imm32(1), ir.Imm32(1)); // depth, mip
|
||||
inst.ReplaceUsesWith(dims);
|
||||
|
||||
// Track FMask resource to do specialization.
|
||||
descriptors.Add(FMaskResource{
|
||||
.sharp_idx = tsharp,
|
||||
});
|
||||
return;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode());
|
||||
}
|
||||
}
|
||||
|
||||
u32 image_binding = descriptors.Add(ImageResource{
|
||||
.sharp_idx = tsharp,
|
||||
.is_depth = bool(inst_info.is_depth),
|
||||
.is_atomic = IsImageAtomicInstruction(inst),
|
||||
.is_array = bool(inst_info.is_array),
|
||||
.is_read = is_read,
|
||||
.is_written = is_written,
|
||||
});
|
||||
|
||||
// Sample instructions must be resolved into a new instruction using address register data.
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
|
||||
PatchImageSampleInstruction(block, inst, info, descriptors, producer, image_binding, image);
|
||||
return;
|
||||
}
|
||||
|
||||
// Patch image handle
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(image_binding));
|
||||
|
||||
// No need to patch coordinates if we are just querying.
|
||||
void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
// Nothing to patch for dimension query.
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageQueryDimensions) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto image_res = info.images[handle.U32() & 0xFFFF];
|
||||
auto image = image_res.GetSharp(info);
|
||||
|
||||
// Sample instructions must be handled separately using address register data.
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
|
||||
PatchImageSampleArgs(block, inst, info, image);
|
||||
return;
|
||||
}
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
|
||||
// Now that we know the image type, adjust texture coordinate vector.
|
||||
IR::Inst* body = inst.Arg(1).InstRecursive();
|
||||
const auto [coords, arg] = [&] -> std::pair<IR::Value, IR::Value> {
|
||||
@ -719,152 +806,77 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
||||
case AmdGpu::ImageType::Color3D: // x, y, z, [lod]
|
||||
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
|
||||
case AmdGpu::ImageType::Cube: // x, y, face, [lod]
|
||||
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_written,
|
||||
inst_info.is_array),
|
||||
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2),
|
||||
inst.GetOpcode() == IR::Opcode::ImageWrite, inst_info.is_array),
|
||||
body->Arg(3)};
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown image type {}", image.GetType());
|
||||
}
|
||||
}();
|
||||
inst.SetArg(1, coords);
|
||||
|
||||
if (inst_info.has_lod) {
|
||||
ASSERT(inst.GetOpcode() == IR::Opcode::ImageRead ||
|
||||
inst.GetOpcode() == IR::Opcode::ImageWrite);
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa &&
|
||||
image.GetType() != AmdGpu::ImageType::Color2DMsaaArray);
|
||||
inst.SetArg(2, arg);
|
||||
} else if ((image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
|
||||
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) &&
|
||||
(inst.GetOpcode() == IR::Opcode::ImageRead ||
|
||||
inst.GetOpcode() == IR::Opcode::ImageWrite)) {
|
||||
inst.SetArg(3, arg);
|
||||
}
|
||||
}
|
||||
const auto has_ms = image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
|
||||
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray;
|
||||
ASSERT(!inst_info.has_lod || !has_ms);
|
||||
const auto lod = inst_info.has_lod ? IR::U32{arg} : IR::U32{};
|
||||
const auto ms = has_ms ? IR::U32{arg} : IR::U32{};
|
||||
|
||||
/// Applies the destination swizzle of a texture buffer sharp to a formatted buffer access.
///
/// The resource is looked up in `info.texture_buffers` using the U32 value of the first
/// instruction argument. Stores get their value operand (argument 2) swizzled in place; loads
/// are re-emitted, swizzled, and every user of `inst` is redirected to the swizzled result.
/// Any other opcode, or an invalid sharp, leaves the instruction untouched.
///
/// @param block Block that contains `inst`; the emitter is positioned at `inst`.
/// @param inst  Texture buffer instruction to patch.
/// @param info  Shader info providing the resource table and sharp data.
void PatchTextureBufferInterpretation(IR::Block& block, IR::Inst& inst, Info& info) {
    const auto resource = info.texture_buffers[inst.Arg(0).U32()];
    const auto sharp = resource.GetSharp(info);
    if (!sharp.Valid()) {
        // Nothing to swizzle for an invalid buffer.
        return;
    }

    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    switch (inst.GetOpcode()) {
    case IR::Opcode::StoreBufferFormatF32: {
        const auto swizzled_value = ApplySwizzle(ir, inst.Arg(2), sharp.DstSelect());
        inst.SetArg(2, swizzled_value);
        break;
    }
    case IR::Opcode::LoadBufferFormatF32: {
        const auto flags = inst.Flags<IR::BufferInstInfo>();
        const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), flags);
        inst.ReplaceUsesWith(ApplySwizzle(ir, texel, sharp.DstSelect()));
        break;
    }
    default:
        break;
    }
}
|
||||
|
||||
/// Applies the destination swizzle of an image sharp to storage image reads and writes.
///
/// The image resource is selected by the low 16 bits of the U32 value of the first instruction
/// argument. Only valid sharps that the resource reports as storage are patched. Writes get
/// their texel operand (argument 4) swizzled in place; reads are re-emitted with the same
/// coordinates and optional lod/sample operands, swizzled, and every user of `inst` is
/// redirected to the swizzled result.
///
/// @param block Block that contains `inst`; the emitter is positioned at `inst`.
/// @param inst  Image instruction to patch.
/// @param info  Shader info providing the resource table and sharp data.
void PatchImageInterpretation(IR::Block& block, IR::Inst& inst, Info& info) {
    const auto resource = info.images[inst.Arg(0).U32() & 0xFFFF];
    const auto sharp = resource.GetSharp(info);
    if (!(sharp.Valid() && resource.IsStorage(sharp))) {
        // Invalid or non-storage images need no shader-side swizzle.
        return;
    }

    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    switch (inst.GetOpcode()) {
    case IR::Opcode::ImageWrite: {
        const auto swizzled_texel = ApplySwizzle(ir, inst.Arg(4), sharp.DstSelect());
        inst.SetArg(4, swizzled_texel);
        break;
    }
    case IR::Opcode::ImageRead: {
        const auto flags = inst.Flags<IR::TextureInstInfo>();
        // Empty operands must stay empty when forwarded to the new read.
        const auto lod_arg = inst.Arg(2);
        const auto ms_arg = inst.Arg(3);
        const IR::U32 lod = lod_arg.IsEmpty() ? IR::U32{} : IR::U32{lod_arg};
        const IR::U32 ms = ms_arg.IsEmpty() ? IR::U32{} : IR::U32{ms_arg};
        const auto texel = ir.ImageRead(inst.Arg(0), inst.Arg(1), lod, ms, flags);
        inst.ReplaceUsesWith(ApplySwizzle(ir, texel, sharp.DstSelect()));
        break;
    }
    default:
        break;
    }
}
|
||||
|
||||
void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
// Insert gds binding in the shader if it doesn't exist already.
|
||||
// The buffer is used for append/consume counters.
|
||||
constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
|
||||
const u32 binding = descriptors.Add(BufferResource{
|
||||
.used_types = IR::Type::U32,
|
||||
.inline_cbuf = GdsSharp,
|
||||
.is_gds_buffer = true,
|
||||
.is_written = true,
|
||||
});
|
||||
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
const auto is_storage = image_res.IsStorage(image);
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageRead) {
|
||||
auto texel = ir.ImageRead(handle, coords, lod, ms, inst_info);
|
||||
if (is_storage) {
|
||||
// Storage image requires shader swizzle.
|
||||
texel = ApplySwizzle(ir, texel, image.DstSelect());
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
const auto converted =
|
||||
ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
|
||||
inst.ReplaceUsesWith(converted);
|
||||
} else {
|
||||
inst.SetArg(1, coords);
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageWrite) {
|
||||
inst.SetArg(2, lod);
|
||||
inst.SetArg(3, ms);
|
||||
|
||||
// Attempt to deduce the GDS address of counter at compile time.
|
||||
const u32 gds_addr = [&] {
|
||||
const IR::Value& gds_offset = inst.Arg(0);
|
||||
if (gds_offset.IsImmediate()) {
|
||||
// Nothing to do, offset is known.
|
||||
return gds_offset.U32() & 0xFFFF;
|
||||
auto texel = inst.Arg(4);
|
||||
if (is_storage) {
|
||||
// Storage image requires shader swizzle.
|
||||
texel = ApplySwizzle(ir, texel, image.DstSelect());
|
||||
}
|
||||
const auto converted =
|
||||
ApplyWriteNumberConversionVec4(ir, texel, image.GetNumberConversion());
|
||||
inst.SetArg(4, converted);
|
||||
}
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to track M0 source");
|
||||
|
||||
// M0 must be set by some user data register.
|
||||
const IR::Inst* prod = gds_offset.InstRecursive();
|
||||
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
|
||||
u32 m0_val = info.user_data[ud_reg] >> 16;
|
||||
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
m0_val += prod->Arg(1).U32();
|
||||
}
|
||||
return m0_val & 0xFFFF;
|
||||
}();
|
||||
|
||||
// Patch instruction.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
|
||||
inst.SetArg(1, ir.Imm32(binding));
|
||||
}
|
||||
}
|
||||
|
||||
void ResourceTrackingPass(IR::Program& program) {
|
||||
// Iterate resource instructions and patch them after finding the sharp.
|
||||
auto& info = program.info;
|
||||
|
||||
// Pass 1: Track resource sharps
|
||||
Descriptors descriptors{info};
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (IsBufferInstruction(inst)) {
|
||||
PatchBufferInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
if (IsImageInstruction(inst)) {
|
||||
PatchImageInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
if (IsDataRingInstruction(inst)) {
|
||||
PatchDataRingInstruction(*block, inst, info, descriptors);
|
||||
PatchBufferSharp(*block, inst, info, descriptors);
|
||||
} else if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferSharp(*block, inst, info, descriptors);
|
||||
} else if (IsImageInstruction(inst)) {
|
||||
PatchImageSharp(*block, inst, info, descriptors);
|
||||
} else if (IsDataRingInstruction(inst)) {
|
||||
PatchDataRingAccess(*block, inst, info, descriptors);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Second pass to reinterpret format read/write where needed, since we now know
|
||||
// the bindings and their properties.
|
||||
|
||||
// Pass 2: Patch instruction args
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferInterpretation(*block, inst, info);
|
||||
continue;
|
||||
}
|
||||
if (IsImageInstruction(inst)) {
|
||||
PatchImageInterpretation(*block, inst, info);
|
||||
if (IsBufferInstruction(inst)) {
|
||||
PatchBufferArgs(*block, inst, info);
|
||||
} else if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferArgs(*block, inst, info);
|
||||
} else if (IsImageInstruction(inst)) {
|
||||
PatchImageArgs(*block, inst, info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -8,6 +8,54 @@
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
/// Inserts barriers between shared memory reads and writes that occur in the same basic block.
///
/// A barrier is emitted at the first shared load that follows a shared write, and at the first
/// shared write that follows a shared load. This function does not inspect control flow itself;
/// NOTE(review): callers appear to be responsible for only passing blocks that are not inside
/// potentially divergent branches — confirm against the call site.
///
/// @param block Block whose instruction list is scanned and patched.
static void EmitBarrierInBlock(IR::Block* block) {
    bool write_needs_barrier = false; // Set once a shared load has been seen.
    bool read_needs_barrier = false;  // Set once a shared write has been seen.

    // Emits a barrier with the emitter positioned at `at` if `pending` is set, then clears it.
    const auto flush_pending = [block](bool& pending, IR::Inst& at) {
        if (!pending) {
            return;
        }
        IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(at)};
        ir.Barrier();
        pending = false;
    };

    for (IR::Inst& inst : block->Instructions()) {
        switch (inst.GetOpcode()) {
        case IR::Opcode::LoadSharedU32:
        case IR::Opcode::LoadSharedU64:
            flush_pending(read_needs_barrier, inst);
            write_needs_barrier = true;
            break;
        case IR::Opcode::WriteSharedU32:
        case IR::Opcode::WriteSharedU64:
            flush_pending(write_needs_barrier, inst);
            read_needs_barrier = true;
            break;
        default:
            break;
        }
    }
}
|
||||
|
||||
/// Inserts a barrier at the start of an if-node's merge block when the branch condition is
/// derived from the local invocation id.
///
/// The condition is searched breadth-first for a `GetAttributeU32` of
/// `IR::Attribute::LocalInvocationId`. When one is found, a barrier is emitted in the merge
/// block at the first instruction that is not a phi. The intent is to avoid softlocks and
/// crashes when some threads initialize shared memory and others read from it.
///
/// @param data Syntax node payload; only `if_node.cond` and `if_node.merge` are read.
static void EmitBarrierInMergeBlock(const IR::AbstractSyntaxNode::Data& data) {
    const auto reads_local_invocation_id = [](IR::Inst* inst) -> std::optional<bool> {
        const bool is_match = inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
                              inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId;
        if (is_match) {
            return true;
        }
        return std::nullopt;
    };

    const IR::U1 condition = data.if_node.cond;
    const auto found = IR::BreadthFirstSearch(condition, reads_local_invocation_id);
    if (!found) {
        return;
    }

    // Phi instructions have to stay at the top of the block, so insert right after them.
    IR::Block* const merge_block = data.if_node.merge;
    auto first_non_phi = std::ranges::find_if_not(merge_block->Instructions(), IR::IsPhi);
    IR::IREmitter ir{*merge_block, first_non_phi};
    ir.Barrier();
}
|
||||
|
||||
void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
|
||||
if (!program.info.uses_shared || !profile.needs_lds_barriers) {
|
||||
return;
|
||||
@ -19,27 +67,12 @@ void SharedMemoryBarrierPass(IR::Program& program, const Profile& profile) {
|
||||
--branch_depth;
|
||||
continue;
|
||||
}
|
||||
if (node.type != Type::If) {
|
||||
if (node.type == Type::If && branch_depth++ == 0) {
|
||||
EmitBarrierInMergeBlock(node.data);
|
||||
continue;
|
||||
}
|
||||
u32 curr_depth = branch_depth++;
|
||||
if (curr_depth != 0) {
|
||||
continue;
|
||||
}
|
||||
const IR::U1 cond = node.data.if_node.cond;
|
||||
const auto insert_barrier =
|
||||
IR::BreadthFirstSearch(cond, [](IR::Inst* inst) -> std::optional<bool> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetAttributeU32 &&
|
||||
inst->Arg(0).Attribute() == IR::Attribute::LocalInvocationId) {
|
||||
return true;
|
||||
}
|
||||
return std::nullopt;
|
||||
});
|
||||
if (insert_barrier) {
|
||||
IR::Block* const merge = node.data.if_node.merge;
|
||||
auto insert_point = std::ranges::find_if_not(merge->Instructions(), IR::IsPhi);
|
||||
IR::IREmitter ir{*merge, insert_point};
|
||||
ir.Barrier();
|
||||
if (node.type == Type::Block && branch_depth == 0) {
|
||||
EmitBarrierInBlock(node.data.block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4,7 +4,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
@ -21,4 +21,66 @@ inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::Comp
|
||||
return swizzled;
|
||||
}
|
||||
|
||||
/// Applies a number conversion in the read direction.
|
||||
inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value,
|
||||
const AmdGpu::NumberConversion& conversion) {
|
||||
switch (conversion) {
|
||||
case AmdGpu::NumberConversion::None:
|
||||
return value;
|
||||
case AmdGpu::NumberConversion::UintToUscaled:
|
||||
return ir.ConvertUToF(32, 32, ir.BitCast<U32>(value));
|
||||
case AmdGpu::NumberConversion::SintToSscaled:
|
||||
return ir.ConvertSToF(32, 32, ir.BitCast<U32>(value));
|
||||
case AmdGpu::NumberConversion::UnormToUbnorm:
|
||||
// Convert 0...1 to -1...1
|
||||
return ir.FPSub(ir.FPMul(value, ir.Imm32(2.f)), ir.Imm32(1.f));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies `ApplyReadNumberConversion` to each of the four components of `value`.
///
/// When `conversion` is `None` the input is returned as-is and no instructions are emitted.
/// Otherwise components 0 through 3 are extracted and converted in order, and a new composite
/// is built from the results.
///
/// @param ir         Emitter used to create the extract/convert/construct instructions.
/// @param value      Four-component composite produced by a read.
/// @param conversion Conversion to apply to every component.
/// @return The converted composite, or `value` itself when no conversion is needed.
inline Value ApplyReadNumberConversionVec4(IREmitter& ir, const Value& value,
                                           const AmdGpu::NumberConversion& conversion) {
    if (conversion == AmdGpu::NumberConversion::None) {
        return value;
    }

    const auto convert_lane = [&](const unsigned lane) {
        return ApplyReadNumberConversion(ir, F32{ir.CompositeExtract(value, lane)}, conversion);
    };
    // Keep the lanes as separate statements so instructions are emitted in x, y, z, w order.
    const auto lane_x = convert_lane(0);
    const auto lane_y = convert_lane(1);
    const auto lane_z = convert_lane(2);
    const auto lane_w = convert_lane(3);
    return ir.CompositeConstruct(lane_x, lane_y, lane_z, lane_w);
}
|
||||
|
||||
/// Applies a number conversion in the write direction.
|
||||
inline F32 ApplyWriteNumberConversion(IREmitter& ir, const F32& value,
|
||||
const AmdGpu::NumberConversion& conversion) {
|
||||
switch (conversion) {
|
||||
case AmdGpu::NumberConversion::None:
|
||||
return value;
|
||||
case AmdGpu::NumberConversion::UintToUscaled:
|
||||
// Need to return float type to maintain IR semantics.
|
||||
return ir.BitCast<F32>(U32{ir.ConvertFToU(32, value)});
|
||||
case AmdGpu::NumberConversion::SintToSscaled:
|
||||
// Need to return float type to maintain IR semantics.
|
||||
return ir.BitCast<F32>(U32{ir.ConvertFToS(32, value)});
|
||||
case AmdGpu::NumberConversion::UnormToUbnorm:
|
||||
// Convert -1...1 to 0...1
|
||||
return ir.FPDiv(ir.FPAdd(value, ir.Imm32(1.f)), ir.Imm32(2.f));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies `ApplyWriteNumberConversion` to each of the four components of `value`.
///
/// When `conversion` is `None` the input is returned as-is and no instructions are emitted.
/// Otherwise components 0 through 3 are extracted and converted in order, and a new composite
/// is built from the results.
///
/// @param ir         Emitter used to create the extract/convert/construct instructions.
/// @param value      Four-component composite that is about to be written.
/// @param conversion Conversion to apply to every component.
/// @return The converted composite, or `value` itself when no conversion is needed.
inline Value ApplyWriteNumberConversionVec4(IREmitter& ir, const Value& value,
                                            const AmdGpu::NumberConversion& conversion) {
    if (conversion == AmdGpu::NumberConversion::None) {
        return value;
    }

    const auto convert_lane = [&](const unsigned lane) {
        return ApplyWriteNumberConversion(ir, F32{ir.CompositeExtract(value, lane)}, conversion);
    };
    // Keep the lanes as separate statements so instructions are emitted in x, y, z, w order.
    const auto lane_x = convert_lane(0);
    const auto lane_y = convert_lane(1);
    const auto lane_z = convert_lane(2);
    const auto lane_w = convert_lane(3);
    return ir.CompositeConstruct(lane_x, lane_y, lane_z, lane_w);
}
|
||||
|
||||
} // namespace Shader::IR
|
||||
|
@ -180,6 +180,7 @@ struct FragmentRuntimeInfo {
|
||||
std::array<PsInput, 32> inputs;
|
||||
struct PsColorBuffer {
|
||||
AmdGpu::NumberFormat num_format;
|
||||
AmdGpu::NumberConversion num_conversion;
|
||||
AmdGpu::CompMapping swizzle;
|
||||
|
||||
auto operator<=>(const PsColorBuffer&) const noexcept = default;
|
||||
|
@ -32,6 +32,7 @@ struct BufferSpecialization {
|
||||
struct TextureBufferSpecialization {
|
||||
bool is_integer = false;
|
||||
AmdGpu::CompMapping dst_select{};
|
||||
AmdGpu::NumberConversion num_conversion{};
|
||||
|
||||
auto operator<=>(const TextureBufferSpecialization&) const = default;
|
||||
};
|
||||
@ -41,6 +42,7 @@ struct ImageSpecialization {
|
||||
bool is_integer = false;
|
||||
bool is_storage = false;
|
||||
AmdGpu::CompMapping dst_select{};
|
||||
AmdGpu::NumberConversion num_conversion{};
|
||||
|
||||
auto operator<=>(const ImageSpecialization&) const = default;
|
||||
};
|
||||
@ -107,6 +109,7 @@ struct StageSpecialization {
|
||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||
spec.dst_select = sharp.DstSelect();
|
||||
spec.num_conversion = sharp.GetNumberConversion();
|
||||
});
|
||||
ForEachSharp(binding, images, info->images,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||
@ -116,6 +119,7 @@ struct StageSpecialization {
|
||||
if (spec.is_storage) {
|
||||
spec.dst_select = sharp.DstSelect();
|
||||
}
|
||||
spec.num_conversion = sharp.GetNumberConversion();
|
||||
});
|
||||
ForEachSharp(binding, fmasks, info->fmasks,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||
|
@ -454,7 +454,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
case PM4ItOpcode::DrawIndirect: {
|
||||
const auto* draw_indirect = reinterpret_cast<const PM4CmdDrawIndirect*>(header);
|
||||
const auto offset = draw_indirect->data_offset;
|
||||
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
||||
const auto size = sizeof(DrawIndirectArgs);
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
|
||||
@ -462,7 +461,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
if (rasterizer) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndirect", cmd_address));
|
||||
rasterizer->DrawIndirect(false, ib_address, offset, size, 1, 0);
|
||||
rasterizer->DrawIndirect(false, indirect_args_addr, offset, size, 1, 0);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
@ -471,7 +470,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
const auto* draw_index_indirect =
|
||||
reinterpret_cast<const PM4CmdDrawIndexIndirect*>(header);
|
||||
const auto offset = draw_index_indirect->data_offset;
|
||||
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
||||
const auto size = sizeof(DrawIndexedIndirectArgs);
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
|
||||
@ -480,7 +478,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(
|
||||
fmt::format("dcb:{}:DrawIndexIndirect", cmd_address));
|
||||
rasterizer->DrawIndirect(true, ib_address, offset, size, 1, 0);
|
||||
rasterizer->DrawIndirect(true, indirect_args_addr, offset, size, 1, 0);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
@ -489,7 +487,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
const auto* draw_index_indirect =
|
||||
reinterpret_cast<const PM4CmdDrawIndexIndirectMulti*>(header);
|
||||
const auto offset = draw_index_indirect->data_offset;
|
||||
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
|
||||
}
|
||||
@ -497,9 +494,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(
|
||||
fmt::format("dcb:{}:DrawIndexIndirectCountMulti", cmd_address));
|
||||
rasterizer->DrawIndirect(true, ib_address, offset, draw_index_indirect->stride,
|
||||
draw_index_indirect->count,
|
||||
draw_index_indirect->countAddr);
|
||||
rasterizer->DrawIndirect(
|
||||
true, indirect_args_addr, offset, draw_index_indirect->stride,
|
||||
draw_index_indirect->count, draw_index_indirect->countAddr);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
@ -528,7 +525,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
|
||||
auto& cs_program = GetCsRegs();
|
||||
const auto offset = dispatch_indirect->data_offset;
|
||||
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
||||
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
|
||||
@ -538,7 +534,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(
|
||||
fmt::format("dcb:{}:DispatchIndirect", cmd_address));
|
||||
rasterizer->DispatchIndirect(ib_address, offset, size);
|
||||
rasterizer->DispatchIndirect(indirect_args_addr, offset, size);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
@ -562,7 +558,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
case PM4ItOpcode::SetBase: {
|
||||
const auto* set_base = reinterpret_cast<const PM4CmdSetBase*>(header);
|
||||
ASSERT(set_base->base_index == PM4CmdSetBase::BaseIndex::DrawIndexIndirPatchTable);
|
||||
mapped_queues[GfxQueueId].indirect_args_addr = set_base->Address<u64>();
|
||||
indirect_args_addr = set_base->Address<u64>();
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::EventWrite: {
|
||||
@ -823,10 +819,10 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DispatchIndirect: {
|
||||
const auto* dispatch_indirect = reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
|
||||
const auto* dispatch_indirect =
|
||||
reinterpret_cast<const PM4CmdDispatchIndirectMec*>(header);
|
||||
auto& cs_program = GetCsRegs();
|
||||
const auto offset = dispatch_indirect->data_offset;
|
||||
const auto ib_address = mapped_queues[vqid].indirect_args_addr;
|
||||
const auto ib_address = dispatch_indirect->Address<VAddr>();
|
||||
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
|
||||
@ -835,7 +831,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
|
||||
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address));
|
||||
rasterizer->DispatchIndirect(ib_address, offset, size);
|
||||
rasterizer->DispatchIndirect(ib_address, 0, size);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
|
@ -20,9 +20,9 @@
|
||||
#include "common/types.h"
|
||||
#include "common/unique_function.h"
|
||||
#include "shader_recompiler/params.h"
|
||||
#include "types.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
namespace Vulkan {
|
||||
class Rasterizer;
|
||||
@ -814,7 +814,9 @@ struct Liverpool {
|
||||
BitField<26, 1, u32> fmask_compression_disable_ci;
|
||||
BitField<27, 1, u32> fmask_compress_1frag_only;
|
||||
BitField<28, 1, u32> dcc_enable;
|
||||
BitField<29, 1, u32> cmask_addr_type;
|
||||
BitField<29, 2, u32> cmask_addr_type;
|
||||
/// Neo-mode only
|
||||
BitField<31, 1, u32> alt_tile_mode;
|
||||
|
||||
u32 u32all;
|
||||
} info;
|
||||
@ -889,17 +891,21 @@ struct Liverpool {
|
||||
return !info.linear_general;
|
||||
}
|
||||
|
||||
[[nodiscard]] DataFormat DataFormat() const {
|
||||
[[nodiscard]] DataFormat GetDataFmt() const {
|
||||
return RemapDataFormat(info.format);
|
||||
}
|
||||
|
||||
[[nodiscard]] NumberFormat NumFormat() const {
|
||||
[[nodiscard]] NumberFormat GetNumberFmt() const {
|
||||
// There is a small difference between T# and CB number types, account for it.
|
||||
return RemapNumberFormat(info.number_type == NumberFormat::SnormNz
|
||||
? NumberFormat::Srgb
|
||||
: info.number_type.Value());
|
||||
}
|
||||
|
||||
[[nodiscard]] NumberConversion GetNumberConversion() const {
|
||||
return MapNumberConversion(info.number_type);
|
||||
}
|
||||
|
||||
[[nodiscard]] CompMapping Swizzle() const {
|
||||
// clang-format off
|
||||
static constexpr std::array<std::array<CompMapping, 4>, 4> mrt_swizzles{{
|
||||
@ -936,7 +942,7 @@ struct Liverpool {
|
||||
const auto swap_idx = static_cast<u32>(info.comp_swap.Value());
|
||||
const auto components_idx = NumComponents(info.format) - 1;
|
||||
const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx];
|
||||
return RemapComponents(info.format, mrt_swizzle);
|
||||
return RemapSwizzle(info.format, mrt_swizzle);
|
||||
}
|
||||
};
|
||||
|
||||
@ -1477,11 +1483,12 @@ private:
|
||||
std::vector<u32> ccb_buffer;
|
||||
std::queue<Task::Handle> submits{};
|
||||
ComputeProgram cs_state{};
|
||||
VAddr indirect_args_addr{};
|
||||
};
|
||||
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
|
||||
u32 num_mapped_queues{1u}; // GFX is always available
|
||||
|
||||
VAddr indirect_args_addr{};
|
||||
|
||||
struct ConstantEngine {
|
||||
void Reset() {
|
||||
ce_count = 0;
|
||||
|
@ -100,7 +100,7 @@ std::string_view NameOf(NumberFormat fmt) {
|
||||
return "Srgb";
|
||||
case NumberFormat::Ubnorm:
|
||||
return "Ubnorm";
|
||||
case NumberFormat::UbnromNz:
|
||||
case NumberFormat::UbnormNz:
|
||||
return "UbnormNz";
|
||||
case NumberFormat::Ubint:
|
||||
return "Ubint";
|
||||
|
@ -204,6 +204,11 @@ struct PM4CmdSetData {
|
||||
static constexpr u32* SetShReg(u32* cmdbuf, Args... data) {
|
||||
return WritePacket<PM4ItOpcode::SetShReg>(cmdbuf, type, data...);
|
||||
}
|
||||
|
||||
template <PM4ShaderType type = PM4ShaderType::ShaderGraphics, typename... Args>
|
||||
static constexpr u32* SetUconfigReg(u32* cmdbuf, Args... data) {
|
||||
return WritePacket<PM4ItOpcode::SetUconfigReg>(cmdbuf, type, data...);
|
||||
}
|
||||
};
|
||||
|
||||
struct PM4CmdNop {
|
||||
@ -791,6 +796,18 @@ struct PM4CmdDispatchIndirect {
|
||||
u32 dispatch_initiator; ///< Dispatch Initiator Register
|
||||
};
|
||||
|
||||
struct PM4CmdDispatchIndirectMec {
|
||||
PM4Type3Header header;
|
||||
u32 address0;
|
||||
u32 address1;
|
||||
u32 dispatch_initiator; ///< Dispatch Initiator Register
|
||||
|
||||
template <typename T>
|
||||
T Address() const {
|
||||
return std::bit_cast<T>(address0 | (u64(address1 & 0xffff) << 32u));
|
||||
}
|
||||
};
|
||||
|
||||
struct DrawIndirectArgs {
|
||||
u32 vertex_count_per_instance;
|
||||
u32 instance_count;
|
||||
|
@ -11,94 +11,6 @@
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
/// Source selector for one output component of a swizzle.
/// Values 2 and 3 are not assigned to any enumerator.
enum class CompSwizzle : u32 {
    Zero = 0,  ///< Constant zero
    One = 1,   ///< Constant one
    Red = 4,   ///< First source component
    Green = 5, ///< Second source component
    Blue = 6,  ///< Third source component
    Alpha = 7, ///< Fourth source component
};
|
||||
|
||||
struct CompMapping {
|
||||
CompSwizzle r : 3;
|
||||
CompSwizzle g : 3;
|
||||
CompSwizzle b : 3;
|
||||
CompSwizzle a : 3;
|
||||
|
||||
auto operator<=>(const CompMapping& other) const = default;
|
||||
|
||||
template <typename T>
|
||||
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
|
||||
return {
|
||||
ApplySingle(data, r),
|
||||
ApplySingle(data, g),
|
||||
ApplySingle(data, b),
|
||||
ApplySingle(data, a),
|
||||
};
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
|
||||
switch (swizzle) {
|
||||
case CompSwizzle::Zero:
|
||||
return T(0);
|
||||
case CompSwizzle::One:
|
||||
return T(1);
|
||||
case CompSwizzle::Red:
|
||||
return data[0];
|
||||
case CompSwizzle::Green:
|
||||
return data[1];
|
||||
case CompSwizzle::Blue:
|
||||
return data[2];
|
||||
case CompSwizzle::Alpha:
|
||||
return data[3];
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Replaces the three packed formats below with their reversed-order counterparts;
/// every other format is returned unchanged.
inline DataFormat RemapDataFormat(const DataFormat format) {
    if (format == DataFormat::Format11_11_10) {
        return DataFormat::Format10_11_11;
    }
    if (format == DataFormat::Format10_10_10_2) {
        return DataFormat::Format2_10_10_10;
    }
    if (format == DataFormat::Format5_5_5_1) {
        return DataFormat::Format1_5_5_5;
    }
    return format;
}
|
||||
|
||||
/// Identity mapping: returns `format` unchanged.
inline NumberFormat RemapNumberFormat(const NumberFormat format) {
|
||||
return format;
|
||||
}
|
||||
|
||||
/// Reorders a component mapping for the packed formats handled here:
/// Format11_11_10 exchanges r and b; Format10_10_10_2 and Format5_5_5_1 reverse
/// all four components. Any other format keeps `components` as given.
inline CompMapping RemapComponents(const DataFormat format, const CompMapping components) {
    if (format == DataFormat::Format11_11_10) {
        return {
            .r = components.b,
            .g = components.g,
            .b = components.r,
            .a = components.a,
        };
    }
    const bool reverse_all =
        format == DataFormat::Format10_10_10_2 || format == DataFormat::Format5_5_5_1;
    if (reverse_all) {
        return {
            .r = components.a,
            .g = components.b,
            .b = components.g,
            .a = components.r,
        };
    }
    return components;
}
|
||||
|
||||
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
|
||||
struct Buffer {
|
||||
u64 base_address : 44;
|
||||
@ -138,7 +50,7 @@ struct Buffer {
|
||||
.b = CompSwizzle(dst_sel_z),
|
||||
.a = CompSwizzle(dst_sel_w),
|
||||
};
|
||||
return RemapComponents(DataFormat(data_format), dst_sel);
|
||||
return RemapSwizzle(DataFormat(data_format), dst_sel);
|
||||
}
|
||||
|
||||
NumberFormat GetNumberFmt() const noexcept {
|
||||
@ -149,6 +61,10 @@ struct Buffer {
|
||||
return RemapDataFormat(DataFormat(data_format));
|
||||
}
|
||||
|
||||
/// Returns the conversion MapNumberConversion selects for this descriptor's num_format.
NumberConversion GetNumberConversion() const noexcept {
    const auto number_format = NumberFormat(num_format);
    return MapNumberConversion(number_format);
}
|
||||
|
||||
/// Returns the descriptor's `stride` field as-is.
u32 GetStride() const noexcept {
|
||||
return stride;
|
||||
}
|
||||
@ -261,7 +177,15 @@ struct Image {
|
||||
u64 min_lod_warn : 12;
|
||||
u64 counter_bank_id : 8;
|
||||
u64 lod_hw_cnt_en : 1;
|
||||
u64 : 43;
|
||||
/// Neo-mode only
|
||||
u64 compression_en : 1;
|
||||
/// Neo-mode only
|
||||
u64 alpha_is_on_msb : 1;
|
||||
/// Neo-mode only
|
||||
u64 color_transform : 1;
|
||||
/// Neo-mode only
|
||||
u64 alt_tile_mode : 1;
|
||||
u64 : 39;
|
||||
|
||||
static constexpr Image Null() {
|
||||
Image image{};
|
||||
@ -295,7 +219,7 @@ struct Image {
|
||||
.b = CompSwizzle(dst_sel_z),
|
||||
.a = CompSwizzle(dst_sel_w),
|
||||
};
|
||||
return RemapComponents(DataFormat(data_format), dst_sel);
|
||||
return RemapSwizzle(DataFormat(data_format), dst_sel);
|
||||
}
|
||||
|
||||
u32 Pitch() const {
|
||||
@ -344,6 +268,10 @@ struct Image {
|
||||
return RemapNumberFormat(NumberFormat(num_format));
|
||||
}
|
||||
|
||||
/// Returns the conversion MapNumberConversion selects for this descriptor's num_format.
NumberConversion GetNumberConversion() const noexcept {
    const auto number_format = NumberFormat(num_format);
    return MapNumberConversion(number_format);
}
|
||||
|
||||
TilingMode GetTilingMode() const {
|
||||
if (tiling_index >= 0 && tiling_index <= 7) {
|
||||
return tiling_index == 5 ? TilingMode::Texture_MicroTiled
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
#include <string_view>
|
||||
#include <fmt/format.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/types.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
@ -177,11 +178,130 @@ enum class NumberFormat : u32 {
|
||||
Float = 7,
|
||||
Srgb = 9,
|
||||
Ubnorm = 10,
|
||||
UbnromNz = 11,
|
||||
UbnormNz = 11,
|
||||
Ubint = 12,
|
||||
Ubscaled = 13,
|
||||
};
|
||||
|
||||
/// Source selector for a single swizzled component: a constant (Zero/One) or
/// one of the four input channels. Values 2 and 3 are not assigned.
enum class CompSwizzle : u32 {
    Zero = 0,  ///< Constant 0
    One = 1,   ///< Constant 1
    Red = 4,   ///< Input channel 0
    Green = 5, ///< Input channel 1
    Blue = 6,  ///< Input channel 2
    Alpha = 7, ///< Input channel 3
};
|
||||
|
||||
/// Conversion kinds returned by MapNumberConversion. Enumerator values are the
/// same as the implicit numbering (0..3), now spelled out.
enum class NumberConversion : u32 {
    None = 0,          ///< No conversion
    UintToUscaled = 1, ///< Selected for NumberFormat::Uscaled
    SintToSscaled = 2, ///< Selected for NumberFormat::Sscaled
    UnormToUbnorm = 3, ///< Selected for NumberFormat::Ubnorm
};
|
||||
|
||||
struct CompMapping {
|
||||
CompSwizzle r : 3;
|
||||
CompSwizzle g : 3;
|
||||
CompSwizzle b : 3;
|
||||
CompSwizzle a : 3;
|
||||
|
||||
auto operator<=>(const CompMapping& other) const = default;
|
||||
|
||||
template <typename T>
|
||||
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
|
||||
return {
|
||||
ApplySingle(data, r),
|
||||
ApplySingle(data, g),
|
||||
ApplySingle(data, b),
|
||||
ApplySingle(data, a),
|
||||
};
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
|
||||
switch (swizzle) {
|
||||
case CompSwizzle::Zero:
|
||||
return T(0);
|
||||
case CompSwizzle::One:
|
||||
return T(1);
|
||||
case CompSwizzle::Red:
|
||||
return data[0];
|
||||
case CompSwizzle::Green:
|
||||
return data[1];
|
||||
case CompSwizzle::Blue:
|
||||
return data[2];
|
||||
case CompSwizzle::Alpha:
|
||||
return data[3];
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Replaces the three packed formats below with their reversed-order counterparts;
/// every other format is returned unchanged.
inline DataFormat RemapDataFormat(const DataFormat format) {
    if (format == DataFormat::Format11_11_10) {
        return DataFormat::Format10_11_11;
    }
    if (format == DataFormat::Format10_10_10_2) {
        return DataFormat::Format2_10_10_10;
    }
    if (format == DataFormat::Format5_5_5_1) {
        return DataFormat::Format1_5_5_5;
    }
    return format;
}
|
||||
|
||||
/// Maps Uscaled -> Uint, Sscaled -> Sint and Ubnorm -> Unorm; any other number
/// format is returned unchanged.
inline NumberFormat RemapNumberFormat(const NumberFormat format) {
    if (format == NumberFormat::Uscaled) {
        return NumberFormat::Uint;
    }
    if (format == NumberFormat::Sscaled) {
        return NumberFormat::Sint;
    }
    if (format == NumberFormat::Ubnorm) {
        return NumberFormat::Unorm;
    }
    return format;
}
|
||||
|
||||
/// Reorders a swizzle for the packed formats handled here: Format11_11_10
/// exchanges r and b; Format10_10_10_2 and Format5_5_5_1 reverse all four
/// components. Any other format keeps `swizzle` as given.
inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizzle) {
    if (format == DataFormat::Format11_11_10) {
        // g and a keep their values from the copy; only r and b trade places.
        CompMapping remapped = swizzle;
        remapped.r = swizzle.b;
        remapped.b = swizzle.r;
        return remapped;
    }
    const bool reverse_all =
        format == DataFormat::Format10_10_10_2 || format == DataFormat::Format5_5_5_1;
    if (reverse_all) {
        CompMapping remapped = swizzle;
        remapped.r = swizzle.a;
        remapped.g = swizzle.b;
        remapped.b = swizzle.g;
        remapped.a = swizzle.r;
        return remapped;
    }
    return swizzle;
}
|
||||
|
||||
/// Picks the NumberConversion paired with a number format: Uscaled, Sscaled and
/// Ubnorm each have a dedicated conversion; everything else maps to None.
inline NumberConversion MapNumberConversion(const NumberFormat format) {
    if (format == NumberFormat::Uscaled) {
        return NumberConversion::UintToUscaled;
    }
    if (format == NumberFormat::Sscaled) {
        return NumberConversion::SintToSscaled;
    }
    if (format == NumberFormat::Ubnorm) {
        return NumberConversion::UnormToUbnorm;
    }
    return NumberConversion::None;
}
|
||||
|
||||
} // namespace AmdGpu
|
||||
|
||||
template <>
|
||||
|
@ -119,19 +119,23 @@ public:
|
||||
return buffer;
|
||||
}
|
||||
|
||||
std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlagBits2 dst_acess_mask,
|
||||
vk::PipelineStageFlagBits2 dst_stage) {
|
||||
std::optional<vk::BufferMemoryBarrier2> GetBarrier(
|
||||
vk::Flags<vk::AccessFlagBits2> dst_acess_mask, vk::PipelineStageFlagBits2 dst_stage,
|
||||
u32 offset = 0) {
|
||||
if (dst_acess_mask == access_mask && stage == dst_stage) {
|
||||
return {};
|
||||
}
|
||||
|
||||
DEBUG_ASSERT(offset < size_bytes);
|
||||
|
||||
auto barrier = vk::BufferMemoryBarrier2{
|
||||
.srcStageMask = stage,
|
||||
.srcAccessMask = access_mask,
|
||||
.dstStageMask = dst_stage,
|
||||
.dstAccessMask = dst_acess_mask,
|
||||
.buffer = buffer.buffer,
|
||||
.size = size_bytes,
|
||||
.offset = offset,
|
||||
.size = size_bytes - offset,
|
||||
};
|
||||
access_mask = dst_acess_mask;
|
||||
stage = dst_stage;
|
||||
@ -150,8 +154,10 @@ public:
|
||||
Vulkan::Scheduler* scheduler;
|
||||
MemoryUsage usage;
|
||||
UniqueBuffer buffer;
|
||||
vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone};
|
||||
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone};
|
||||
vk::Flags<vk::AccessFlagBits2> access_mask{
|
||||
vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
|
||||
vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite};
|
||||
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eAllCommands};
|
||||
};
|
||||
|
||||
class StreamBuffer : public Buffer {
|
||||
|
@ -34,38 +34,19 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
|
||||
|
||||
// Ensure the first slot is used for the null buffer
|
||||
const auto null_id =
|
||||
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1);
|
||||
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 16);
|
||||
ASSERT(null_id.index == 0);
|
||||
const vk::Buffer& null_buffer = slot_buffers[null_id].buffer;
|
||||
Vulkan::SetObjectName(instance.GetDevice(), null_buffer, "Null Buffer");
|
||||
|
||||
const vk::BufferViewCreateInfo null_view_ci = {
|
||||
.buffer = null_buffer,
|
||||
.format = vk::Format::eR8Unorm,
|
||||
.offset = 0,
|
||||
.range = VK_WHOLE_SIZE,
|
||||
};
|
||||
const auto [null_view_result, null_view] = instance.GetDevice().createBufferView(null_view_ci);
|
||||
ASSERT_MSG(null_view_result == vk::Result::eSuccess, "Failed to create null buffer view.");
|
||||
null_buffer_view = null_view;
|
||||
Vulkan::SetObjectName(instance.GetDevice(), null_buffer_view, "Null Buffer View");
|
||||
}
|
||||
|
||||
BufferCache::~BufferCache() = default;
|
||||
|
||||
void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
|
||||
std::scoped_lock lk{mutex};
|
||||
const bool is_tracked = IsRegionRegistered(device_addr, size);
|
||||
if (!is_tracked) {
|
||||
return;
|
||||
}
|
||||
// Mark the page as CPU modified to stop tracking writes.
|
||||
SCOPE_EXIT {
|
||||
if (is_tracked) {
|
||||
// Mark the page as CPU modified to stop tracking writes.
|
||||
memory_tracker.MarkRegionAsCpuModified(device_addr, size);
|
||||
};
|
||||
if (!memory_tracker.IsRegionGpuModified(device_addr, size)) {
|
||||
// Page has not been modified by the GPU, nothing to do.
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -267,7 +248,16 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
|
||||
const BufferId buffer_id = FindBuffer(address, num_bytes);
|
||||
return &slot_buffers[buffer_id];
|
||||
}();
|
||||
const vk::BufferMemoryBarrier2 buf_barrier = {
|
||||
const vk::BufferMemoryBarrier2 pre_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.buffer = buffer->Handle(),
|
||||
.offset = buffer->Offset(address),
|
||||
.size = num_bytes,
|
||||
};
|
||||
const vk::BufferMemoryBarrier2 post_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
@ -279,9 +269,14 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &buf_barrier,
|
||||
.pBufferMemoryBarriers = &pre_barrier,
|
||||
});
|
||||
cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value);
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &post_barrier,
|
||||
});
|
||||
cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value);
|
||||
}
|
||||
|
||||
std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
|
||||
@ -346,6 +341,7 @@ bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) {
|
||||
++page;
|
||||
continue;
|
||||
}
|
||||
std::shared_lock lk{mutex};
|
||||
Buffer& buffer = slot_buffers[buffer_id];
|
||||
const VAddr buf_start_addr = buffer.CpuAddr();
|
||||
const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
|
||||
@ -472,21 +468,41 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
|
||||
};
|
||||
scheduler.EndRendering();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
static constexpr vk::MemoryBarrier READ_BARRIER{
|
||||
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
|
||||
};
|
||||
static constexpr vk::MemoryBarrier WRITE_BARRIER{
|
||||
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
|
||||
};
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
|
||||
READ_BARRIER, {}, {});
|
||||
cmdbuf.copyBuffer(overlap.buffer, new_buffer.buffer, copy);
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
|
||||
|
||||
boost::container::static_vector<vk::BufferMemoryBarrier2, 2> pre_barriers{};
|
||||
if (auto src_barrier = overlap.GetBarrier(vk::AccessFlagBits2::eTransferRead,
|
||||
vk::PipelineStageFlagBits2::eTransfer)) {
|
||||
pre_barriers.push_back(*src_barrier);
|
||||
}
|
||||
if (auto dst_barrier =
|
||||
new_buffer.GetBarrier(vk::AccessFlagBits2::eTransferWrite,
|
||||
vk::PipelineStageFlagBits2::eTransfer, dst_base_offset)) {
|
||||
pre_barriers.push_back(*dst_barrier);
|
||||
}
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = static_cast<u32>(pre_barriers.size()),
|
||||
.pBufferMemoryBarriers = pre_barriers.data(),
|
||||
});
|
||||
|
||||
cmdbuf.copyBuffer(overlap.Handle(), new_buffer.Handle(), copy);
|
||||
|
||||
boost::container::static_vector<vk::BufferMemoryBarrier2, 2> post_barriers{};
|
||||
if (auto src_barrier =
|
||||
overlap.GetBarrier(vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||
vk::PipelineStageFlagBits2::eAllCommands)) {
|
||||
post_barriers.push_back(*src_barrier);
|
||||
}
|
||||
if (auto dst_barrier = new_buffer.GetBarrier(
|
||||
vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||
vk::PipelineStageFlagBits2::eAllCommands, dst_base_offset)) {
|
||||
post_barriers.push_back(*dst_barrier);
|
||||
}
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = static_cast<u32>(post_barriers.size()),
|
||||
.pBufferMemoryBarriers = post_barriers.data(),
|
||||
});
|
||||
DeleteBuffer(overlap_id);
|
||||
}
|
||||
|
||||
@ -496,8 +512,11 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
|
||||
wanted_size = static_cast<u32>(device_addr_end - device_addr);
|
||||
const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size);
|
||||
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
|
||||
const BufferId new_buffer_id = slot_buffers.insert(
|
||||
instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size);
|
||||
const BufferId new_buffer_id = [&] {
|
||||
std::scoped_lock lk{mutex};
|
||||
return slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin,
|
||||
AllFlags, size);
|
||||
}();
|
||||
auto& new_buffer = slot_buffers[new_buffer_id];
|
||||
const size_t size_bytes = new_buffer.SizeBytes();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
@ -537,10 +556,8 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
|
||||
|
||||
void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
||||
bool is_texel_buffer) {
|
||||
std::scoped_lock lk{mutex};
|
||||
boost::container::small_vector<vk::BufferCopy, 4> copies;
|
||||
u64 total_size_bytes = 0;
|
||||
u64 largest_copy = 0;
|
||||
VAddr buffer_start = buffer.CpuAddr();
|
||||
memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
|
||||
copies.push_back(vk::BufferCopy{
|
||||
@ -549,7 +566,6 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
||||
.size = range_size,
|
||||
});
|
||||
total_size_bytes += range_size;
|
||||
largest_copy = std::max(largest_copy, range_size);
|
||||
});
|
||||
SCOPE_EXIT {
|
||||
if (is_texel_buffer) {
|
||||
@ -590,21 +606,36 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
||||
}
|
||||
scheduler.EndRendering();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
static constexpr vk::MemoryBarrier READ_BARRIER{
|
||||
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
|
||||
const vk::BufferMemoryBarrier2 pre_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
|
||||
vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.buffer = buffer.Handle(),
|
||||
.offset = 0,
|
||||
.size = buffer.SizeBytes(),
|
||||
};
|
||||
static constexpr vk::MemoryBarrier WRITE_BARRIER{
|
||||
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
|
||||
const vk::BufferMemoryBarrier2 post_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||
.buffer = buffer.Handle(),
|
||||
.offset = 0,
|
||||
.size = buffer.SizeBytes(),
|
||||
};
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
|
||||
READ_BARRIER, {}, {});
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &pre_barrier,
|
||||
});
|
||||
cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies);
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &post_barrier,
|
||||
});
|
||||
}
|
||||
|
||||
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
|
||||
@ -612,7 +643,7 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
|
||||
FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize;
|
||||
TextureCache::BaseDesc desc{};
|
||||
desc.info.guest_address = device_addr;
|
||||
desc.info.guest_size_bytes = size;
|
||||
desc.info.guest_size = size;
|
||||
const ImageId image_id = texture_cache.FindImage(desc, find_flags);
|
||||
if (!image_id) {
|
||||
return false;
|
||||
@ -654,10 +685,42 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
|
||||
}
|
||||
if (!copies.empty()) {
|
||||
scheduler.EndRendering();
|
||||
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
|
||||
const vk::BufferMemoryBarrier2 pre_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.buffer = buffer.Handle(),
|
||||
.offset = max_offset - size,
|
||||
.size = size,
|
||||
};
|
||||
const vk::BufferMemoryBarrier2 post_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||
.buffer = buffer.Handle(),
|
||||
.offset = max_offset - size,
|
||||
.size = size,
|
||||
};
|
||||
auto barriers = image.GetBarriers(vk::ImageLayout::eTransferSrcOptimal,
|
||||
vk::AccessFlagBits2::eTransferRead,
|
||||
vk::PipelineStageFlagBits2::eTransfer, {});
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &pre_barrier,
|
||||
.imageMemoryBarrierCount = static_cast<u32>(barriers.size()),
|
||||
.pImageMemoryBarriers = barriers.data(),
|
||||
});
|
||||
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.Handle(),
|
||||
copies);
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.pBufferMemoryBarriers = &post_barrier,
|
||||
});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <shared_mutex>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/icl/interval_map.hpp>
|
||||
#include <tsl/robin_map.h>
|
||||
@ -71,10 +71,6 @@ public:
|
||||
return slot_buffers[id];
|
||||
}
|
||||
|
||||
[[nodiscard]] vk::BufferView& NullBufferView() {
|
||||
return null_buffer_view;
|
||||
}
|
||||
|
||||
/// Invalidates any buffer in the logical page range.
|
||||
void InvalidateMemory(VAddr device_addr, u64 size);
|
||||
|
||||
@ -157,10 +153,9 @@ private:
|
||||
StreamBuffer staging_buffer;
|
||||
StreamBuffer stream_buffer;
|
||||
Buffer gds_buffer;
|
||||
std::mutex mutex;
|
||||
std::shared_mutex mutex;
|
||||
Common::SlotVector<Buffer> slot_buffers;
|
||||
RangeSet gpu_modified_ranges;
|
||||
vk::BufferView null_buffer_view;
|
||||
MemoryTracker memory_tracker;
|
||||
PageTable page_table;
|
||||
};
|
||||
|
@ -15,13 +15,8 @@ namespace VideoCore {
|
||||
class MemoryTracker {
|
||||
public:
|
||||
static constexpr size_t MAX_CPU_PAGE_BITS = 40;
|
||||
static constexpr size_t HIGHER_PAGE_BITS = 22;
|
||||
static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
|
||||
static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
|
||||
static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
|
||||
static constexpr size_t MANAGER_POOL_SIZE = 32;
|
||||
static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
|
||||
using Manager = WordManager<WORDS_STACK_NEEDED>;
|
||||
|
||||
public:
|
||||
explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {}
|
||||
@ -30,7 +25,7 @@ public:
|
||||
/// Returns true if a region has been modified from the CPU
|
||||
[[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
|
||||
return IteratePages<true>(
|
||||
query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
|
||||
query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
|
||||
return manager->template IsRegionModified<Type::CPU>(offset, size);
|
||||
});
|
||||
}
|
||||
@ -38,52 +33,34 @@ public:
|
||||
/// Returns true if a region has been modified from the GPU
|
||||
[[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
|
||||
return IteratePages<false>(
|
||||
query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
|
||||
query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
|
||||
return manager->template IsRegionModified<Type::GPU>(offset, size);
|
||||
});
|
||||
}
|
||||
|
||||
/// Mark region as CPU modified, notifying the device_tracker about this change
|
||||
void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
|
||||
IteratePages<true>(dirty_cpu_addr, query_size,
|
||||
[](Manager* manager, u64 offset, size_t size) {
|
||||
manager->template ChangeRegionState<Type::CPU, true>(
|
||||
manager->GetCpuAddr() + offset, size);
|
||||
});
|
||||
}
|
||||
|
||||
/// Unmark region as CPU modified, notifying the device_tracker about this change
|
||||
void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
|
||||
IteratePages<true>(dirty_cpu_addr, query_size,
|
||||
[](Manager* manager, u64 offset, size_t size) {
|
||||
manager->template ChangeRegionState<Type::CPU, false>(
|
||||
manager->GetCpuAddr() + offset, size);
|
||||
});
|
||||
IteratePages<false>(dirty_cpu_addr, query_size,
|
||||
[](RegionManager* manager, u64 offset, size_t size) {
|
||||
manager->template ChangeRegionState<Type::CPU, true>(
|
||||
manager->GetCpuAddr() + offset, size);
|
||||
});
|
||||
}
|
||||
|
||||
/// Mark region as modified from the host GPU
|
||||
void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
|
||||
IteratePages<true>(dirty_cpu_addr, query_size,
|
||||
[](Manager* manager, u64 offset, size_t size) {
|
||||
manager->template ChangeRegionState<Type::GPU, true>(
|
||||
manager->GetCpuAddr() + offset, size);
|
||||
});
|
||||
}
|
||||
|
||||
/// Unmark region as modified from the host GPU
|
||||
void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
|
||||
IteratePages<true>(dirty_cpu_addr, query_size,
|
||||
[](Manager* manager, u64 offset, size_t size) {
|
||||
manager->template ChangeRegionState<Type::GPU, false>(
|
||||
manager->GetCpuAddr() + offset, size);
|
||||
});
|
||||
IteratePages<false>(dirty_cpu_addr, query_size,
|
||||
[](RegionManager* manager, u64 offset, size_t size) {
|
||||
manager->template ChangeRegionState<Type::GPU, true>(
|
||||
manager->GetCpuAddr() + offset, size);
|
||||
});
|
||||
}
|
||||
|
||||
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
|
||||
template <typename Func>
|
||||
void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
|
||||
IteratePages<true>(query_cpu_range, query_size,
|
||||
[&func](Manager* manager, u64 offset, size_t size) {
|
||||
[&func](RegionManager* manager, u64 offset, size_t size) {
|
||||
manager->template ForEachModifiedRange<Type::CPU, true>(
|
||||
manager->GetCpuAddr() + offset, size, func);
|
||||
});
|
||||
@ -93,7 +70,7 @@ public:
|
||||
template <bool clear, typename Func>
|
||||
void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
|
||||
IteratePages<false>(query_cpu_range, query_size,
|
||||
[&func](Manager* manager, u64 offset, size_t size) {
|
||||
[&func](RegionManager* manager, u64 offset, size_t size) {
|
||||
if constexpr (clear) {
|
||||
manager->template ForEachModifiedRange<Type::GPU, true>(
|
||||
manager->GetCpuAddr() + offset, size, func);
|
||||
@ -114,7 +91,7 @@ private:
|
||||
*/
|
||||
template <bool create_region_on_fail, typename Func>
|
||||
bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
|
||||
using FuncReturn = typename std::invoke_result<Func, Manager*, u64, size_t>::type;
|
||||
using FuncReturn = typename std::invoke_result<Func, RegionManager*, u64, size_t>::type;
|
||||
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
||||
std::size_t remaining_size{size};
|
||||
std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
|
||||
@ -155,7 +132,7 @@ private:
|
||||
manager_pool.emplace_back();
|
||||
auto& last_pool = manager_pool.back();
|
||||
for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
|
||||
std::construct_at(&last_pool[i], tracker, 0, HIGHER_PAGE_SIZE);
|
||||
std::construct_at(&last_pool[i], tracker, 0);
|
||||
free_managers.push_back(&last_pool[i]);
|
||||
}
|
||||
}
|
||||
@ -167,9 +144,9 @@ private:
|
||||
}
|
||||
|
||||
PageManager* tracker;
|
||||
std::deque<std::array<Manager, MANAGER_POOL_SIZE>> manager_pool;
|
||||
std::vector<Manager*> free_managers;
|
||||
std::array<Manager*, NUM_HIGH_PAGES> top_tier{};
|
||||
std::deque<std::array<RegionManager, MANAGER_POOL_SIZE>> manager_pool;
|
||||
std::vector<RegionManager*> free_managers;
|
||||
std::array<RegionManager*, NUM_HIGH_PAGES> top_tier{};
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -3,10 +3,12 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <mutex>
|
||||
#include <span>
|
||||
#include <utility>
|
||||
#include "common/div_ceil.h"
|
||||
|
||||
#include "common/spin_lock.h"
|
||||
#include "common/types.h"
|
||||
#include "video_core/page_manager.h"
|
||||
|
||||
@ -16,135 +18,32 @@ constexpr u64 PAGES_PER_WORD = 64;
|
||||
constexpr u64 BYTES_PER_PAGE = 4_KB;
|
||||
constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
|
||||
|
||||
constexpr u64 HIGHER_PAGE_BITS = 22;
|
||||
constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
|
||||
constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
|
||||
constexpr u64 NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
|
||||
|
||||
enum class Type {
|
||||
CPU,
|
||||
GPU,
|
||||
Untracked,
|
||||
};
|
||||
|
||||
/// Vector tracking modified pages tightly packed with small vector optimization
|
||||
template <size_t stack_words = 1>
|
||||
struct WordsArray {
|
||||
/// Returns the pointer to the words state
|
||||
[[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
|
||||
return is_short ? stack.data() : heap;
|
||||
}
|
||||
using WordsArray = std::array<u64, NUM_REGION_WORDS>;
|
||||
|
||||
/// Returns the pointer to the words state
|
||||
[[nodiscard]] u64* Pointer(bool is_short) noexcept {
|
||||
return is_short ? stack.data() : heap;
|
||||
}
|
||||
|
||||
std::array<u64, stack_words> stack{}; ///< Small buffers storage
|
||||
u64* heap; ///< Not-small buffers pointer to the storage
|
||||
};
|
||||
|
||||
template <size_t stack_words = 1>
|
||||
struct Words {
|
||||
explicit Words() = default;
|
||||
explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
|
||||
num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD);
|
||||
if (IsShort()) {
|
||||
cpu.stack.fill(~u64{0});
|
||||
gpu.stack.fill(0);
|
||||
untracked.stack.fill(~u64{0});
|
||||
} else {
|
||||
// Share allocation between CPU and GPU pages and set their default values
|
||||
u64* const alloc = new u64[num_words * 3];
|
||||
cpu.heap = alloc;
|
||||
gpu.heap = alloc + num_words;
|
||||
untracked.heap = alloc + num_words * 2;
|
||||
std::fill_n(cpu.heap, num_words, ~u64{0});
|
||||
std::fill_n(gpu.heap, num_words, 0);
|
||||
std::fill_n(untracked.heap, num_words, ~u64{0});
|
||||
}
|
||||
// Clean up tailing bits
|
||||
const u64 last_word_size = size_bytes % BYTES_PER_WORD;
|
||||
const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
|
||||
const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
|
||||
const u64 last_word = (~u64{0} << shift) >> shift;
|
||||
cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
|
||||
untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
|
||||
}
|
||||
|
||||
~Words() {
|
||||
Release();
|
||||
}
|
||||
|
||||
Words& operator=(Words&& rhs) noexcept {
|
||||
Release();
|
||||
size_bytes = rhs.size_bytes;
|
||||
num_words = rhs.num_words;
|
||||
cpu = rhs.cpu;
|
||||
gpu = rhs.gpu;
|
||||
untracked = rhs.untracked;
|
||||
rhs.cpu.heap = nullptr;
|
||||
return *this;
|
||||
}
|
||||
|
||||
Words(Words&& rhs) noexcept
|
||||
: size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu},
|
||||
untracked{rhs.untracked} {
|
||||
rhs.cpu.heap = nullptr;
|
||||
}
|
||||
|
||||
Words& operator=(const Words&) = delete;
|
||||
Words(const Words&) = delete;
|
||||
|
||||
/// Returns true when the buffer fits in the small vector optimization
|
||||
[[nodiscard]] bool IsShort() const noexcept {
|
||||
return num_words <= stack_words;
|
||||
}
|
||||
|
||||
/// Returns the number of words of the buffer
|
||||
[[nodiscard]] size_t NumWords() const noexcept {
|
||||
return num_words;
|
||||
}
|
||||
|
||||
/// Release buffer resources
|
||||
void Release() {
|
||||
if (!IsShort()) {
|
||||
// CPU written words is the base for the heap allocation
|
||||
delete[] cpu.heap;
|
||||
}
|
||||
}
|
||||
|
||||
template <Type type>
|
||||
std::span<u64> Span() noexcept {
|
||||
if constexpr (type == Type::CPU) {
|
||||
return std::span<u64>(cpu.Pointer(IsShort()), num_words);
|
||||
} else if constexpr (type == Type::GPU) {
|
||||
return std::span<u64>(gpu.Pointer(IsShort()), num_words);
|
||||
} else if constexpr (type == Type::Untracked) {
|
||||
return std::span<u64>(untracked.Pointer(IsShort()), num_words);
|
||||
}
|
||||
}
|
||||
|
||||
template <Type type>
|
||||
std::span<const u64> Span() const noexcept {
|
||||
if constexpr (type == Type::CPU) {
|
||||
return std::span<const u64>(cpu.Pointer(IsShort()), num_words);
|
||||
} else if constexpr (type == Type::GPU) {
|
||||
return std::span<const u64>(gpu.Pointer(IsShort()), num_words);
|
||||
} else if constexpr (type == Type::Untracked) {
|
||||
return std::span<const u64>(untracked.Pointer(IsShort()), num_words);
|
||||
}
|
||||
}
|
||||
|
||||
u64 size_bytes = 0;
|
||||
size_t num_words = 0;
|
||||
WordsArray<stack_words> cpu;
|
||||
WordsArray<stack_words> gpu;
|
||||
WordsArray<stack_words> untracked;
|
||||
};
|
||||
|
||||
template <size_t stack_words = 1>
|
||||
class WordManager {
|
||||
/**
|
||||
* Allows tracking CPU and GPU modification of pages in a contiguous 4MB virtual address region.
|
||||
* Information is stored in bitsets for spatial locality and fast update of single pages.
|
||||
*/
|
||||
class RegionManager {
|
||||
public:
|
||||
explicit WordManager(PageManager* tracker_, VAddr cpu_addr_, u64 size_bytes)
|
||||
: tracker{tracker_}, cpu_addr{cpu_addr_}, words{size_bytes} {}
|
||||
|
||||
explicit WordManager() = default;
|
||||
explicit RegionManager(PageManager* tracker_, VAddr cpu_addr_)
|
||||
: tracker{tracker_}, cpu_addr{cpu_addr_} {
|
||||
cpu.fill(~u64{0});
|
||||
gpu.fill(0);
|
||||
untracked.fill(~u64{0});
|
||||
}
|
||||
explicit RegionManager() = default;
|
||||
|
||||
void SetCpuAddress(VAddr new_cpu_addr) {
|
||||
cpu_addr = new_cpu_addr;
|
||||
@ -175,12 +74,12 @@ public:
|
||||
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
||||
const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL));
|
||||
const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL));
|
||||
if (start >= SizeBytes() || end <= start) {
|
||||
if (start >= HIGHER_PAGE_SIZE || end <= start) {
|
||||
return;
|
||||
}
|
||||
auto [start_word, start_page] = GetWordPage(start);
|
||||
auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL);
|
||||
const size_t num_words = NumWords();
|
||||
constexpr size_t num_words = NUM_REGION_WORDS;
|
||||
start_word = std::min(start_word, num_words);
|
||||
end_word = std::min(end_word, num_words);
|
||||
const size_t diff = end_word - start_word;
|
||||
@ -225,21 +124,21 @@ public:
|
||||
*/
|
||||
template <Type type, bool enable>
|
||||
void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
|
||||
std::span<u64> state_words = words.template Span<type>();
|
||||
[[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
|
||||
std::scoped_lock lk{lock};
|
||||
std::span<u64> state_words = Span<type>();
|
||||
IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) {
|
||||
if constexpr (type == Type::CPU) {
|
||||
NotifyPageTracker<!enable>(index, untracked_words[index], mask);
|
||||
UpdateProtection<!enable>(index, untracked[index], mask);
|
||||
}
|
||||
if constexpr (enable) {
|
||||
state_words[index] |= mask;
|
||||
if constexpr (type == Type::CPU) {
|
||||
untracked_words[index] |= mask;
|
||||
untracked[index] |= mask;
|
||||
}
|
||||
} else {
|
||||
state_words[index] &= ~mask;
|
||||
if constexpr (type == Type::CPU) {
|
||||
untracked_words[index] &= ~mask;
|
||||
untracked[index] &= ~mask;
|
||||
}
|
||||
}
|
||||
});
|
||||
@ -255,10 +154,10 @@ public:
|
||||
*/
|
||||
template <Type type, bool clear, typename Func>
|
||||
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
|
||||
std::scoped_lock lk{lock};
|
||||
static_assert(type != Type::Untracked);
|
||||
|
||||
std::span<u64> state_words = words.template Span<type>();
|
||||
[[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
|
||||
std::span<u64> state_words = Span<type>();
|
||||
const size_t offset = query_cpu_range - cpu_addr;
|
||||
bool pending = false;
|
||||
size_t pending_offset{};
|
||||
@ -269,16 +168,16 @@ public:
|
||||
};
|
||||
IterateWords(offset, size, [&](size_t index, u64 mask) {
|
||||
if constexpr (type == Type::GPU) {
|
||||
mask &= ~untracked_words[index];
|
||||
mask &= ~untracked[index];
|
||||
}
|
||||
const u64 word = state_words[index] & mask;
|
||||
if constexpr (clear) {
|
||||
if constexpr (type == Type::CPU) {
|
||||
NotifyPageTracker<true>(index, untracked_words[index], mask);
|
||||
UpdateProtection<true>(index, untracked[index], mask);
|
||||
}
|
||||
state_words[index] &= ~mask;
|
||||
if constexpr (type == Type::CPU) {
|
||||
untracked_words[index] &= ~mask;
|
||||
untracked[index] &= ~mask;
|
||||
}
|
||||
}
|
||||
const size_t base_offset = index * PAGES_PER_WORD;
|
||||
@ -315,13 +214,11 @@ public:
|
||||
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
|
||||
static_assert(type != Type::Untracked);
|
||||
|
||||
const std::span<const u64> state_words = words.template Span<type>();
|
||||
[[maybe_unused]] const std::span<const u64> untracked_words =
|
||||
words.template Span<Type::Untracked>();
|
||||
const std::span<const u64> state_words = Span<type>();
|
||||
bool result = false;
|
||||
IterateWords(offset, size, [&](size_t index, u64 mask) {
|
||||
if constexpr (type == Type::GPU) {
|
||||
mask &= ~untracked_words[index];
|
||||
mask &= ~untracked[index];
|
||||
}
|
||||
const u64 word = state_words[index] & mask;
|
||||
if (word != 0) {
|
||||
@ -333,44 +230,7 @@ public:
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Returns the number of words of the manager
|
||||
[[nodiscard]] size_t NumWords() const noexcept {
|
||||
return words.NumWords();
|
||||
}
|
||||
|
||||
/// Returns the size in bytes of the manager
|
||||
[[nodiscard]] u64 SizeBytes() const noexcept {
|
||||
return words.size_bytes;
|
||||
}
|
||||
|
||||
/// Returns true when the buffer fits in the small vector optimization
|
||||
[[nodiscard]] bool IsShort() const noexcept {
|
||||
return words.IsShort();
|
||||
}
|
||||
|
||||
private:
|
||||
template <Type type>
|
||||
u64* Array() noexcept {
|
||||
if constexpr (type == Type::CPU) {
|
||||
return words.cpu.Pointer(IsShort());
|
||||
} else if constexpr (type == Type::GPU) {
|
||||
return words.gpu.Pointer(IsShort());
|
||||
} else if constexpr (type == Type::Untracked) {
|
||||
return words.untracked.Pointer(IsShort());
|
||||
}
|
||||
}
|
||||
|
||||
template <Type type>
|
||||
const u64* Array() const noexcept {
|
||||
if constexpr (type == Type::CPU) {
|
||||
return words.cpu.Pointer(IsShort());
|
||||
} else if constexpr (type == Type::GPU) {
|
||||
return words.gpu.Pointer(IsShort());
|
||||
} else if constexpr (type == Type::Untracked) {
|
||||
return words.untracked.Pointer(IsShort());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify tracker about changes in the CPU tracking state of a word in the buffer
|
||||
*
|
||||
@ -381,7 +241,7 @@ private:
|
||||
* @tparam add_to_tracker True when the tracker should start tracking the new pages
|
||||
*/
|
||||
template <bool add_to_tracker>
|
||||
void NotifyPageTracker(u64 word_index, u64 current_bits, u64 new_bits) const {
|
||||
void UpdateProtection(u64 word_index, u64 current_bits, u64 new_bits) const {
|
||||
u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
|
||||
VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
|
||||
IteratePages(changed_bits, [&](size_t offset, size_t size) {
|
||||
@ -390,9 +250,34 @@ private:
|
||||
});
|
||||
}
|
||||
|
||||
template <Type type>
|
||||
std::span<u64> Span() noexcept {
|
||||
if constexpr (type == Type::CPU) {
|
||||
return cpu;
|
||||
} else if constexpr (type == Type::GPU) {
|
||||
return gpu;
|
||||
} else if constexpr (type == Type::Untracked) {
|
||||
return untracked;
|
||||
}
|
||||
}
|
||||
|
||||
template <Type type>
|
||||
std::span<const u64> Span() const noexcept {
|
||||
if constexpr (type == Type::CPU) {
|
||||
return cpu;
|
||||
} else if constexpr (type == Type::GPU) {
|
||||
return gpu;
|
||||
} else if constexpr (type == Type::Untracked) {
|
||||
return untracked;
|
||||
}
|
||||
}
|
||||
|
||||
Common::SpinLock lock;
|
||||
PageManager* tracker;
|
||||
VAddr cpu_addr = 0;
|
||||
Words<stack_words> words;
|
||||
WordsArray cpu;
|
||||
WordsArray gpu;
|
||||
WordsArray untracked;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -2,13 +2,14 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
set(SHADER_FILES
|
||||
detile_m8x1.comp
|
||||
detile_m8x2.comp
|
||||
detile_m32x1.comp
|
||||
detile_m32x2.comp
|
||||
detile_m32x4.comp
|
||||
detile_macro32x1.comp
|
||||
detile_macro32x2.comp
|
||||
detilers/macro_32bpp.comp
|
||||
detilers/macro_64bpp.comp
|
||||
detilers/macro_8bpp.comp
|
||||
detilers/micro_128bpp.comp
|
||||
detilers/micro_16bpp.comp
|
||||
detilers/micro_32bpp.comp
|
||||
detilers/micro_64bpp.comp
|
||||
detilers/micro_8bpp.comp
|
||||
fs_tri.vert
|
||||
post_process.frag
|
||||
)
|
||||
|
@ -87,7 +87,7 @@ void main() {
|
||||
uint offs = slice_offs + tile_offs + (idx * BPP / 8);
|
||||
|
||||
uint p0 = in_data[(offs >> 2) + 0];
|
||||
uint p1 = in_data[(offs >> 2) + 1];
|
||||
uint p1 = in_data[(offs >> 2) + 1];
|
||||
out_data[2 * gl_GlobalInvocationID.x + 0] = p0;
|
||||
out_data[2 * gl_GlobalInvocationID.x + 1] = p1;
|
||||
out_data[2 * gl_GlobalInvocationID.x + 1] = p1;
|
||||
}
|
101
src/video_core/host_shaders/detilers/macro_8bpp.comp
Normal file
101
src/video_core/host_shaders/detilers/macro_8bpp.comp
Normal file
@ -0,0 +1,101 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#version 450
|
||||
|
||||
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout(std430, binding = 0) buffer input_buf {
|
||||
uint in_data[];
|
||||
};
|
||||
layout(std430, binding = 1) buffer output_buf {
|
||||
uint out_data[];
|
||||
};
|
||||
|
||||
layout(push_constant) uniform image_info {
|
||||
uint num_levels;
|
||||
uint pitch;
|
||||
uint height;
|
||||
uint c0;
|
||||
uint c1;
|
||||
} info;
|
||||
|
||||
// Byte-index lookup tables consumed by main(): the table is selected by the low two bits
// of z, the dword by (col + row * MICRO_TILE_DIM) >> 2, and each dword packs four 8-bit
// indices (one per consecutive invocation, lowest byte first). main() adds the extracted
// index to the slice/tile offset to locate the source byte.
const uint lut_8bpp[][16] = {
|
||||
{
|
||||
0x05040100, 0x45444140,
|
||||
0x07060302, 0x47464342,
|
||||
0x0d0c0908, 0x4d4c4948,
|
||||
0x0f0e0b0a, 0x4f4e4b4a,
|
||||
0x85848180, 0xc5c4c1c0,
|
||||
0x87868382, 0xc7c6c3c2,
|
||||
0x8d8c8988, 0xcdccc9c8,
|
||||
0x8f8e8b8a, 0xcfcecbca,
|
||||
},
|
||||
{
|
||||
0x15141110, 0x55545150,
|
||||
0x17161312, 0x57565352,
|
||||
0x1d1c1918, 0x5d5c5958,
|
||||
0x1f1e1b1a, 0x5f5e5b5a,
|
||||
0x95949190, 0xd5d4d1d0,
|
||||
0x97969392, 0xd7d6d3d2,
|
||||
0x9d9c9998, 0xdddcd9d8,
|
||||
0x9f9e9b9a, 0xdfdedbda,
|
||||
},
|
||||
{
|
||||
0x25242120, 0x65646160,
|
||||
0x27262322, 0x67666362,
|
||||
0x2d2c2928, 0x6d6c6968,
|
||||
0x2f2e2b2a, 0x6f6e6b6a,
|
||||
0xa5a4a1a0, 0xe5e4e1e0,
|
||||
0xa7a6a3a2, 0xe7e6e3e2,
|
||||
0xadaca9a8, 0xedece9e8,
|
||||
0xafaeabaa, 0xefeeebea,
|
||||
},
|
||||
{
|
||||
0x35343130, 0x75747170,
|
||||
0x37363332, 0x77767372,
|
||||
0x3d3c3938, 0x7d7c7978,
|
||||
0x3f3e3b3a, 0x7f7e7b7a,
|
||||
0xb5b4b1b0, 0xf5f4f1f0,
|
||||
0xb7b6b3b2, 0xf7f6f3f2,
|
||||
0xbdbcb9b8, 0xfdfcf9f8,
|
||||
0xbfbebbba, 0xfffefbfa,
|
||||
},
|
||||
};
|
||||
|
||||
#define MICRO_TILE_DIM (8)
|
||||
#define MICRO_TILE_SZ (256)
|
||||
#define TEXELS_PER_ELEMENT (1)
|
||||
#define BPP (8)
|
||||
|
||||
shared uint scratch[16];
|
||||
|
||||
// Handles one 8bpp texel per invocation: looks up the source byte index in lut_8bpp,
// fetches that byte from in_data and merges it into a shared dword. Four consecutive
// invocations share one scratch slot; the first of each four writes the packed dword.
void main() {
    uint slot = gl_LocalInvocationID.x >> 2u;
    atomicAnd(scratch[slot], 0u);
    // Every invocation sharing this slot must have finished clearing it before any of
    // them starts OR-ing bytes in, otherwise a late clear could wipe a contribution.
    memoryBarrierShared();
    barrier();

    uint x = gl_GlobalInvocationID.x % info.pitch;
    uint y = (gl_GlobalInvocationID.x / info.pitch) % info.height;
    uint z = gl_GlobalInvocationID.x / (info.pitch * info.height);

    // Position inside the 8x8 micro tile, and which of the four tables applies.
    uint col = bitfieldExtract(x, 0, 3);
    uint row = bitfieldExtract(y, 0, 3);
    uint lut = bitfieldExtract(z, 0, 2);
    uint idx_dw = lut_8bpp[lut][(col + row * MICRO_TILE_DIM) >> 2u];
    // Each table dword holds four byte indices; pick the one for this invocation.
    uint byte_ofs = (gl_LocalInvocationID.x & 3u) * 8;
    uint idx = bitfieldExtract(idx_dw >> byte_ofs, 0, 8);

    uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ;
    uint tile_row = y / MICRO_TILE_DIM;
    uint tile_column = x / MICRO_TILE_DIM;
    uint tile_offs = ((tile_row * info.c0) + tile_column) * MICRO_TILE_SZ;
    uint offs = (slice_offs + tile_offs) + (idx * BPP / 8);

    uint p0 = in_data[offs >> 2u];
    // Only the low two bits of the byte offset select a byte within the fetched dword.
    // Shifting by offs * 8 directly is undefined as soon as it reaches 32 bits.
    uint byte = bitfieldExtract(p0 >> ((offs & 3u) * 8u), 0, 8);
    atomicOr(scratch[slot], byte << byte_ofs);

    // The writer below reads bytes contributed by three other invocations; make those
    // shared-memory updates visible before the slot is read.
    memoryBarrierShared();
    barrier();

    if (byte_ofs == 0) {
        out_data[gl_GlobalInvocationID.x >> 2u] = scratch[slot];
    }
}
|
@ -39,6 +39,15 @@ public:
|
||||
return &(*first_level_map[l1_page])[l2_page];
|
||||
}
|
||||
|
||||
/// Looks up the entry for `page`. Returns nullptr when the first-level slot covering
/// the page is empty, otherwise a pointer to the entry inside that second-level table.
[[nodiscard]] const Entry* find(size_t page) const {
    const auto& second_level = first_level_map[page >> SecondLevelBits];
    if (second_level) {
        const size_t slot = page & (NumEntriesPerL1Page - 1);
        return &(*second_level)[slot];
    }
    return nullptr;
}
|
||||
|
||||
[[nodiscard]] const Entry& operator[](size_t page) const {
|
||||
const size_t l1_page = page >> SecondLevelBits;
|
||||
const size_t l2_page = page & (NumEntriesPerL1Page - 1);
|
||||
|
@ -185,7 +185,7 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) {
|
||||
void PageManager::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
|
||||
static constexpr u64 PageShift = 12;
|
||||
|
||||
std::scoped_lock lk{mutex};
|
||||
std::scoped_lock lk{lock};
|
||||
const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1;
|
||||
const u64 page_start = addr >> PageShift;
|
||||
const u64 page_end = page_start + num_pages;
|
||||
|
@ -4,8 +4,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <boost/icl/interval_map.hpp>
|
||||
#include "common/spin_lock.h"
|
||||
#include "common/types.h"
|
||||
|
||||
namespace Vulkan {
|
||||
@ -35,8 +35,8 @@ private:
|
||||
struct Impl;
|
||||
std::unique_ptr<Impl> impl;
|
||||
Vulkan::Rasterizer* rasterizer;
|
||||
std::mutex mutex;
|
||||
boost::icl::interval_map<VAddr, s32> cached_pages;
|
||||
Common::SpinLock lock;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -447,7 +447,7 @@ static constexpr vk::FormatFeatureFlags2 GetNumberFormatFeatureFlags(
|
||||
case AmdGpu::NumberFormat::Srgb:
|
||||
return ImageRead | Mrt;
|
||||
case AmdGpu::NumberFormat::Ubnorm:
|
||||
case AmdGpu::NumberFormat::UbnromNz:
|
||||
case AmdGpu::NumberFormat::UbnormNz:
|
||||
case AmdGpu::NumberFormat::Ubint:
|
||||
case AmdGpu::NumberFormat::Ubscaled:
|
||||
return ImageRead;
|
||||
@ -468,6 +468,7 @@ static constexpr SurfaceFormatInfo CreateSurfaceFormatInfo(const AmdGpu::DataFor
|
||||
}
|
||||
|
||||
std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
// Uscaled, Sscaled, and Ubnorm formats are automatically remapped and handled in shader.
|
||||
static constexpr std::array formats{
|
||||
// Invalid
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Unorm,
|
||||
@ -490,7 +491,7 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eUndefined),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubnorm,
|
||||
vk::Format::eUndefined),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::UbnromNz,
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::UbnormNz,
|
||||
vk::Format::eUndefined),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubint,
|
||||
vk::Format::eUndefined),
|
||||
@ -501,10 +502,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eR8Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR8Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR8Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR8Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR8Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sint,
|
||||
@ -516,10 +513,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eR16Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR16Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR16Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR16Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR16Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sint,
|
||||
@ -531,10 +524,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eR8G8Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR8G8Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR8G8Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR8G8Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR8G8Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sint,
|
||||
@ -553,10 +542,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eR16G16Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR16G16Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR16G16Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR16G16Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR16G16Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sint,
|
||||
@ -573,10 +558,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eA2B10G10R10UnormPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eA2B10G10R10SnormPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eA2B10G10R10UscaledPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eA2B10G10R10SscaledPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eA2B10G10R10UintPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sint,
|
||||
@ -586,10 +567,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eR8G8B8A8Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR8G8B8A8Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR8G8B8A8Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR8G8B8A8Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR8G8B8A8Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sint,
|
||||
@ -608,10 +585,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eR16G16B16A16Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR16G16B16A16Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16,
|
||||
AmdGpu::NumberFormat::Uscaled, vk::Format::eR16G16B16A16Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16,
|
||||
AmdGpu::NumberFormat::Sscaled, vk::Format::eR16G16B16A16Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR16G16B16A16Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Sint,
|
||||
@ -691,16 +664,40 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
return formats;
|
||||
}
|
||||
|
||||
// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture]
|
||||
static const size_t amd_gpu_data_format_bit_size = 6; // All values are under 64
|
||||
static const size_t amd_gpu_number_format_bit_size = 4; // All values are under 16
|
||||
|
||||
/// Packs a (data format, number format) pair into a single table index: the number
/// format occupies the low amd_gpu_number_format_bit_size bits and the data format
/// sits directly above it.
static size_t GetSurfaceFormatTableIndex(AmdGpu::DataFormat data_format,
                                         AmdGpu::NumberFormat num_format) {
    DEBUG_ASSERT(u32(data_format) < 1 << amd_gpu_data_format_bit_size);
    DEBUG_ASSERT(u32(num_format) < 1 << amd_gpu_number_format_bit_size);
    const auto data_part = static_cast<size_t>(data_format);
    const auto number_part = static_cast<size_t>(num_format);
    return (data_part << amd_gpu_number_format_bit_size) | number_part;
}
|
||||
|
||||
static auto surface_format_table = []() constexpr {
|
||||
std::array<vk::Format, 1 << amd_gpu_data_format_bit_size * 1 << amd_gpu_number_format_bit_size>
|
||||
result;
|
||||
for (auto& entry : result) {
|
||||
entry = vk::Format::eUndefined;
|
||||
}
|
||||
for (const auto& supported_format : SurfaceFormats()) {
|
||||
result[GetSurfaceFormatTableIndex(supported_format.data_format,
|
||||
supported_format.number_format)] =
|
||||
supported_format.vk_format;
|
||||
}
|
||||
return result;
|
||||
}();
|
||||
|
||||
vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
|
||||
const auto& formats = SurfaceFormats();
|
||||
const auto format =
|
||||
std::find_if(formats.begin(), formats.end(), [&](const SurfaceFormatInfo& format_info) {
|
||||
return format_info.data_format == data_format &&
|
||||
format_info.number_format == num_format;
|
||||
});
|
||||
ASSERT_MSG(format != formats.end(), "Unknown data_format={} and num_format={}",
|
||||
static_cast<u32>(data_format), static_cast<u32>(num_format));
|
||||
return format->vk_format;
|
||||
vk::Format result = surface_format_table[GetSurfaceFormatTableIndex(data_format, num_format)];
|
||||
bool found =
|
||||
result != vk::Format::eUndefined || data_format == AmdGpu::DataFormat::FormatInvalid;
|
||||
ASSERT_MSG(found, "Unknown data_format={} and num_format={}", static_cast<u32>(data_format),
|
||||
static_cast<u32>(num_format));
|
||||
return result;
|
||||
}
|
||||
|
||||
static constexpr DepthFormatInfo CreateDepthFormatInfo(
|
||||
@ -746,8 +743,8 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat
|
||||
|
||||
vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
|
||||
const auto comp_swizzle = color_buffer.Swizzle();
|
||||
const auto format = color_buffer.DataFormat();
|
||||
const auto number_type = color_buffer.NumFormat();
|
||||
const auto format = color_buffer.GetDataFmt();
|
||||
const auto number_type = color_buffer.GetNumberFmt();
|
||||
|
||||
const auto& c0 = color_buffer.clear_word0;
|
||||
const auto& c1 = color_buffer.clear_word1;
|
||||
|
@ -18,6 +18,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
||||
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} {
|
||||
auto& info = stages[int(Shader::LogicalStage::Compute)];
|
||||
info = &info_;
|
||||
const auto debug_str = GetDebugString();
|
||||
|
||||
const vk::PipelineShaderStageCreateInfo shader_ci = {
|
||||
.stage = vk::ShaderStageFlagBits::eCompute,
|
||||
@ -89,8 +90,9 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
||||
.bindingCount = static_cast<u32>(bindings.size()),
|
||||
.pBindings = bindings.data(),
|
||||
};
|
||||
const auto device = instance.GetDevice();
|
||||
auto [descriptor_set_result, descriptor_set] =
|
||||
instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci);
|
||||
device.createDescriptorSetLayoutUnique(desc_layout_ci);
|
||||
ASSERT_MSG(descriptor_set_result == vk::Result::eSuccess,
|
||||
"Failed to create compute descriptor set layout: {}",
|
||||
vk::to_string(descriptor_set_result));
|
||||
@ -107,6 +109,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
||||
ASSERT_MSG(layout_result == vk::Result::eSuccess,
|
||||
"Failed to create compute pipeline layout: {}", vk::to_string(layout_result));
|
||||
pipeline_layout = std::move(layout);
|
||||
SetObjectName(device, *pipeline_layout, "Compute PipelineLayout {}", debug_str);
|
||||
|
||||
const vk::ComputePipelineCreateInfo compute_pipeline_ci = {
|
||||
.stage = shader_ci,
|
||||
@ -117,6 +120,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
||||
ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create compute pipeline: {}",
|
||||
vk::to_string(pipeline_result));
|
||||
pipeline = std::move(pipe);
|
||||
SetObjectName(device, *pipeline, "Compute Pipeline {}", debug_str);
|
||||
}
|
||||
|
||||
ComputePipeline::~ComputePipeline() = default;
|
||||
|
@ -8,7 +8,6 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/io_file.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
|
||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
@ -16,6 +15,7 @@
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
@ -36,6 +36,7 @@ GraphicsPipeline::GraphicsPipeline(
|
||||
const vk::Device device = instance.GetDevice();
|
||||
std::ranges::copy(infos, stages.begin());
|
||||
BuildDescSetLayout();
|
||||
const auto debug_str = GetDebugString();
|
||||
|
||||
const vk::PushConstantRange push_constants = {
|
||||
.stageFlags = gp_stage_flags,
|
||||
@ -54,6 +55,7 @@ GraphicsPipeline::GraphicsPipeline(
|
||||
ASSERT_MSG(layout_result == vk::Result::eSuccess,
|
||||
"Failed to create graphics pipeline layout: {}", vk::to_string(layout_result));
|
||||
pipeline_layout = std::move(layout);
|
||||
SetObjectName(device, *pipeline_layout, "Graphics PipelineLayout {}", debug_str);
|
||||
|
||||
boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
|
||||
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
|
||||
@ -322,6 +324,7 @@ GraphicsPipeline::GraphicsPipeline(
|
||||
ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create graphics pipeline: {}",
|
||||
vk::to_string(pipeline_result));
|
||||
pipeline = std::move(pipe);
|
||||
SetObjectName(device, *pipeline, "Graphics Pipeline {}", debug_str);
|
||||
}
|
||||
|
||||
GraphicsPipeline::~GraphicsPipeline() = default;
|
||||
|
@ -32,6 +32,7 @@ struct GraphicsPipelineKey {
|
||||
u32 num_color_attachments;
|
||||
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
|
||||
std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats;
|
||||
std::array<AmdGpu::NumberConversion, Liverpool::NumColorBuffers> color_num_conversions;
|
||||
std::array<AmdGpu::CompMapping, Liverpool::NumColorBuffers> color_swizzles;
|
||||
vk::Format depth_format;
|
||||
vk::Format stencil_format;
|
||||
|
@ -92,13 +92,15 @@ std::string GetReadableVersion(u32 version) {
|
||||
Instance::Instance(bool enable_validation, bool enable_crash_diagnostic)
|
||||
: instance{CreateInstance(Frontend::WindowSystemType::Headless, enable_validation,
|
||||
enable_crash_diagnostic)},
|
||||
physical_devices{EnumeratePhysicalDevices(instance)} {}
|
||||
physical_devices{EnumeratePhysicalDevices(instance)},
|
||||
crash_diagnostic{enable_crash_diagnostic} {}
|
||||
|
||||
Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index,
|
||||
bool enable_validation /*= false*/, bool enable_crash_diagnostic /*= false*/)
|
||||
: instance{CreateInstance(window.GetWindowInfo().type, enable_validation,
|
||||
enable_crash_diagnostic)},
|
||||
physical_devices{EnumeratePhysicalDevices(instance)} {
|
||||
physical_devices{EnumeratePhysicalDevices(instance)},
|
||||
crash_diagnostic{enable_crash_diagnostic} {
|
||||
if (enable_validation) {
|
||||
debug_callback = CreateDebugCallback(*instance);
|
||||
}
|
||||
|
@ -81,7 +81,7 @@ public:
|
||||
|
||||
/// Returns true when a known debugging tool is attached.
|
||||
bool HasDebuggingToolAttached() const {
|
||||
return has_renderdoc || has_nsight_graphics;
|
||||
return crash_diagnostic || has_renderdoc || has_nsight_graphics;
|
||||
}
|
||||
|
||||
/// Returns true if anisotropic filtering is supported
|
||||
@ -338,6 +338,7 @@ private:
|
||||
u32 subgroup_size{};
|
||||
bool tooling_info{};
|
||||
bool debug_utils_supported{};
|
||||
bool crash_diagnostic{};
|
||||
bool has_nsight_graphics{};
|
||||
bool has_renderdoc{};
|
||||
};
|
||||
|
@ -168,6 +168,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
||||
for (u32 i = 0; i < Shader::MaxColorBuffers; i++) {
|
||||
info.fs_info.color_buffers[i] = {
|
||||
.num_format = graphics_key.color_num_formats[i],
|
||||
.num_conversion = graphics_key.color_num_conversions[i],
|
||||
.swizzle = graphics_key.color_swizzles[i],
|
||||
};
|
||||
}
|
||||
@ -302,6 +303,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
key.num_color_attachments = 0;
|
||||
key.color_formats.fill(vk::Format::eUndefined);
|
||||
key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm);
|
||||
key.color_num_conversions.fill(AmdGpu::NumberConversion::None);
|
||||
key.blend_controls.fill({});
|
||||
key.write_masks.fill({});
|
||||
key.color_swizzles.fill({});
|
||||
@ -328,8 +330,9 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
}
|
||||
|
||||
key.color_formats[remapped_cb] =
|
||||
LiverpoolToVK::SurfaceFormat(col_buf.DataFormat(), col_buf.NumFormat());
|
||||
key.color_num_formats[remapped_cb] = col_buf.NumFormat();
|
||||
LiverpoolToVK::SurfaceFormat(col_buf.GetDataFmt(), col_buf.GetNumberFmt());
|
||||
key.color_num_formats[remapped_cb] = col_buf.GetNumberFmt();
|
||||
key.color_num_conversions[remapped_cb] = col_buf.GetNumberConversion();
|
||||
key.color_swizzles[remapped_cb] = col_buf.Swizzle();
|
||||
}
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_common.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
@ -55,4 +56,19 @@ void Pipeline::BindResources(DescriptorWrites& set_writes, const BufferBarriers&
|
||||
cmdbuf.bindDescriptorSets(bind_point, *pipeline_layout, 0, desc_set, {});
|
||||
}
|
||||
|
||||
std::string Pipeline::GetDebugString() const {
|
||||
std::string stage_desc;
|
||||
for (const auto& stage : stages) {
|
||||
if (stage) {
|
||||
const auto shader_name = PipelineCache::GetShaderName(stage->stage, stage->pgm_hash);
|
||||
if (stage_desc.empty()) {
|
||||
stage_desc = shader_name;
|
||||
} else {
|
||||
stage_desc = fmt::format("{},{}", stage_desc, shader_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
return stage_desc;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user