mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-05 17:02:40 +00:00
Merge branch 'shadps4-emu:main' into gcc-ci
This commit is contained in:
commit
7408a549e8
6
.github/ISSUE_TEMPLATE/game-bug-report.yaml
vendored
6
.github/ISSUE_TEMPLATE/game-bug-report.yaml
vendored
@ -89,7 +89,7 @@ body:
|
||||
- type: textarea
|
||||
id: logs
|
||||
attributes:
|
||||
label: "Logs"
|
||||
description: Attach any logs here. Log can be found by right clicking on a game name -> Open Folder... -> Open Log Folder. Make sure that the log type is set to `sync`.
|
||||
label: "Log File"
|
||||
description: Drag and drop the log file here. It can be found by right clicking on a game name -> Open Folder... -> Open Log Folder. Make sure that the log type is set to `sync`.
|
||||
validations:
|
||||
required: false
|
||||
required: true
|
||||
|
10
.github/workflows/build.yml
vendored
10
.github/workflows/build.yml
vendored
@ -14,14 +14,14 @@ env:
|
||||
|
||||
jobs:
|
||||
reuse:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: fsfe/reuse-action@v5
|
||||
|
||||
clang-format:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@ -39,7 +39,7 @@ jobs:
|
||||
run: ./.ci/clang-format.sh
|
||||
|
||||
get-info:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
outputs:
|
||||
date: ${{ steps.vars.outputs.date }}
|
||||
shorthash: ${{ steps.vars.outputs.shorthash }}
|
||||
@ -57,7 +57,7 @@ jobs:
|
||||
echo "fullhash=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
|
||||
|
||||
windows-sdl:
|
||||
runs-on: windows-latest
|
||||
runs-on: windows-2025
|
||||
needs: get-info
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@ -101,7 +101,7 @@ jobs:
|
||||
path: ${{github.workspace}}/build/shadPS4.exe
|
||||
|
||||
windows-qt:
|
||||
runs-on: windows-latest
|
||||
runs-on: windows-2025
|
||||
needs: get-info
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
@ -209,6 +209,7 @@ set(AUDIO_LIB src/core/libraries/audio/audioin.cpp
|
||||
|
||||
set(GNM_LIB src/core/libraries/gnmdriver/gnmdriver.cpp
|
||||
src/core/libraries/gnmdriver/gnmdriver.h
|
||||
src/core/libraries/gnmdriver/gnmdriver_init.h
|
||||
src/core/libraries/gnmdriver/gnm_error.h
|
||||
)
|
||||
|
||||
@ -335,6 +336,8 @@ set(SYSTEM_LIBS src/core/libraries/system/commondialog.cpp
|
||||
src/core/libraries/share_play/shareplay.h
|
||||
src/core/libraries/razor_cpu/razor_cpu.cpp
|
||||
src/core/libraries/razor_cpu/razor_cpu.h
|
||||
src/core/libraries/mouse/mouse.cpp
|
||||
src/core/libraries/mouse/mouse.h
|
||||
)
|
||||
|
||||
set(VIDEOOUT_LIB src/core/libraries/videoout/buffer.h
|
||||
|
@ -47,6 +47,7 @@ static std::string updateChannel;
|
||||
static std::string backButtonBehavior = "left";
|
||||
static bool useSpecialPad = false;
|
||||
static int specialPadClass = 1;
|
||||
static bool isMotionControlsEnabled = true;
|
||||
static bool isDebugDump = false;
|
||||
static bool isShaderDebug = false;
|
||||
static bool isShowSplash = false;
|
||||
@ -100,7 +101,7 @@ void setTrophyKey(std::string key) {
|
||||
trophyKey = key;
|
||||
}
|
||||
|
||||
bool isNeoMode() {
|
||||
bool isNeoModeConsole() {
|
||||
return isNeo;
|
||||
}
|
||||
|
||||
@ -172,6 +173,10 @@ int getSpecialPadClass() {
|
||||
return specialPadClass;
|
||||
}
|
||||
|
||||
bool getIsMotionControlsEnabled() {
|
||||
return isMotionControlsEnabled;
|
||||
}
|
||||
|
||||
bool debugDump() {
|
||||
return isDebugDump;
|
||||
}
|
||||
@ -368,6 +373,10 @@ void setSpecialPadClass(int type) {
|
||||
specialPadClass = type;
|
||||
}
|
||||
|
||||
/// Stores `use` as the new value of the `isMotionControlsEnabled` setting.
void setIsMotionControlsEnabled(bool use) {
    isMotionControlsEnabled = use;
}
|
||||
|
||||
/// Stores `use` in the `separateupdatefolder` setting.
void setSeparateUpdateEnabled(bool use) {
    separateupdatefolder = use;
}
|
||||
@ -594,6 +603,7 @@ void load(const std::filesystem::path& path) {
|
||||
backButtonBehavior = toml::find_or<std::string>(input, "backButtonBehavior", "left");
|
||||
useSpecialPad = toml::find_or<bool>(input, "useSpecialPad", false);
|
||||
specialPadClass = toml::find_or<int>(input, "specialPadClass", 1);
|
||||
isMotionControlsEnabled = toml::find_or<bool>(input, "isMotionControlsEnabled", true);
|
||||
}
|
||||
|
||||
if (data.contains("GPU")) {
|
||||
@ -709,6 +719,7 @@ void save(const std::filesystem::path& path) {
|
||||
data["Input"]["backButtonBehavior"] = backButtonBehavior;
|
||||
data["Input"]["useSpecialPad"] = useSpecialPad;
|
||||
data["Input"]["specialPadClass"] = specialPadClass;
|
||||
data["Input"]["isMotionControlsEnabled"] = isMotionControlsEnabled;
|
||||
data["GPU"]["screenWidth"] = screenWidth;
|
||||
data["GPU"]["screenHeight"] = screenHeight;
|
||||
data["GPU"]["nullGpu"] = isNullGpu;
|
||||
|
@ -18,7 +18,7 @@ void saveMainWindow(const std::filesystem::path& path);
|
||||
std::string getTrophyKey();
|
||||
void setTrophyKey(std::string key);
|
||||
|
||||
bool isNeoMode();
|
||||
bool isNeoModeConsole();
|
||||
bool isFullscreenMode();
|
||||
bool getPlayBGM();
|
||||
int getBGMvolume();
|
||||
@ -38,6 +38,7 @@ int getCursorHideTimeout();
|
||||
std::string getBackButtonBehavior();
|
||||
bool getUseSpecialPad();
|
||||
int getSpecialPadClass();
|
||||
bool getIsMotionControlsEnabled();
|
||||
|
||||
u32 getScreenWidth();
|
||||
u32 getScreenHeight();
|
||||
@ -84,6 +85,7 @@ void setCursorHideTimeout(int newcursorHideTimeout);
|
||||
void setBackButtonBehavior(const std::string& type);
|
||||
void setUseSpecialPad(bool use);
|
||||
void setSpecialPadClass(int type);
|
||||
void setIsMotionControlsEnabled(bool use);
|
||||
|
||||
void setLogType(const std::string& type);
|
||||
void setLogFilter(const std::string& type);
|
||||
@ -139,4 +141,4 @@ void setDefaultValues();
|
||||
|
||||
// settings
|
||||
u32 GetLanguage();
|
||||
}; // namespace Config
|
||||
}; // namespace Config
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <string_view>
|
||||
|
||||
#include "assert.h"
|
||||
#include "bit_field.h"
|
||||
#include "singleton.h"
|
||||
#include "types.h"
|
||||
|
||||
@ -16,6 +17,46 @@ class Emulator;
|
||||
|
||||
namespace Common {
|
||||
|
||||
union PSFAttributes {
|
||||
/// Supports initial user's logout
|
||||
BitField<0, 1, u32> support_initial_user_logout;
|
||||
/// Enter button for the common dialog is cross.
|
||||
BitField<1, 1, u32> enter_button_cross;
|
||||
/// Warning dialog for PS Move is displayed in the options menu.
|
||||
BitField<2, 1, u32> ps_move_warning;
|
||||
/// Supports stereoscopic 3D.
|
||||
BitField<3, 1, u32> support_stereoscopic_3d;
|
||||
/// Suspends when PS button is pressed.
|
||||
BitField<4, 1, u32> ps_button_suspend;
|
||||
/// Enter button for the common dialog is assigned by the system software.
|
||||
BitField<5, 1, u32> enter_button_system;
|
||||
/// Overrides share menu behavior.
|
||||
BitField<6, 1, u32> override_share_menu;
|
||||
/// Suspends when PS button is pressed and special output resolution is set.
|
||||
BitField<8, 1, u32> special_res_ps_button_suspend;
|
||||
/// Enable HDCP.
|
||||
BitField<9, 1, u32> enable_hdcp;
|
||||
/// Disable HDCP for non-game.
|
||||
BitField<10, 1, u32> disable_hdcp_non_game;
|
||||
/// Supports PS VR.
|
||||
BitField<14, 1, u32> support_ps_vr;
|
||||
/// CPU mode (6 CPU)
|
||||
BitField<15, 1, u32> six_cpu_mode;
|
||||
/// CPU mode (7 CPU)
|
||||
BitField<16, 1, u32> seven_cpu_mode;
|
||||
/// Supports PS4 Pro (Neo) mode.
|
||||
BitField<23, 1, u32> support_neo_mode;
|
||||
/// Requires PS VR.
|
||||
BitField<26, 1, u32> require_ps_vr;
|
||||
/// Supports HDR.
|
||||
BitField<29, 1, u32> support_hdr;
|
||||
/// Display location.
|
||||
BitField<31, 1, u32> display_location;
|
||||
|
||||
u32 raw{};
|
||||
};
|
||||
static_assert(sizeof(PSFAttributes) == 4);
|
||||
|
||||
class ElfInfo {
|
||||
friend class Core::Emulator;
|
||||
|
||||
@ -26,6 +67,7 @@ class ElfInfo {
|
||||
std::string app_ver{};
|
||||
u32 firmware_ver = 0;
|
||||
u32 raw_firmware_ver = 0;
|
||||
PSFAttributes psf_attributes{};
|
||||
|
||||
public:
|
||||
static constexpr u32 FW_15 = 0x1500000;
|
||||
@ -68,6 +110,11 @@ public:
|
||||
ASSERT(initialized);
|
||||
return raw_firmware_ver;
|
||||
}
|
||||
|
||||
/// Returns a reference to the stored `psf_attributes`; asserts that `initialized` is set first.
// NOTE(review): this member function has the same name as the PSFAttributes type, which the
// class also uses unqualified when declaring `psf_attributes`. GCC may reject that as a
// declaration that "changes meaning" of the name -- confirm on the GCC build.
[[nodiscard]] const PSFAttributes& PSFAttributes() const {
    ASSERT(initialized);
    return psf_attributes;
}
|
||||
};
|
||||
|
||||
} // namespace Common
|
||||
|
@ -126,6 +126,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
|
||||
SUB(Lib, Vdec2) \
|
||||
SUB(Lib, Videodec) \
|
||||
SUB(Lib, RazorCpu) \
|
||||
SUB(Lib, Mouse) \
|
||||
CLS(Frontend) \
|
||||
CLS(Render) \
|
||||
SUB(Render, Vulkan) \
|
||||
|
@ -93,6 +93,7 @@ enum class Class : u8 {
|
||||
Lib_Vdec2, ///< The LibSceVideodec2 implementation.
|
||||
Lib_Videodec, ///< The LibSceVideodec implementation.
|
||||
Lib_RazorCpu, ///< The LibRazorCpu implementation.
|
||||
Lib_Mouse, ///< The LibSceMouse implementation
|
||||
Frontend, ///< Emulator UI
|
||||
Render, ///< Video Core
|
||||
Render_Vulkan, ///< Vulkan backend
|
||||
|
@ -15,13 +15,6 @@ class SDLPortBackend : public PortBackend {
|
||||
public:
|
||||
explicit SDLPortBackend(const PortOut& port)
|
||||
: frame_size(port.format_info.FrameSize()), guest_buffer_size(port.BufferSize()) {
|
||||
// We want the latency for delivering frames out to be as small as possible,
|
||||
// so set the sample frames hint to the number of frames per buffer.
|
||||
const auto samples_num_str = std::to_string(port.buffer_frames);
|
||||
if (!SDL_SetHint(SDL_HINT_AUDIO_DEVICE_SAMPLE_FRAMES, samples_num_str.c_str())) {
|
||||
LOG_WARNING(Lib_AudioOut, "Failed to set SDL audio sample frames hint to {}: {}",
|
||||
samples_num_str, SDL_GetError());
|
||||
}
|
||||
const SDL_AudioSpec fmt = {
|
||||
.format = port.format_info.is_float ? SDL_AUDIO_F32LE : SDL_AUDIO_S16LE,
|
||||
.channels = port.format_info.num_channels,
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include "core/address_space.h"
|
||||
#include "core/debug_state.h"
|
||||
#include "core/libraries/gnmdriver/gnm_error.h"
|
||||
#include "core/libraries/gnmdriver/gnmdriver_init.h"
|
||||
#include "core/libraries/kernel/orbis_error.h"
|
||||
#include "core/libraries/kernel/process.h"
|
||||
#include "core/libraries/libs.h"
|
||||
@ -54,244 +55,11 @@ enum ShaderStages : u32 {
|
||||
|
||||
static constexpr std::array indirect_sgpr_offsets{0u, 0u, 0x4cu, 0u, 0xccu, 0u, 0x14cu};
|
||||
|
||||
static constexpr auto HwInitPacketSize = 0x100u;
|
||||
|
||||
// clang-format off
|
||||
static constexpr std::array InitSequence{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1ffu,
|
||||
0xc0017600u, 0x46u, 0x1ffu,
|
||||
0xc0017600u, 0x87u, 0x1ffu,
|
||||
0xc0017600u, 0xc7u, 0x1ffu,
|
||||
0xc0017600u, 0x107u, 0u,
|
||||
0xc0017600u, 0x147u, 0x1ffu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6000000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
};
|
||||
static_assert(InitSequence.size() == 0x73 + 2);
|
||||
|
||||
static constexpr std::array InitSequence175{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1ffu,
|
||||
0xc0017600u, 0x46u, 0x1ffu,
|
||||
0xc0017600u, 0x87u, 0x1ffu,
|
||||
0xc0017600u, 0xc7u, 0x1ffu,
|
||||
0xc0017600u, 0x107u, 0u,
|
||||
0xc0017600u, 0x147u, 0x1ffu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
};
|
||||
static_assert(InitSequence175.size() == 0x73 + 2);
|
||||
|
||||
static constexpr std::array InitSequence200{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1701ffu,
|
||||
0xc0017600u, 0x46u, 0x1701fdu,
|
||||
0xc0017600u, 0x87u, 0x1701ffu,
|
||||
0xc0017600u, 0xc7u, 0x1701fdu,
|
||||
0xc0017600u, 0x107u, 0x17u,
|
||||
0xc0017600u, 0x147u, 0x1701fdu,
|
||||
0xc0017600u, 0x47u, 0x1cu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
};
|
||||
static_assert(InitSequence200.size() == 0x76 + 2);
|
||||
|
||||
static constexpr std::array InitSequence350{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1701ffu,
|
||||
0xc0017600u, 0x46u, 0x1701fdu,
|
||||
0xc0017600u, 0x87u, 0x1701ffu,
|
||||
0xc0017600u, 0xc7u, 0x1701fdu,
|
||||
0xc0017600u, 0x107u, 0x17u,
|
||||
0xc0017600u, 0x147u, 0x1701fdu,
|
||||
0xc0017600u, 0x47u, 0x1cu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x102u, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
0xc0016900u, 0x2aau, 0xffu,
|
||||
};
|
||||
static_assert(InitSequence350.size() == 0x7c + 2);
|
||||
|
||||
static constexpr std::array CtxInitSequence{
|
||||
0xc0012800u, 0x80000000u, 0x80000000u,
|
||||
0xc0001200u, 0u,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0016900u, 0x102u, 0u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0111000u, 0u
|
||||
};
|
||||
static_assert(CtxInitSequence.size() == 0x0f);
|
||||
|
||||
static constexpr std::array CtxInitSequence400{
|
||||
0xc0012800u, 0x80000000u, 0x80000000u,
|
||||
0xc0001200u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x102u, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0016900u, 0x2aau, 0xffu,
|
||||
0xc09e1000u,
|
||||
};
|
||||
static_assert(CtxInitSequence400.size() == 0x61);
|
||||
// clang-format on
|
||||
// Gates use of what appear to be the neo-mode init sequences but with the older
|
||||
// IA_MULTI_VGT_PARAM register address. No idea what this is for as the ioctl
|
||||
// that controls it is still a mystery, but leaving the sequences in gated behind
|
||||
// this flag in case we need it in the future.
|
||||
static constexpr bool UseNeoCompatSequences = false;
|
||||
|
||||
// If `submitDone` is issued, we need to block submissions until the GPU is idle
|
||||
static u32 submission_lock{};
|
||||
@ -317,6 +85,14 @@ static void WaitGpuIdle() {
|
||||
cv_lock.wait(lock, [] { return submission_lock == 0; });
|
||||
}
|
||||
|
||||
// Write a special ending NOP packet with N DWs data block
|
||||
static inline u32* WriteTrailingNop(u32* cmdbuf, u32 data_block_size) {
|
||||
auto* nop = reinterpret_cast<PM4CmdNop*>(cmdbuf);
|
||||
nop->header = PM4Type3Header{PM4ItOpcode::Nop, data_block_size - 1};
|
||||
nop->data_block[0] = 0u; // only one out of `data_block_size` is initialized
|
||||
return cmdbuf + data_block_size + 1 /* header */;
|
||||
}
|
||||
|
||||
// Write a special ending NOP packet with N DWs data block
|
||||
template <u32 data_block_size>
|
||||
static inline u32* WriteTrailingNop(u32* cmdbuf) {
|
||||
@ -607,9 +383,16 @@ s32 PS4_SYSV_ABI sceGnmDispatchIndirect(u32* cmdbuf, u32 size, u32 data_offset,
|
||||
return -1;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec() {
|
||||
LOG_ERROR(Lib_GnmDriver, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
/// Fills `cmdbuf` with the packet words for an indirect dispatch.
///
/// @param cmdbuf   Destination buffer; must not be null.
/// @param size     Buffer size in DWs; must be exactly 8.
/// @param args     Address of the dispatch arguments; must be non-zero and 4-byte aligned.
/// @param modifier Bit 0 is OR-ed into the first word; bits 3-4 (mask 0x18) go into the
///                 fourth word.
/// @return ORBIS_OK when the packet was written, ORBIS_FAIL when any argument is rejected.
s32 PS4_SYSV_ABI sceGnmDispatchIndirectOnMec(u32* cmdbuf, u32 size, VAddr args, u32 modifier) {
    if (cmdbuf == nullptr || size != 8 || args == 0 || (args & 3u) != 0) {
        return ORBIS_FAIL;
    }

    cmdbuf[0] = 0xc0021602 | (modifier & 1u);
    // Copy the address bytewise: &cmdbuf[1] is only guaranteed to be aligned for u32, so a
    // direct store through a VAddr* could be misaligned and would alias the u32 buffer.
    // (Presumably VAddr is 64 bits wide and fills cmdbuf[1..2] -- confirm.)
    std::memcpy(&cmdbuf[1], &args, sizeof(args));
    cmdbuf[3] = (modifier & 0x18) | 1u;
    // Words 4-5 appear to start a trailing NOP packet; only its first data DW is written.
    cmdbuf[4] = 0xc0021000;
    cmdbuf[5] = 0;
    return ORBIS_OK;
}
|
||||
|
||||
u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) {
|
||||
@ -619,17 +402,30 @@ u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x216u,
|
||||
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE0
|
||||
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x217u,
|
||||
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1
|
||||
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS
|
||||
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
|
||||
cmdbuf, 0x216u,
|
||||
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE0
|
||||
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
|
||||
cmdbuf, 0x217u,
|
||||
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE1
|
||||
|
||||
if (sceKernelIsNeoMode()) {
|
||||
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
|
||||
cmdbuf, 0x219u,
|
||||
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE2
|
||||
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
|
||||
cmdbuf, 0x21au,
|
||||
0xffffffffu); // COMPUTE_STATIC_THREAD_MGMT_SE3
|
||||
}
|
||||
|
||||
cmdbuf = PM4CmdSetData::SetShReg<PM4ShaderType::ShaderCompute>(
|
||||
cmdbuf, 0x215u, 0x170u); // COMPUTE_RESOURCE_LIMITS
|
||||
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::AcquireMem>(cmdbuf, 6);
|
||||
cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u, 0u);
|
||||
cmdbuf = WriteBody(cmdbuf, 0x28000000u, 0u, 0u, 0u, 0u, 0xau);
|
||||
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 0xef);
|
||||
cmdbuf = WriteBody(cmdbuf, 0xau, 0u);
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, sceKernelIsNeoMode() ? 0xe9 : 0xef);
|
||||
cmdbuf = WriteBody(cmdbuf, 0u);
|
||||
return HwInitPacketSize;
|
||||
}
|
||||
|
||||
@ -646,7 +442,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr
|
||||
draw_index->index_base_lo = u32(index_addr);
|
||||
draw_index->index_base_hi = u32(index_addr >> 32);
|
||||
draw_index->index_count = index_count;
|
||||
draw_index->draw_initiator = 0;
|
||||
draw_index->draw_initiator = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0;
|
||||
|
||||
WriteTrailingNop<3>(cmdbuf + 6);
|
||||
return ORBIS_OK;
|
||||
@ -659,8 +455,9 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexAuto(u32* cmdbuf, u32 size, u32 index_count, u32
|
||||
|
||||
if (cmdbuf && (size == 7) &&
|
||||
(flags & 0x1ffffffe) == 0) { // no predication will be set in the packet
|
||||
cmdbuf = WritePacket<PM4ItOpcode::DrawIndexAuto>(cmdbuf, PM4ShaderType::ShaderGraphics,
|
||||
index_count, 2u);
|
||||
cmdbuf = WritePacket<PM4ItOpcode::DrawIndexAuto>(
|
||||
cmdbuf, PM4ShaderType::ShaderGraphics, index_count,
|
||||
sceKernelIsNeoMode() ? flags & 0xe0000000u | 2u : 2u);
|
||||
WriteTrailingNop<3>(cmdbuf);
|
||||
return ORBIS_OK;
|
||||
}
|
||||
@ -684,7 +481,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirect(u32* cmdbuf, u32 size, u32 data_offset,
|
||||
cmdbuf[0] = data_offset;
|
||||
cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset;
|
||||
cmdbuf[2] = instance_sgpr_offset == 0 ? 0 : (instance_sgpr_offset & 0xffffu) + sgpr_offset;
|
||||
cmdbuf[3] = 0;
|
||||
cmdbuf[3] = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0u;
|
||||
|
||||
cmdbuf += 4;
|
||||
WriteTrailingNop<3>(cmdbuf);
|
||||
@ -699,8 +496,9 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da
|
||||
u32 flags) {
|
||||
LOG_TRACE(Lib_GnmDriver, "called");
|
||||
|
||||
if (cmdbuf && (size == 16) && (shader_stage < ShaderStages::Max) &&
|
||||
(vertex_sgpr_offset < 0x10u) && (instance_sgpr_offset < 0x10u)) {
|
||||
// `cmdbuf` must be non-null here: the guarded body writes the packet through it.
if ((!sceKernelIsNeoMode() || !UseNeoCompatSequences) && cmdbuf && (size == 16) &&
    (shader_stage < ShaderStages::Max) && (vertex_sgpr_offset < 0x10u) &&
    (instance_sgpr_offset < 0x10u)) {
|
||||
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, 2);
|
||||
cmdbuf = WriteBody(cmdbuf, 0u);
|
||||
@ -719,7 +517,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da
|
||||
cmdbuf[4] = max_count;
|
||||
*(u64*)(&cmdbuf[5]) = count_addr;
|
||||
cmdbuf[7] = sizeof(DrawIndexedIndirectArgs);
|
||||
cmdbuf[8] = 0;
|
||||
cmdbuf[8] = sceKernelIsNeoMode() ? flags & 0xe0000000u : 0;
|
||||
|
||||
cmdbuf += 9;
|
||||
WriteTrailingNop<2>(cmdbuf);
|
||||
@ -748,7 +546,8 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexOffset(u32* cmdbuf, u32 size, u32 index_offset,
|
||||
const auto predicate = flags & 1 ? PM4Predicate::PredEnable : PM4Predicate::PredDisable;
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::DrawIndexOffset2>(
|
||||
cmdbuf, 4, PM4ShaderType::ShaderGraphics, predicate);
|
||||
cmdbuf = WriteBody(cmdbuf, index_count, index_offset, index_count, 0u);
|
||||
cmdbuf = WriteBody(cmdbuf, index_count, index_offset, index_count,
|
||||
sceKernelIsNeoMode() ? flags & 0xe0000000u : 0u);
|
||||
|
||||
WriteTrailingNop<3>(cmdbuf);
|
||||
return ORBIS_OK;
|
||||
@ -772,7 +571,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32
|
||||
cmdbuf[0] = data_offset;
|
||||
cmdbuf[1] = vertex_sgpr_offset == 0 ? 0 : (vertex_sgpr_offset & 0xffffu) + sgpr_offset;
|
||||
cmdbuf[2] = instance_sgpr_offset == 0 ? 0 : (instance_sgpr_offset & 0xffffu) + sgpr_offset;
|
||||
cmdbuf[3] = 2; // auto index
|
||||
cmdbuf[3] = sceKernelIsNeoMode() ? flags & 0xe0000000u | 2u : 2u; // auto index
|
||||
|
||||
cmdbuf += 4;
|
||||
WriteTrailingNop<3>(cmdbuf);
|
||||
@ -801,6 +600,7 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(u32* cmdbuf, u32 size) {
|
||||
}
|
||||
|
||||
const auto& SetupContext = [](u32* cmdbuf, u32 size, bool clear_state) {
|
||||
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
|
||||
if (clear_state) {
|
||||
cmdbuf = ClearContextState(cmdbuf);
|
||||
}
|
||||
@ -808,10 +608,8 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState(u32* cmdbuf, u32 size) {
|
||||
std::memcpy(cmdbuf, &InitSequence[2], (InitSequence.size() - 2) * 4);
|
||||
cmdbuf += InitSequence.size() - 2;
|
||||
|
||||
const auto cmdbuf_left =
|
||||
HwInitPacketSize - (InitSequence.size() - 2) - (clear_state ? 0xc : 0) - 1;
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
|
||||
cmdbuf = WriteBody(cmdbuf, 0u);
|
||||
const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
|
||||
WriteTrailingNop(cmdbuf, cmdbuf_left);
|
||||
|
||||
return HwInitPacketSize;
|
||||
};
|
||||
@ -826,12 +624,13 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState175(u32* cmdbuf, u32 size) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
|
||||
cmdbuf = ClearContextState(cmdbuf);
|
||||
std::memcpy(cmdbuf, &InitSequence175[2], (InitSequence175.size() - 2) * 4);
|
||||
cmdbuf += InitSequence175.size() - 2;
|
||||
|
||||
constexpr auto cmdbuf_left = HwInitPacketSize - (InitSequence175.size() - 2) - 0xc - 1;
|
||||
WriteTrailingNop<cmdbuf_left>(cmdbuf);
|
||||
const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
|
||||
WriteTrailingNop(cmdbuf, cmdbuf_left);
|
||||
|
||||
return HwInitPacketSize;
|
||||
}
|
||||
@ -844,17 +643,27 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState200(u32* cmdbuf, u32 size) {
|
||||
}
|
||||
|
||||
const auto& SetupContext200 = [](u32* cmdbuf, u32 size, bool clear_state) {
|
||||
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
|
||||
if (clear_state) {
|
||||
cmdbuf = ClearContextState(cmdbuf);
|
||||
}
|
||||
|
||||
std::memcpy(cmdbuf, &InitSequence200[2], (InitSequence200.size() - 2) * 4);
|
||||
cmdbuf += InitSequence200.size() - 2;
|
||||
if (sceKernelIsNeoMode()) {
|
||||
if (!UseNeoCompatSequences) {
|
||||
std::memcpy(cmdbuf, &InitSequence200Neo[2], (InitSequence200Neo.size() - 2) * 4);
|
||||
cmdbuf += InitSequence200Neo.size() - 2;
|
||||
} else {
|
||||
std::memcpy(cmdbuf, &InitSequence200NeoCompat[2],
|
||||
(InitSequence200NeoCompat.size() - 2) * 4);
|
||||
cmdbuf += InitSequence200NeoCompat.size() - 2;
|
||||
}
|
||||
} else {
|
||||
std::memcpy(cmdbuf, &InitSequence200[2], (InitSequence200.size() - 2) * 4);
|
||||
cmdbuf += InitSequence200.size() - 2;
|
||||
}
|
||||
|
||||
const auto cmdbuf_left =
|
||||
HwInitPacketSize - (InitSequence200.size() - 2) - (clear_state ? 0xc : 0) - 1;
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
|
||||
cmdbuf = WriteBody(cmdbuf, 0u);
|
||||
const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
|
||||
WriteTrailingNop(cmdbuf, cmdbuf_left);
|
||||
|
||||
return HwInitPacketSize;
|
||||
};
|
||||
@ -870,17 +679,27 @@ u32 PS4_SYSV_ABI sceGnmDrawInitDefaultHardwareState350(u32* cmdbuf, u32 size) {
|
||||
}
|
||||
|
||||
const auto& SetupContext350 = [](u32* cmdbuf, u32 size, bool clear_state) {
|
||||
const auto* cmdbuf_end = cmdbuf + HwInitPacketSize;
|
||||
if (clear_state) {
|
||||
cmdbuf = ClearContextState(cmdbuf);
|
||||
}
|
||||
|
||||
std::memcpy(cmdbuf, &InitSequence350[2], (InitSequence350.size() - 2) * 4);
|
||||
cmdbuf += InitSequence350.size() - 2;
|
||||
if (sceKernelIsNeoMode()) {
|
||||
if (!UseNeoCompatSequences) {
|
||||
std::memcpy(cmdbuf, &InitSequence350Neo[2], (InitSequence350Neo.size() - 2) * 4);
|
||||
cmdbuf += InitSequence350Neo.size() - 2;
|
||||
} else {
|
||||
std::memcpy(cmdbuf, &InitSequence350NeoCompat[2],
|
||||
(InitSequence350NeoCompat.size() - 2) * 4);
|
||||
cmdbuf += InitSequence350NeoCompat.size() - 2;
|
||||
}
|
||||
} else {
|
||||
std::memcpy(cmdbuf, &InitSequence350[2], (InitSequence350.size() - 2) * 4);
|
||||
cmdbuf += InitSequence350.size() - 2;
|
||||
}
|
||||
|
||||
const auto cmdbuf_left =
|
||||
HwInitPacketSize - (InitSequence350.size() - 2) - (clear_state ? 0xc : 0) - 1;
|
||||
cmdbuf = WriteHeader<PM4ItOpcode::Nop>(cmdbuf, cmdbuf_left);
|
||||
cmdbuf = WriteBody(cmdbuf, 0u);
|
||||
const auto cmdbuf_left = cmdbuf_end - cmdbuf - 1;
|
||||
WriteTrailingNop(cmdbuf, cmdbuf_left);
|
||||
|
||||
return HwInitPacketSize;
|
||||
};
|
||||
@ -896,7 +715,11 @@ u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState(u32* cmdbuf, u32 size) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::memcpy(cmdbuf, CtxInitSequence.data(), CtxInitSequence.size() * 4);
|
||||
if (sceKernelIsNeoMode()) {
|
||||
std::memcpy(cmdbuf, CtxInitSequenceNeo.data(), CtxInitSequenceNeo.size() * 4);
|
||||
} else {
|
||||
std::memcpy(cmdbuf, CtxInitSequence.data(), CtxInitSequence.size() * 4);
|
||||
}
|
||||
return CtxInitPacketSize;
|
||||
}
|
||||
|
||||
@ -908,7 +731,16 @@ u32 PS4_SYSV_ABI sceGnmDrawInitToDefaultContextState400(u32* cmdbuf, u32 size) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::memcpy(cmdbuf, CtxInitSequence400.data(), CtxInitSequence400.size() * 4);
|
||||
if (sceKernelIsNeoMode()) {
|
||||
if (!UseNeoCompatSequences) {
|
||||
std::memcpy(cmdbuf, CtxInitSequence400Neo.data(), CtxInitSequence400Neo.size() * 4);
|
||||
} else {
|
||||
std::memcpy(cmdbuf, CtxInitSequence400NeoCompat.data(),
|
||||
CtxInitSequence400NeoCompat.size() * 4);
|
||||
}
|
||||
} else {
|
||||
std::memcpy(cmdbuf, CtxInitSequence400.data(), CtxInitSequence400.size() * 4);
|
||||
}
|
||||
return CtxInitPacketSize;
|
||||
}
|
||||
|
||||
@ -1030,7 +862,8 @@ int PS4_SYSV_ABI sceGnmGetGpuBlockStatus() {
|
||||
|
||||
u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() {
|
||||
LOG_TRACE(Lib_GnmDriver, "called");
|
||||
return Config::isNeoMode() ? 911'000'000 : 800'000'000;
|
||||
// On console this uses an ioctl check, but we assume it is equal to just checking for neo mode.
|
||||
return sceKernelIsNeoMode() ? 911'000'000 : 800'000'000;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmGetGpuInfoStatus() {
|
||||
@ -1369,7 +1202,15 @@ s32 PS4_SYSV_ABI sceGnmResetVgtControl(u32* cmdbuf, u32 size) {
|
||||
if (cmdbuf == nullptr || size != 3) {
|
||||
return -1;
|
||||
}
|
||||
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, 0xffu); // IA_MULTI_VGT_PARAM
|
||||
if (sceKernelIsNeoMode()) {
|
||||
if (!UseNeoCompatSequences) {
|
||||
PM4CmdSetData::SetUconfigReg(cmdbuf, 0x40000258u, 0x6d007fu); // IA_MULTI_VGT_PARAM
|
||||
} else {
|
||||
PM4CmdSetData::SetContextReg(cmdbuf, 0x100002aau, 0xd00ffu); // IA_MULTI_VGT_PARAM
|
||||
}
|
||||
} else {
|
||||
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, 0xffu); // IA_MULTI_VGT_PARAM
|
||||
}
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
@ -1830,9 +1671,25 @@ s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_mi
|
||||
return -1;
|
||||
}
|
||||
|
||||
const u32 reg_value =
|
||||
((partial_vs_wave_mode & 1) << 0x10) | (prim_group_sz_minus_one & 0xffffu);
|
||||
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, reg_value); // IA_MULTI_VGT_PARAM
|
||||
if (sceKernelIsNeoMode()) {
|
||||
const u32 wd_switch_on_eop = u32(wd_switch_only_on_eop_mode != 0) << 0x14;
|
||||
const u32 switch_on_eoi = u32(wd_switch_only_on_eop_mode == 0) << 0x13;
|
||||
const u32 reg_value =
|
||||
wd_switch_only_on_eop_mode != 0
|
||||
? (partial_vs_wave_mode & 1) << 0x10 | prim_group_sz_minus_one | wd_switch_on_eop |
|
||||
switch_on_eoi | 0x40000u
|
||||
: prim_group_sz_minus_one & 0x1cffffu | wd_switch_on_eop | switch_on_eoi | 0x50000u;
|
||||
if (!UseNeoCompatSequences) {
|
||||
PM4CmdSetData::SetUconfigReg(cmdbuf, 0x40000258u,
|
||||
reg_value | 0x600000u); // IA_MULTI_VGT_PARAM
|
||||
} else {
|
||||
PM4CmdSetData::SetContextReg(cmdbuf, 0x100002aau, reg_value); // IA_MULTI_VGT_PARAM
|
||||
}
|
||||
} else {
|
||||
const u32 reg_value =
|
||||
((partial_vs_wave_mode & 1) << 0x10) | (prim_group_sz_minus_one & 0xffffu);
|
||||
PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, reg_value); // IA_MULTI_VGT_PARAM
|
||||
}
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
@ -2215,9 +2072,25 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count,
|
||||
if (sdk_version <= 0x1ffffffu) {
|
||||
liverpool->SubmitGfx(InitSequence, {});
|
||||
} else if (sdk_version <= 0x3ffffffu) {
|
||||
liverpool->SubmitGfx(InitSequence200, {});
|
||||
if (sceKernelIsNeoMode()) {
|
||||
if (!UseNeoCompatSequences) {
|
||||
liverpool->SubmitGfx(InitSequence200Neo, {});
|
||||
} else {
|
||||
liverpool->SubmitGfx(InitSequence200NeoCompat, {});
|
||||
}
|
||||
} else {
|
||||
liverpool->SubmitGfx(InitSequence200, {});
|
||||
}
|
||||
} else {
|
||||
liverpool->SubmitGfx(InitSequence350, {});
|
||||
if (sceKernelIsNeoMode()) {
|
||||
if (!UseNeoCompatSequences) {
|
||||
liverpool->SubmitGfx(InitSequence350Neo, {});
|
||||
} else {
|
||||
liverpool->SubmitGfx(InitSequence350NeoCompat, {});
|
||||
}
|
||||
} else {
|
||||
liverpool->SubmitGfx(InitSequence350, {});
|
||||
}
|
||||
}
|
||||
send_init_packet = false;
|
||||
}
|
||||
|
@ -39,7 +39,7 @@ int PS4_SYSV_ABI sceGnmDisableMipStatsReport();
|
||||
s32 PS4_SYSV_ABI sceGnmDispatchDirect(u32* cmdbuf, u32 size, u32 threads_x, u32 threads_y,
|
||||
u32 threads_z, u32 flags);
|
||||
s32 PS4_SYSV_ABI sceGnmDispatchIndirect(u32* cmdbuf, u32 size, u32 data_offset, u32 flags);
|
||||
int PS4_SYSV_ABI sceGnmDispatchIndirectOnMec();
|
||||
s32 PS4_SYSV_ABI sceGnmDispatchIndirectOnMec(u32* cmdbuf, u32 size, VAddr args, u32 modifier);
|
||||
u32 PS4_SYSV_ABI sceGnmDispatchInitDefaultHardwareState(u32* cmdbuf, u32 size);
|
||||
s32 PS4_SYSV_ABI sceGnmDrawIndex(u32* cmdbuf, u32 size, u32 index_count, uintptr_t index_addr,
|
||||
u32 flags, u32 type);
|
||||
|
542
src/core/libraries/gnmdriver/gnmdriver_init.h
Normal file
542
src/core/libraries/gnmdriver/gnmdriver_init.h
Normal file
@ -0,0 +1,542 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
namespace Libraries::GnmDriver {
|
||||
|
||||
constexpr auto HwInitPacketSize = 0x100u;
|
||||
|
||||
// clang-format off
|
||||
constexpr std::array InitSequence{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1ffu,
|
||||
0xc0017600u, 0x46u, 0x1ffu,
|
||||
0xc0017600u, 0x87u, 0x1ffu,
|
||||
0xc0017600u, 0xc7u, 0x1ffu,
|
||||
0xc0017600u, 0x107u, 0u,
|
||||
0xc0017600u, 0x147u, 0x1ffu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6000000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
};
|
||||
static_assert(InitSequence.size() == 0x73 + 2);
|
||||
|
||||
constexpr std::array InitSequence175{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1ffu,
|
||||
0xc0017600u, 0x46u, 0x1ffu,
|
||||
0xc0017600u, 0x87u, 0x1ffu,
|
||||
0xc0017600u, 0xc7u, 0x1ffu,
|
||||
0xc0017600u, 0x107u, 0u,
|
||||
0xc0017600u, 0x147u, 0x1ffu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
};
|
||||
static_assert(InitSequence175.size() == 0x73 + 2);
|
||||
|
||||
constexpr std::array InitSequence200{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1701ffu,
|
||||
0xc0017600u, 0x46u, 0x1701fdu,
|
||||
0xc0017600u, 0x87u, 0x1701ffu,
|
||||
0xc0017600u, 0xc7u, 0x1701fdu,
|
||||
0xc0017600u, 0x107u, 0x17u,
|
||||
0xc0017600u, 0x147u, 0x1701fdu,
|
||||
0xc0017600u, 0x47u, 0x1cu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
};
|
||||
static_assert(InitSequence200.size() == 0x76 + 2);
|
||||
|
||||
constexpr std::array InitSequence200Neo{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x219u, 0xffffffffu,
|
||||
0xc0017600u, 0x21au, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1701ffu,
|
||||
0xc0017600u, 0x46u, 0x1701fdu,
|
||||
0xc0017600u, 0x87u, 0x1701ffu,
|
||||
0xc0017600u, 0xc7u, 0x1701fdu,
|
||||
0xc0017600u, 0x107u, 0x17u,
|
||||
0xc0017600u, 0x147u, 0x1701fdu,
|
||||
0xc0017600u, 0x47u, 0x1cu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
0xc0017900u, 0x40000258u, 0x6d007fu,
|
||||
};
|
||||
static_assert(InitSequence200Neo.size() == 0x83 + 2);
|
||||
|
||||
constexpr std::array InitSequence200NeoCompat{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x219u, 0xffffffffu,
|
||||
0xc0017600u, 0x21au, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1701ffu,
|
||||
0xc0017600u, 0x46u, 0x1701fdu,
|
||||
0xc0017600u, 0x87u, 0x1701ffu,
|
||||
0xc0017600u, 0xc7u, 0x1701fdu,
|
||||
0xc0017600u, 0x107u, 0x17u,
|
||||
0xc0017600u, 0x147u, 0x1701fdu,
|
||||
0xc0017600u, 0x47u, 0x1cu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
0xc0016900u, 0x100002aau, 0xd00ffu,
|
||||
};
|
||||
static_assert(InitSequence200NeoCompat.size() == 0x83 + 2);
|
||||
|
||||
constexpr std::array InitSequence350{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1701ffu,
|
||||
0xc0017600u, 0x46u, 0x1701fdu,
|
||||
0xc0017600u, 0x87u, 0x1701ffu,
|
||||
0xc0017600u, 0xc7u, 0x1701fdu,
|
||||
0xc0017600u, 0x107u, 0x17u,
|
||||
0xc0017600u, 0x147u, 0x1701fdu,
|
||||
0xc0017600u, 0x47u, 0x1cu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x102u, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
0xc0016900u, 0x2aau, 0xffu,
|
||||
};
|
||||
static_assert(InitSequence350.size() == 0x7c + 2);
|
||||
|
||||
constexpr std::array InitSequence350Neo{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x219u, 0xffffffffu,
|
||||
0xc0017600u, 0x21au, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1701ffu,
|
||||
0xc0017600u, 0x46u, 0x1701fdu,
|
||||
0xc0017600u, 0x87u, 0x1701ffu,
|
||||
0xc0017600u, 0xc7u, 0x1701fdu,
|
||||
0xc0017600u, 0x107u, 0x17u,
|
||||
0xc0017600u, 0x147u, 0x1701fdu,
|
||||
0xc0017600u, 0x47u, 0x1cu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x102u, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
0xc0017900u, 0x40000258u, 0x6d007fu,
|
||||
};
|
||||
static_assert(InitSequence350Neo.size() == 0x86 + 2);
|
||||
|
||||
constexpr std::array InitSequence350NeoCompat{
|
||||
// A fake preamble to mimic context reset sent by FW
|
||||
0xc0001200u, 0u, // IT_CLEAR_STATE
|
||||
|
||||
// Actual init state sequence
|
||||
0xc0017600u, 0x216u, 0xffffffffu,
|
||||
0xc0017600u, 0x217u, 0xffffffffu,
|
||||
0xc0017600u, 0x219u, 0xffffffffu,
|
||||
0xc0017600u, 0x21au, 0xffffffffu,
|
||||
0xc0017600u, 0x215u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0017600u, 7u, 0x1701ffu,
|
||||
0xc0017600u, 0x46u, 0x1701fdu,
|
||||
0xc0017600u, 0x87u, 0x1701ffu,
|
||||
0xc0017600u, 0xc7u, 0x1701fdu,
|
||||
0xc0017600u, 0x107u, 0x17u,
|
||||
0xc0017600u, 0x147u, 0x1701fdu,
|
||||
0xc0017600u, 0x47u, 0x1cu,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x102u, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x200u, 0xe0000000u,
|
||||
0xc0016900u, 0x100002aau, 0xd00ffu,
|
||||
};
|
||||
static_assert(InitSequence350NeoCompat.size() == 0x86 + 2);
|
||||
|
||||
constexpr std::array CtxInitSequence{
|
||||
0xc0012800u, 0x80000000u, 0x80000000u,
|
||||
0xc0001200u, 0u,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0016900u, 0x102u, 0u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0111000u, 0u
|
||||
};
|
||||
static_assert(CtxInitSequence.size() == 0x0f);
|
||||
|
||||
// Context-init command stream copied into the caller's buffer by
// sceGnmDrawInitToDefaultContextState when sceKernelIsNeoMode() is true.
// Compared with CtxInitSequence it carries one extra 0xc0026900 packet
// (register 0xeb, two data dwords) and a correspondingly different final header.
// NOTE(review): the final 0xc00d1000u/0u pair looks like a NOP header that pads
// the stream up to CtxInitPacketSize — confirm against the PM4 opcode table.
constexpr std::array CtxInitSequenceNeo{
    0xc0012800u, 0x80000000u, 0x80000000u,
    0xc0001200u, 0u,
    0xc0002f00u, 1u,
    0xc0016900u, 0x102u, 0u,
    0xc0016900u, 0x202u, 0xcc0010u,
    0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
    0xc00d1000u, 0u
};
// Guards against accidentally adding or dropping a dword in the table above.
static_assert(CtxInitSequenceNeo.size() == 0x13);
|
||||
|
||||
constexpr std::array CtxInitSequence400{
|
||||
0xc0012800u, 0x80000000u, 0x80000000u,
|
||||
0xc0001200u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x102u, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0016900u, 0x2aau, 0xffu,
|
||||
0xc09e1000u,
|
||||
};
|
||||
static_assert(CtxInitSequence400.size() == 0x61);
|
||||
|
||||
constexpr std::array CtxInitSequence400Neo{
|
||||
0xc0012800u, 0x80000000u, 0x80000000u,
|
||||
0xc0001200u, 0u,
|
||||
0xc0016900u, 0x2f9u, 0x2du,
|
||||
0xc0016900u, 0x282u, 8u,
|
||||
0xc0016900u, 0x280u, 0x80008u,
|
||||
0xc0016900u, 0x281u, 0xffff0000u,
|
||||
0xc0016900u, 0x204u, 0u,
|
||||
0xc0016900u, 0x206u, 0x43fu,
|
||||
0xc0016900u, 0x83u, 0xffffu,
|
||||
0xc0016900u, 0x317u, 0x10u,
|
||||
0xc0016900u, 0x2fau, 0x3f800000u,
|
||||
0xc0016900u, 0x2fcu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fbu, 0x3f800000u,
|
||||
0xc0016900u, 0x2fdu, 0x3f800000u,
|
||||
0xc0016900u, 0x202u, 0xcc0010u,
|
||||
0xc0016900u, 0x30eu, 0xffffffffu,
|
||||
0xc0016900u, 0x30fu, 0xffffffffu,
|
||||
0xc0002f00u, 1u,
|
||||
0xc0016900u, 0x1b1u, 2u,
|
||||
0xc0016900u, 0x101u, 0u,
|
||||
0xc0016900u, 0x100u, 0xffffffffu,
|
||||
0xc0016900u, 0x103u, 0u,
|
||||
0xc0016900u, 0x284u, 0u,
|
||||
0xc0016900u, 0x290u, 0u,
|
||||
0xc0016900u, 0x2aeu, 0u,
|
||||
0xc0016900u, 0x102u, 0u,
|
||||
0xc0016900u, 0x292u, 0u,
|
||||
0xc0016900u, 0x293u, 0x6020000u,
|
||||
0xc0016900u, 0x2f8u, 0u,
|
||||
0xc0016900u, 0x2deu, 0x1e9u,
|
||||
0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
|
||||
0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
|
||||
0xc0017900u, 0x40000258u, 0x6d007fu,
|
||||
0xc09a1000u,
|
||||
};
|
||||
static_assert(CtxInitSequence400Neo.size() == 0x65);
|
||||
|
||||
// Context-init command stream copied into the caller's buffer by
// sceGnmDrawInitToDefaultContextState400 when sceKernelIsNeoMode() is true and
// UseNeoCompatSequences is set.
// It matches CtxInitSequence400Neo except for the IA_MULTI_VGT_PARAM write near
// the end: the compat variant uses a 0xc0016900 packet targeting 0x100002aa with
// value 0xd00ff, where the non-compat variant uses 0xc0017900 / 0x40000258 / 0x6d007f.
// NOTE(review): the final 0xc09a1000u dword looks like a NOP header padding the
// stream up to CtxInitPacketSize — confirm against the PM4 opcode table.
constexpr std::array CtxInitSequence400NeoCompat{
    0xc0012800u, 0x80000000u, 0x80000000u,
    0xc0001200u, 0u,
    0xc0016900u, 0x2f9u, 0x2du,
    0xc0016900u, 0x282u, 8u,
    0xc0016900u, 0x280u, 0x80008u,
    0xc0016900u, 0x281u, 0xffff0000u,
    0xc0016900u, 0x204u, 0u,
    0xc0016900u, 0x206u, 0x43fu,
    0xc0016900u, 0x83u, 0xffffu,
    0xc0016900u, 0x317u, 0x10u,
    0xc0016900u, 0x2fau, 0x3f800000u,
    0xc0016900u, 0x2fcu, 0x3f800000u,
    0xc0016900u, 0x2fbu, 0x3f800000u,
    0xc0016900u, 0x2fdu, 0x3f800000u,
    0xc0016900u, 0x202u, 0xcc0010u,
    0xc0016900u, 0x30eu, 0xffffffffu,
    0xc0016900u, 0x30fu, 0xffffffffu,
    0xc0002f00u, 1u,
    0xc0016900u, 0x1b1u, 2u,
    0xc0016900u, 0x101u, 0u,
    0xc0016900u, 0x100u, 0xffffffffu,
    0xc0016900u, 0x103u, 0u,
    0xc0016900u, 0x284u, 0u,
    0xc0016900u, 0x290u, 0u,
    0xc0016900u, 0x2aeu, 0u,
    0xc0016900u, 0x102u, 0u,
    0xc0016900u, 0x292u, 0u,
    0xc0016900u, 0x293u, 0x6020000u,
    0xc0016900u, 0x2f8u, 0u,
    0xc0016900u, 0x2deu, 0x1e9u,
    0xc0026900u, 0xebu, 0xff00ff00u, 0xff00u,
    0xc0036900u, 0x295u, 0x100u, 0x100u, 4u,
    0xc0016900u, 0x100002aau, 0xd00ffu,
    0xc09a1000u,
};
// Checks THIS table's length; the previous assert re-checked CtxInitSequence400Neo
// by mistake, leaving the compat table unverified.
static_assert(CtxInitSequence400NeoCompat.size() == 0x65);
|
||||
// clang-format on
|
||||
|
||||
} // namespace Libraries::GnmDriver
|
@ -14,7 +14,8 @@ namespace Libraries::Kernel {
|
||||
|
||||
int PS4_SYSV_ABI sceKernelIsNeoMode() {
|
||||
LOG_DEBUG(Kernel_Sce, "called");
|
||||
return Config::isNeoMode();
|
||||
return Config::isNeoModeConsole() &&
|
||||
Common::ElfInfo::Instance().PSFAttributes().support_neo_mode;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceKernelGetCompiledSdkVersion(int* ver) {
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "core/libraries/libc_internal/libc_internal.h"
|
||||
#include "core/libraries/libpng/pngdec.h"
|
||||
#include "core/libraries/libs.h"
|
||||
#include "core/libraries/mouse/mouse.h"
|
||||
#include "core/libraries/move/move.h"
|
||||
#include "core/libraries/network/http.h"
|
||||
#include "core/libraries/network/net.h"
|
||||
@ -97,6 +98,7 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) {
|
||||
Libraries::Move::RegisterlibSceMove(sym);
|
||||
Libraries::Fiber::RegisterlibSceFiber(sym);
|
||||
Libraries::JpegEnc::RegisterlibSceJpegEnc(sym);
|
||||
Libraries::Mouse::RegisterlibSceMouse(sym);
|
||||
}
|
||||
|
||||
} // namespace Libraries
|
||||
|
99
src/core/libraries/mouse/mouse.cpp
Normal file
99
src/core/libraries/mouse/mouse.cpp
Normal file
@ -0,0 +1,99 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
// Generated By moduleGenerator
|
||||
#include "common/logging/log.h"
|
||||
#include "core/libraries/error_codes.h"
|
||||
#include "core/libraries/libs.h"
|
||||
#include "mouse.h"
|
||||
|
||||
namespace Libraries::Mouse {
|
||||
|
||||
int PS4_SYSV_ABI sceMouseClose() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseConnectPort() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseDebugGetDeviceId() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseDeviceOpen() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseDisconnectDevice() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseDisconnectPort() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseGetDeviceInfo() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseInit() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseMbusInit() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseOpen() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseRead() {
|
||||
LOG_DEBUG(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseSetHandType() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseSetPointerSpeed() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceMouseSetProcessPrivilege() {
|
||||
LOG_ERROR(Lib_Mouse, "(STUBBED) called");
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
// Registers every libSceMouse entry point defined in this file.
// Each LIB_FUNCTION line pairs an NID string with the stub that implements it,
// all under library/module name "libSceMouse".
// NOTE(review): `sym` is never named in this body; presumably the LIB_FUNCTION
// macro (defined outside this file) references it — confirm in libs.h.
void RegisterlibSceMouse(Core::Loader::SymbolsResolver* sym) {
    LIB_FUNCTION("cAnT0Rw-IwU", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseClose);
    LIB_FUNCTION("Ymyy1HSSJLQ", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseConnectPort);
    LIB_FUNCTION("BRXOoXQtb+k", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDebugGetDeviceId);
    LIB_FUNCTION("WiGKINCZWkc", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDeviceOpen);
    LIB_FUNCTION("eDQTFHbgeTU", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDisconnectDevice);
    LIB_FUNCTION("jJP1vYMEPd4", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseDisconnectPort);
    LIB_FUNCTION("QA9Qupz3Zjw", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseGetDeviceInfo);
    LIB_FUNCTION("Qs0wWulgl7U", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseInit);
    LIB_FUNCTION("1FeceR5YhAo", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseMbusInit);
    LIB_FUNCTION("RaqxZIf6DvE", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseOpen);
    LIB_FUNCTION("x8qnXqh-tiM", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseRead);
    LIB_FUNCTION("crkFfp-cmFo", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseSetHandType);
    LIB_FUNCTION("ghLUU2Z5Lcg", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseSetPointerSpeed);
    LIB_FUNCTION("6aANndpS0Wo", "libSceMouse", 1, "libSceMouse", 1, 1, sceMouseSetProcessPrivilege);
}
|
||||
|
||||
} // namespace Libraries::Mouse
|
29
src/core/libraries/mouse/mouse.h
Normal file
29
src/core/libraries/mouse/mouse.h
Normal file
@ -0,0 +1,29 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
#include "common/types.h"
|
||||
|
||||
namespace Core::Loader {
|
||||
class SymbolsResolver;
|
||||
}
|
||||
|
||||
namespace Libraries::Mouse {
|
||||
|
||||
int PS4_SYSV_ABI sceMouseClose();
|
||||
int PS4_SYSV_ABI sceMouseConnectPort();
|
||||
int PS4_SYSV_ABI sceMouseDebugGetDeviceId();
|
||||
int PS4_SYSV_ABI sceMouseDeviceOpen();
|
||||
int PS4_SYSV_ABI sceMouseDisconnectDevice();
|
||||
int PS4_SYSV_ABI sceMouseDisconnectPort();
|
||||
int PS4_SYSV_ABI sceMouseGetDeviceInfo();
|
||||
int PS4_SYSV_ABI sceMouseInit();
|
||||
int PS4_SYSV_ABI sceMouseMbusInit();
|
||||
int PS4_SYSV_ABI sceMouseOpen();
|
||||
int PS4_SYSV_ABI sceMouseRead();
|
||||
int PS4_SYSV_ABI sceMouseSetHandType();
|
||||
int PS4_SYSV_ABI sceMouseSetPointerSpeed();
|
||||
int PS4_SYSV_ABI sceMouseSetProcessPrivilege();
|
||||
|
||||
void RegisterlibSceMouse(Core::Loader::SymbolsResolver* sym);
|
||||
} // namespace Libraries::Mouse
|
@ -157,7 +157,7 @@ s32 PS4_SYSV_ABI scePlayGoGetLocus(OrbisPlayGoHandle handle, const OrbisPlayGoCh
|
||||
}
|
||||
|
||||
for (int i = 0; i < numberOfEntries; i++) {
|
||||
if (chunkIds[i] <= playgo->chunks.size()) {
|
||||
if (chunkIds[i] < playgo->chunks.size()) {
|
||||
outLoci[i] = OrbisPlayGoLocus::LocalFast;
|
||||
} else {
|
||||
outLoci[i] = OrbisPlayGoLocus::NotDownloaded;
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "common/debug.h"
|
||||
#include "core/libraries/kernel/memory.h"
|
||||
#include "core/libraries/kernel/orbis_error.h"
|
||||
#include "core/libraries/kernel/process.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
|
||||
@ -35,7 +36,7 @@ MemoryManager::~MemoryManager() = default;
|
||||
|
||||
void MemoryManager::SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1,
|
||||
bool use_extended_mem2) {
|
||||
const bool is_neo = Config::isNeoMode();
|
||||
const bool is_neo = ::Libraries::Kernel::sceKernelIsNeoMode();
|
||||
auto total_size = is_neo ? SCE_KERNEL_TOTAL_MEM_PRO : SCE_KERNEL_TOTAL_MEM;
|
||||
if (!use_extended_mem1 && is_neo) {
|
||||
total_size -= 256_MB;
|
||||
|
105
src/emulator.cpp
105
src/emulator.cpp
@ -28,8 +28,6 @@
|
||||
#include "core/file_format/trp.h"
|
||||
#include "core/file_sys/fs.h"
|
||||
#include "core/libraries/disc_map/disc_map.h"
|
||||
#include "core/libraries/fiber/fiber.h"
|
||||
#include "core/libraries/jpeg/jpegenc.h"
|
||||
#include "core/libraries/libc_internal/libc_internal.h"
|
||||
#include "core/libraries/libs.h"
|
||||
#include "core/libraries/ngs2/ngs2.h"
|
||||
@ -59,8 +57,8 @@ Emulator::Emulator() {
|
||||
LOG_INFO(Loader, "Branch {}", Common::g_scm_branch);
|
||||
LOG_INFO(Loader, "Description {}", Common::g_scm_desc);
|
||||
|
||||
LOG_INFO(Config, "General Logtype: {}", Config::getLogType());
|
||||
LOG_INFO(Config, "General isNeo: {}", Config::isNeoMode());
|
||||
LOG_INFO(Config, "General LogType: {}", Config::getLogType());
|
||||
LOG_INFO(Config, "General isNeo: {}", Config::isNeoModeConsole());
|
||||
LOG_INFO(Config, "GPU isNullGpu: {}", Config::nullGpu());
|
||||
LOG_INFO(Config, "GPU shouldDumpShaders: {}", Config::dumpShaders());
|
||||
LOG_INFO(Config, "GPU vblankDivider: {}", Config::vblankDiv());
|
||||
@ -101,19 +99,12 @@ Emulator::~Emulator() {
|
||||
}
|
||||
|
||||
void Emulator::Run(const std::filesystem::path& file) {
|
||||
|
||||
// Use the eboot from the separated updates folder if it's there
|
||||
std::filesystem::path game_patch_folder = file.parent_path();
|
||||
game_patch_folder += "-UPDATE";
|
||||
std::filesystem::path eboot_path = std::filesystem::exists(game_patch_folder / file.filename())
|
||||
? game_patch_folder / file.filename()
|
||||
: file;
|
||||
|
||||
// Applications expect to be run from /app0 so mount the file's parent path as app0.
|
||||
auto* mnt = Common::Singleton<Core::FileSys::MntPoints>::Instance();
|
||||
mnt->Mount(file.parent_path(), "/app0");
|
||||
const auto game_folder = file.parent_path();
|
||||
mnt->Mount(game_folder, "/app0");
|
||||
// Certain games may use /hostapp as well such as CUSA001100
|
||||
mnt->Mount(file.parent_path(), "/hostapp");
|
||||
mnt->Mount(game_folder, "/hostapp");
|
||||
|
||||
auto& game_info = Common::ElfInfo::Instance();
|
||||
|
||||
@ -122,50 +113,52 @@ void Emulator::Run(const std::filesystem::path& file) {
|
||||
std::string title;
|
||||
std::string app_version;
|
||||
u32 fw_version;
|
||||
Common::PSFAttributes psf_attributes{};
|
||||
|
||||
std::filesystem::path sce_sys_folder = eboot_path.parent_path() / "sce_sys";
|
||||
if (std::filesystem::is_directory(sce_sys_folder)) {
|
||||
for (const auto& entry : std::filesystem::directory_iterator(sce_sys_folder)) {
|
||||
if (entry.path().filename() == "param.sfo") {
|
||||
auto* param_sfo = Common::Singleton<PSF>::Instance();
|
||||
const bool success = param_sfo->Open(sce_sys_folder / "param.sfo");
|
||||
ASSERT_MSG(success, "Failed to open param.sfo");
|
||||
const auto content_id = param_sfo->GetString("CONTENT_ID");
|
||||
ASSERT_MSG(content_id.has_value(), "Failed to get CONTENT_ID");
|
||||
id = std::string(*content_id, 7, 9);
|
||||
Libraries::NpTrophy::game_serial = id;
|
||||
const auto trophyDir =
|
||||
Common::FS::GetUserPath(Common::FS::PathType::MetaDataDir) / id / "TrophyFiles";
|
||||
if (!std::filesystem::exists(trophyDir)) {
|
||||
TRP trp;
|
||||
if (!trp.Extract(eboot_path.parent_path(), id)) {
|
||||
LOG_ERROR(Loader, "Couldn't extract trophies");
|
||||
}
|
||||
}
|
||||
const auto param_sfo_path = mnt->GetHostPath("/app0/sce_sys/param.sfo");
|
||||
if (std::filesystem::exists(param_sfo_path)) {
|
||||
auto* param_sfo = Common::Singleton<PSF>::Instance();
|
||||
const bool success = param_sfo->Open(param_sfo_path);
|
||||
ASSERT_MSG(success, "Failed to open param.sfo");
|
||||
const auto content_id = param_sfo->GetString("CONTENT_ID");
|
||||
ASSERT_MSG(content_id.has_value(), "Failed to get CONTENT_ID");
|
||||
id = std::string(*content_id, 7, 9);
|
||||
Libraries::NpTrophy::game_serial = id;
|
||||
const auto trophyDir =
|
||||
Common::FS::GetUserPath(Common::FS::PathType::MetaDataDir) / id / "TrophyFiles";
|
||||
if (!std::filesystem::exists(trophyDir)) {
|
||||
TRP trp;
|
||||
if (!trp.Extract(game_folder, id)) {
|
||||
LOG_ERROR(Loader, "Couldn't extract trophies");
|
||||
}
|
||||
}
|
||||
#ifdef ENABLE_QT_GUI
|
||||
MemoryPatcher::g_game_serial = id;
|
||||
MemoryPatcher::g_game_serial = id;
|
||||
|
||||
// Timer for 'Play Time'
|
||||
QTimer* timer = new QTimer();
|
||||
QObject::connect(timer, &QTimer::timeout, [this, id]() {
|
||||
UpdatePlayTime(id);
|
||||
start_time = std::chrono::steady_clock::now();
|
||||
});
|
||||
timer->start(60000); // 60000 ms = 1 minute
|
||||
// Timer for 'Play Time'
|
||||
QTimer* timer = new QTimer();
|
||||
QObject::connect(timer, &QTimer::timeout, [this, id]() {
|
||||
UpdatePlayTime(id);
|
||||
start_time = std::chrono::steady_clock::now();
|
||||
});
|
||||
timer->start(60000); // 60000 ms = 1 minute
|
||||
#endif
|
||||
title = param_sfo->GetString("TITLE").value_or("Unknown title");
|
||||
LOG_INFO(Loader, "Game id: {} Title: {}", id, title);
|
||||
fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000);
|
||||
app_version = param_sfo->GetString("APP_VER").value_or("Unknown version");
|
||||
LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version);
|
||||
} else if (entry.path().filename() == "pic1.png") {
|
||||
auto* splash = Common::Singleton<Splash>::Instance();
|
||||
if (splash->IsLoaded()) {
|
||||
continue;
|
||||
}
|
||||
if (!splash->Open(entry.path())) {
|
||||
LOG_ERROR(Loader, "Game splash: unable to open file");
|
||||
}
|
||||
title = param_sfo->GetString("TITLE").value_or("Unknown title");
|
||||
LOG_INFO(Loader, "Game id: {} Title: {}", id, title);
|
||||
fw_version = param_sfo->GetInteger("SYSTEM_VER").value_or(0x4700000);
|
||||
app_version = param_sfo->GetString("APP_VER").value_or("Unknown version");
|
||||
LOG_INFO(Loader, "Fw: {:#x} App Version: {}", fw_version, app_version);
|
||||
if (const auto raw_attributes = param_sfo->GetInteger("ATTRIBUTE")) {
|
||||
psf_attributes.raw = *raw_attributes;
|
||||
}
|
||||
}
|
||||
|
||||
const auto pic1_path = mnt->GetHostPath("/app0/sce_sys/pic1.png");
|
||||
if (std::filesystem::exists(pic1_path)) {
|
||||
auto* splash = Common::Singleton<Splash>::Instance();
|
||||
if (!splash->IsLoaded()) {
|
||||
if (!splash->Open(pic1_path)) {
|
||||
LOG_ERROR(Loader, "Game splash: unable to open file");
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -176,6 +169,7 @@ void Emulator::Run(const std::filesystem::path& file) {
|
||||
game_info.app_ver = app_version;
|
||||
game_info.firmware_ver = fw_version & 0xFFF00000;
|
||||
game_info.raw_firmware_ver = fw_version;
|
||||
game_info.psf_attributes = psf_attributes;
|
||||
|
||||
std::string game_title = fmt::format("{} - {} <{}>", id, title, app_version);
|
||||
std::string window_title = "";
|
||||
@ -219,6 +213,7 @@ void Emulator::Run(const std::filesystem::path& file) {
|
||||
Libraries::InitHLELibs(&linker->GetHLESymbols());
|
||||
|
||||
// Load the module with the linker
|
||||
const auto eboot_path = mnt->GetHostPath("/app0/" + file.filename().string());
|
||||
linker->LoadModule(eboot_path);
|
||||
|
||||
// check if we have system modules to load
|
||||
@ -236,6 +231,8 @@ void Emulator::Run(const std::filesystem::path& file) {
|
||||
}
|
||||
|
||||
// Load all prx from separate update's sce_module folder
|
||||
std::filesystem::path game_patch_folder = game_folder;
|
||||
game_patch_folder += "-UPDATE";
|
||||
std::filesystem::path update_module_folder = game_patch_folder / "sce_module";
|
||||
if (std::filesystem::is_directory(update_module_folder)) {
|
||||
for (const auto& entry : std::filesystem::directory_iterator(update_module_folder)) {
|
||||
|
@ -2,6 +2,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <SDL3/SDL.h>
|
||||
#include "common/config.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "core/libraries/kernel/time.h"
|
||||
#include "core/libraries/pad/pad.h"
|
||||
@ -189,11 +190,6 @@ void GameController::CalculateOrientation(Libraries::Pad::OrbisFVector3& acceler
|
||||
gz += Kp * ez + Ki * eInt[2];
|
||||
|
||||
//// Integrate rate of change of quaternion
|
||||
// float pa = q2, pb = q3, pc = q4;
|
||||
// q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime);
|
||||
// q2 += (pa * gx + pb * gz - pc * gy) * (0.5f * deltaTime);
|
||||
// q3 += (pb * gy - pa * gz + pc * gx) * (0.5f * deltaTime);
|
||||
// q4 += (pc * gz + pa * gy - pb * gx) * (0.5f * deltaTime);
|
||||
q1 += (-q2 * gx - q3 * gy - q4 * gz) * (0.5f * deltaTime);
|
||||
q2 += (q1 * gx + q3 * gz - q4 * gy) * (0.5f * deltaTime);
|
||||
q3 += (q1 * gy - q2 * gz + q4 * gx) * (0.5f * deltaTime);
|
||||
@ -247,18 +243,21 @@ void GameController::TryOpenSDLController() {
|
||||
int gamepad_count;
|
||||
SDL_JoystickID* gamepads = SDL_GetGamepads(&gamepad_count);
|
||||
m_sdl_gamepad = gamepad_count > 0 ? SDL_OpenGamepad(gamepads[0]) : nullptr;
|
||||
if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_GYRO, true)) {
|
||||
gyro_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_GYRO);
|
||||
LOG_INFO(Input, "Gyro initialized, poll rate: {}", gyro_poll_rate);
|
||||
} else {
|
||||
LOG_ERROR(Input, "Failed to initialize gyro controls for gamepad");
|
||||
}
|
||||
if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_ACCEL, true)) {
|
||||
accel_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_ACCEL);
|
||||
LOG_INFO(Input, "Accel initialized, poll rate: {}", accel_poll_rate);
|
||||
} else {
|
||||
LOG_ERROR(Input, "Failed to initialize accel controls for gamepad");
|
||||
if (Config::getIsMotionControlsEnabled()) {
|
||||
if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_GYRO, true)) {
|
||||
gyro_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_GYRO);
|
||||
LOG_INFO(Input, "Gyro initialized, poll rate: {}", gyro_poll_rate);
|
||||
} else {
|
||||
LOG_ERROR(Input, "Failed to initialize gyro controls for gamepad");
|
||||
}
|
||||
if (SDL_SetGamepadSensorEnabled(m_sdl_gamepad, SDL_SENSOR_ACCEL, true)) {
|
||||
accel_poll_rate = SDL_GetGamepadSensorDataRate(m_sdl_gamepad, SDL_SENSOR_ACCEL);
|
||||
LOG_INFO(Input, "Accel initialized, poll rate: {}", accel_poll_rate);
|
||||
} else {
|
||||
LOG_ERROR(Input, "Failed to initialize accel controls for gamepad");
|
||||
}
|
||||
}
|
||||
|
||||
SDL_free(gamepads);
|
||||
|
||||
SetLightBarRGB(0, 0, 255);
|
||||
@ -266,6 +265,7 @@ void GameController::TryOpenSDLController() {
|
||||
}
|
||||
|
||||
u32 GameController::Poll() {
|
||||
std::scoped_lock lock{m_mutex};
|
||||
if (m_connected) {
|
||||
auto time = Libraries::Kernel::sceKernelGetProcessTime();
|
||||
if (m_states_num == 0) {
|
||||
|
@ -339,6 +339,8 @@ void SettingsDialog::LoadValuesFromConfig() {
|
||||
toml::find_or<std::string>(data, "Input", "backButtonBehavior", "left"));
|
||||
int index = ui->backButtonBehaviorComboBox->findData(backButtonBehavior);
|
||||
ui->backButtonBehaviorComboBox->setCurrentIndex(index != -1 ? index : 0);
|
||||
ui->motionControlsCheckBox->setChecked(
|
||||
toml::find_or<bool>(data, "Input", "isMotionControlsEnabled", true));
|
||||
|
||||
ui->removeFolderButton->setEnabled(!ui->gameFoldersListWidget->selectedItems().isEmpty());
|
||||
ResetInstallFolders();
|
||||
@ -532,6 +534,7 @@ void SettingsDialog::UpdateSettings() {
|
||||
|
||||
const QVector<std::string> TouchPadIndex = {"left", "center", "right", "none"};
|
||||
Config::setBackButtonBehavior(TouchPadIndex[ui->backButtonBehaviorComboBox->currentIndex()]);
|
||||
Config::setIsMotionControlsEnabled(ui->motionControlsCheckBox->isChecked());
|
||||
Config::setFullscreenMode(ui->fullscreenCheckBox->isChecked());
|
||||
Config::setisTrophyPopupDisabled(ui->disableTrophycheckBox->isChecked());
|
||||
Config::setPlayBGM(ui->playBGMCheckBox->isChecked());
|
||||
|
@ -815,6 +815,13 @@
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="motionControlsCheckBox">
|
||||
<property name="text">
|
||||
<string>Enable Motion Controls</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QWidget" name="controllerWidgetSpacer" native="true">
|
||||
<property name="enabled">
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
1415
src/qt_gui/translations/sv.ts
Normal file
1415
src/qt_gui/translations/sv.ts
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -2,6 +2,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/ir/reinterpret.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
@ -31,14 +32,16 @@ void Translator::EmitExport(const GcnInst& inst) {
|
||||
return;
|
||||
}
|
||||
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
|
||||
const auto [r, g, b, a] = runtime_info.fs_info.color_buffers[index].swizzle;
|
||||
const auto col_buf = runtime_info.fs_info.color_buffers[index];
|
||||
const auto converted = IR::ApplyWriteNumberConversion(ir, value, col_buf.num_conversion);
|
||||
const auto [r, g, b, a] = col_buf.swizzle;
|
||||
const std::array swizzle_array = {r, g, b, a};
|
||||
const auto swizzled_comp = swizzle_array[comp];
|
||||
if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) {
|
||||
ir.SetAttribute(attrib, value, comp);
|
||||
ir.SetAttribute(attrib, converted, comp);
|
||||
return;
|
||||
}
|
||||
ir.SetAttribute(attrib, value, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red));
|
||||
ir.SetAttribute(attrib, converted, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red));
|
||||
};
|
||||
|
||||
const auto unpack = [&](u32 idx) {
|
||||
|
@ -106,6 +106,10 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
|
||||
return S_FF1_I32_B32(inst);
|
||||
case Opcode::S_FF1_I32_B64:
|
||||
return S_FF1_I32_B64(inst);
|
||||
case Opcode::S_BITSET0_B32:
|
||||
return S_BITSET_B32(inst, 0);
|
||||
case Opcode::S_BITSET1_B32:
|
||||
return S_BITSET_B32(inst, 1);
|
||||
case Opcode::S_AND_SAVEEXEC_B64:
|
||||
return S_SAVEEXEC_B64(NegateMode::None, false, inst);
|
||||
case Opcode::S_ORN2_SAVEEXEC_B64:
|
||||
@ -607,6 +611,13 @@ void Translator::S_FF1_I32_B64(const GcnInst& inst) {
|
||||
SetDst(inst.dst[0], result);
|
||||
}
|
||||
|
||||
void Translator::S_BITSET_B32(const GcnInst& inst, u32 bit_value) {
|
||||
const IR::U32 old_value{GetSrc(inst.dst[0])};
|
||||
const IR::U32 offset{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0U), ir.Imm32(5U))};
|
||||
const IR::U32 result{ir.BitFieldInsert(old_value, ir.Imm32(bit_value), offset, ir.Imm32(1U))};
|
||||
SetDst(inst.dst[0], result);
|
||||
}
|
||||
|
||||
void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst) {
|
||||
// This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs)
|
||||
// However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination
|
||||
|
@ -114,6 +114,7 @@ public:
|
||||
void S_BCNT1_I32_B64(const GcnInst& inst);
|
||||
void S_FF1_I32_B32(const GcnInst& inst);
|
||||
void S_FF1_I32_B64(const GcnInst& inst);
|
||||
void S_BITSET_B32(const GcnInst& inst, u32 bit_value);
|
||||
void S_GETPC_B64(u32 pc, const GcnInst& inst);
|
||||
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
|
||||
void S_ABS_I32(const GcnInst& inst);
|
||||
|
@ -904,7 +904,7 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
|
||||
case ConditionOp::GE:
|
||||
return ir.FPGreaterThanEqual(src0, src1);
|
||||
case ConditionOp::U:
|
||||
return ir.LogicalNot(ir.LogicalAnd(ir.FPIsNan(src0), ir.FPIsNan(src1)));
|
||||
return ir.LogicalOr(ir.FPIsNan(src0), ir.FPIsNan(src1));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -301,8 +301,7 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
|
||||
});
|
||||
}
|
||||
|
||||
void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
s32 binding{};
|
||||
AmdGpu::Buffer buffer;
|
||||
if (binding = TryHandleInlineCbuf(inst, info, descriptors, buffer); binding == -1) {
|
||||
@ -317,19 +316,191 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
});
|
||||
}
|
||||
|
||||
// Update buffer descriptor format.
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
|
||||
// Replace handle with binding index in buffer resource list.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(binding));
|
||||
}
|
||||
|
||||
void PatchTextureBufferSharp(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
const IR::Inst* handle = inst.Arg(0).InstRecursive();
|
||||
const IR::Inst* producer = handle->Arg(0).InstRecursive();
|
||||
const auto sharp = TrackSharp(producer, info);
|
||||
const s32 binding = descriptors.Add(TextureBufferResource{
|
||||
.sharp_idx = sharp,
|
||||
.is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
|
||||
});
|
||||
|
||||
// Replace handle with binding index in texture buffer resource list.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(binding));
|
||||
}
|
||||
|
||||
void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
const auto opcode = inst->GetOpcode();
|
||||
if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||
opcode == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to find image sharp source");
|
||||
const IR::Inst* producer = result.value();
|
||||
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
|
||||
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
|
||||
|
||||
// Read image sharp.
|
||||
const auto tsharp = TrackSharp(tsharp_handle, info);
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
auto image = info.ReadUdSharp<AmdGpu::Image>(tsharp);
|
||||
if (!image.Valid()) {
|
||||
LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!");
|
||||
image = AmdGpu::Image::Null();
|
||||
}
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
|
||||
const bool is_read = inst.GetOpcode() == IR::Opcode::ImageRead;
|
||||
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite;
|
||||
|
||||
// Patch image instruction if image is FMask.
|
||||
if (image.IsFmask()) {
|
||||
ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ImageRead:
|
||||
case IR::Opcode::ImageSampleRaw: {
|
||||
IR::F32 fmaskx = ir.BitCast<IR::F32>(ir.Imm32(0x76543210));
|
||||
IR::F32 fmasky = ir.BitCast<IR::F32>(ir.Imm32(0xfedcba98));
|
||||
inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky));
|
||||
return;
|
||||
}
|
||||
case IR::Opcode::ImageQueryLod:
|
||||
inst.ReplaceUsesWith(ir.Imm32(1));
|
||||
return;
|
||||
case IR::Opcode::ImageQueryDimensions: {
|
||||
IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast<u32>(image.width)), // x
|
||||
ir.Imm32(static_cast<u32>(image.width)), // y
|
||||
ir.Imm32(1), ir.Imm32(1)); // depth, mip
|
||||
inst.ReplaceUsesWith(dims);
|
||||
|
||||
// Track FMask resource to do specialization.
|
||||
descriptors.Add(FMaskResource{
|
||||
.sharp_idx = tsharp,
|
||||
});
|
||||
return;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode());
|
||||
}
|
||||
}
|
||||
|
||||
u32 image_binding = descriptors.Add(ImageResource{
|
||||
.sharp_idx = tsharp,
|
||||
.is_depth = bool(inst_info.is_depth),
|
||||
.is_atomic = IsImageAtomicInstruction(inst),
|
||||
.is_array = bool(inst_info.is_array),
|
||||
.is_read = is_read,
|
||||
.is_written = is_written,
|
||||
});
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
|
||||
// Read sampler sharp.
|
||||
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
|
||||
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
|
||||
const IR::Value& handle = producer->Arg(1);
|
||||
// Inline sampler resource.
|
||||
if (handle.IsImmediate()) {
|
||||
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
|
||||
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
|
||||
const auto binding = descriptors.Add(SamplerResource{
|
||||
.sharp_idx = std::numeric_limits<u32>::max(),
|
||||
.inline_sampler = inline_sampler,
|
||||
});
|
||||
return {binding, inline_sampler};
|
||||
}
|
||||
// Normal sampler resource.
|
||||
const auto ssharp_handle = handle.InstRecursive();
|
||||
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
||||
const auto ssharp = TrackSharp(ssharp_ud, info);
|
||||
const auto binding = descriptors.Add(SamplerResource{
|
||||
.sharp_idx = ssharp,
|
||||
.associated_image = image_binding,
|
||||
.disable_aniso = disable_aniso,
|
||||
});
|
||||
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
|
||||
}();
|
||||
// Patch image and sampler handle.
|
||||
inst.SetArg(0, ir.Imm32(image_binding | sampler_binding << 16));
|
||||
} else {
|
||||
// Patch image handle.
|
||||
inst.SetArg(0, ir.Imm32(image_binding));
|
||||
}
|
||||
}
|
||||
|
||||
void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
// Insert gds binding in the shader if it doesn't exist already.
|
||||
// The buffer is used for append/consume counters.
|
||||
constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
|
||||
const u32 binding = descriptors.Add(BufferResource{
|
||||
.used_types = IR::Type::U32,
|
||||
.inline_cbuf = GdsSharp,
|
||||
.is_gds_buffer = true,
|
||||
.is_written = true,
|
||||
});
|
||||
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
// Attempt to deduce the GDS address of counter at compile time.
|
||||
const u32 gds_addr = [&] {
|
||||
const IR::Value& gds_offset = inst.Arg(0);
|
||||
if (gds_offset.IsImmediate()) {
|
||||
// Nothing to do, offset is known.
|
||||
return gds_offset.U32() & 0xFFFF;
|
||||
}
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to track M0 source");
|
||||
|
||||
// M0 must be set by some user data register.
|
||||
const IR::Inst* prod = gds_offset.InstRecursive();
|
||||
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
|
||||
u32 m0_val = info.user_data[ud_reg] >> 16;
|
||||
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
m0_val += prod->Arg(1).U32();
|
||||
}
|
||||
return m0_val & 0xFFFF;
|
||||
}();
|
||||
|
||||
// Patch instruction.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
|
||||
inst.SetArg(1, ir.Imm32(binding));
|
||||
}
|
||||
|
||||
void PatchBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto buffer_res = info.buffers[handle.U32()];
|
||||
const auto buffer = buffer_res.GetSharp(info);
|
||||
|
||||
ASSERT(!buffer.add_tid_enable);
|
||||
|
||||
// Address of constant buffer reads can be calculated at IR emittion time.
|
||||
// Address of constant buffer reads can be calculated at IR emission time.
|
||||
if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
|
||||
return;
|
||||
}
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
|
||||
const IR::U32 index_stride = ir.Imm32(buffer.index_stride);
|
||||
const IR::U32 element_size = ir.Imm32(buffer.element_size);
|
||||
|
||||
@ -366,21 +537,27 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
inst.SetArg(1, address);
|
||||
}
|
||||
|
||||
void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
const IR::Inst* handle = inst.Arg(0).InstRecursive();
|
||||
const IR::Inst* producer = handle->Arg(0).InstRecursive();
|
||||
const auto sharp = TrackSharp(producer, info);
|
||||
const auto buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp);
|
||||
const s32 binding = descriptors.Add(TextureBufferResource{
|
||||
.sharp_idx = sharp,
|
||||
.is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
|
||||
});
|
||||
void PatchTextureBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto buffer_res = info.texture_buffers[handle.U32()];
|
||||
const auto buffer = buffer_res.GetSharp(info);
|
||||
|
||||
// Replace handle with binding index in texture buffer resource list.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(binding));
|
||||
ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
||||
if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
|
||||
const auto swizzled = ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect());
|
||||
const auto converted =
|
||||
ApplyWriteNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
|
||||
inst.SetArg(2, converted);
|
||||
} else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) {
|
||||
const auto inst_info = inst.Flags<IR::BufferInstInfo>();
|
||||
const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
|
||||
const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect());
|
||||
const auto converted =
|
||||
ApplyReadNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
|
||||
inst.ReplaceUsesWith(converted);
|
||||
}
|
||||
}
|
||||
|
||||
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
|
||||
@ -409,39 +586,14 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
|
||||
}
|
||||
}
|
||||
|
||||
void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors, const IR::Inst* producer,
|
||||
const u32 image_binding, const AmdGpu::Image& image) {
|
||||
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
|
||||
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
|
||||
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
|
||||
const IR::Value& handle = producer->Arg(1);
|
||||
// Inline sampler resource.
|
||||
if (handle.IsImmediate()) {
|
||||
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
|
||||
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
|
||||
const auto binding = descriptors.Add(SamplerResource{
|
||||
.sharp_idx = std::numeric_limits<u32>::max(),
|
||||
.inline_sampler = inline_sampler,
|
||||
});
|
||||
return {binding, inline_sampler};
|
||||
}
|
||||
// Normal sampler resource.
|
||||
const auto ssharp_handle = handle.InstRecursive();
|
||||
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
||||
const auto ssharp = TrackSharp(ssharp_ud, info);
|
||||
const auto binding = descriptors.Add(SamplerResource{
|
||||
.sharp_idx = ssharp,
|
||||
.associated_image = image_binding,
|
||||
.disable_aniso = disable_aniso,
|
||||
});
|
||||
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
|
||||
}();
|
||||
void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
const AmdGpu::Image& image) {
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto sampler_res = info.samplers[(handle.U32() >> 16) & 0xFFFF];
|
||||
auto sampler = sampler_res.GetSharp(info);
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
const IR::U32 handle = ir.Imm32(image_binding | sampler_binding << 16);
|
||||
|
||||
IR::Inst* body1 = inst.Arg(1).InstRecursive();
|
||||
IR::Inst* body2 = inst.Arg(2).InstRecursive();
|
||||
@ -539,8 +691,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
// Query dimensions of image if needed for normalization.
|
||||
// We can't use the image sharp because it could be bound to a different image later.
|
||||
const auto dimensions =
|
||||
unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false))
|
||||
: IR::Value{};
|
||||
unnormalized ? ir.ImageQueryDimension(handle, ir.Imm32(0u), ir.Imm1(false)) : IR::Value{};
|
||||
const auto get_coord = [&](u32 coord_idx, u32 dim_idx) -> IR::Value {
|
||||
const auto coord = get_addr_reg(coord_idx);
|
||||
if (unnormalized) {
|
||||
@ -589,7 +740,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
: IR::F32{};
|
||||
const IR::F32 lod_clamp = inst_info.has_lod_clamp ? get_addr_reg(addr_reg++) : IR::F32{};
|
||||
|
||||
auto new_inst = [&] -> IR::Value {
|
||||
auto texel = [&] -> IR::Value {
|
||||
if (inst_info.is_gather) {
|
||||
if (inst_info.is_depth) {
|
||||
return ir.ImageGatherDref(handle, coords, offset, dref, inst_info);
|
||||
@ -611,94 +762,30 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
}
|
||||
return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info);
|
||||
}();
|
||||
inst.ReplaceUsesWithAndRemove(new_inst);
|
||||
|
||||
const auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
|
||||
inst.ReplaceUsesWith(converted);
|
||||
}
|
||||
|
||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
const auto opcode = inst->GetOpcode();
|
||||
if (opcode == IR::Opcode::CompositeConstructU32x2 || // IMAGE_SAMPLE (image+sampler)
|
||||
opcode == IR::Opcode::ReadConst || // IMAGE_LOAD (image only)
|
||||
opcode == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to find image sharp source");
|
||||
const IR::Inst* producer = result.value();
|
||||
const bool has_sampler = producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2;
|
||||
const auto tsharp_handle = has_sampler ? producer->Arg(0).InstRecursive() : producer;
|
||||
|
||||
// Read image sharp.
|
||||
const auto tsharp = TrackSharp(tsharp_handle, info);
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
auto image = info.ReadUdSharp<AmdGpu::Image>(tsharp);
|
||||
if (!image.Valid()) {
|
||||
LOG_ERROR(Render_Vulkan, "Shader compiled with unbound image!");
|
||||
image = AmdGpu::Image::Null();
|
||||
}
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
|
||||
const bool is_read = inst.GetOpcode() == IR::Opcode::ImageRead;
|
||||
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite;
|
||||
|
||||
// Patch image instruction if image is FMask.
|
||||
if (image.IsFmask()) {
|
||||
ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::ImageRead:
|
||||
case IR::Opcode::ImageSampleRaw: {
|
||||
IR::F32 fmaskx = ir.BitCast<IR::F32>(ir.Imm32(0x76543210));
|
||||
IR::F32 fmasky = ir.BitCast<IR::F32>(ir.Imm32(0xfedcba98));
|
||||
inst.ReplaceUsesWith(ir.CompositeConstruct(fmaskx, fmasky));
|
||||
return;
|
||||
}
|
||||
case IR::Opcode::ImageQueryLod:
|
||||
inst.ReplaceUsesWith(ir.Imm32(1));
|
||||
return;
|
||||
case IR::Opcode::ImageQueryDimensions: {
|
||||
IR::Value dims = ir.CompositeConstruct(ir.Imm32(static_cast<u32>(image.width)), // x
|
||||
ir.Imm32(static_cast<u32>(image.width)), // y
|
||||
ir.Imm32(1), ir.Imm32(1)); // depth, mip
|
||||
inst.ReplaceUsesWith(dims);
|
||||
|
||||
// Track FMask resource to do specialization.
|
||||
descriptors.Add(FMaskResource{
|
||||
.sharp_idx = tsharp,
|
||||
});
|
||||
return;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE_MSG("Can't patch fmask instruction {}", inst.GetOpcode());
|
||||
}
|
||||
}
|
||||
|
||||
u32 image_binding = descriptors.Add(ImageResource{
|
||||
.sharp_idx = tsharp,
|
||||
.is_depth = bool(inst_info.is_depth),
|
||||
.is_atomic = IsImageAtomicInstruction(inst),
|
||||
.is_array = bool(inst_info.is_array),
|
||||
.is_read = is_read,
|
||||
.is_written = is_written,
|
||||
});
|
||||
|
||||
// Sample instructions must be resolved into a new instruction using address register data.
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
|
||||
PatchImageSampleInstruction(block, inst, info, descriptors, producer, image_binding, image);
|
||||
return;
|
||||
}
|
||||
|
||||
// Patch image handle
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(image_binding));
|
||||
|
||||
// No need to patch coordinates if we are just querying.
|
||||
void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
|
||||
// Nothing to patch for dimension query.
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageQueryDimensions) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto image_res = info.images[handle.U32() & 0xFFFF];
|
||||
auto image = image_res.GetSharp(info);
|
||||
|
||||
// Sample instructions must be handled separately using address register data.
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
|
||||
PatchImageSampleArgs(block, inst, info, image);
|
||||
return;
|
||||
}
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
|
||||
// Now that we know the image type, adjust texture coordinate vector.
|
||||
IR::Inst* body = inst.Arg(1).InstRecursive();
|
||||
const auto [coords, arg] = [&] -> std::pair<IR::Value, IR::Value> {
|
||||
@ -719,152 +806,77 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
||||
case AmdGpu::ImageType::Color3D: // x, y, z, [lod]
|
||||
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
|
||||
case AmdGpu::ImageType::Cube: // x, y, face, [lod]
|
||||
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), is_written,
|
||||
inst_info.is_array),
|
||||
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2),
|
||||
inst.GetOpcode() == IR::Opcode::ImageWrite, inst_info.is_array),
|
||||
body->Arg(3)};
|
||||
default:
|
||||
UNREACHABLE_MSG("Unknown image type {}", image.GetType());
|
||||
}
|
||||
}();
|
||||
inst.SetArg(1, coords);
|
||||
|
||||
if (inst_info.has_lod) {
|
||||
ASSERT(inst.GetOpcode() == IR::Opcode::ImageRead ||
|
||||
inst.GetOpcode() == IR::Opcode::ImageWrite);
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa &&
|
||||
image.GetType() != AmdGpu::ImageType::Color2DMsaaArray);
|
||||
inst.SetArg(2, arg);
|
||||
} else if ((image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
|
||||
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) &&
|
||||
(inst.GetOpcode() == IR::Opcode::ImageRead ||
|
||||
inst.GetOpcode() == IR::Opcode::ImageWrite)) {
|
||||
inst.SetArg(3, arg);
|
||||
}
|
||||
}
|
||||
const auto has_ms = image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
|
||||
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray;
|
||||
ASSERT(!inst_info.has_lod || !has_ms);
|
||||
const auto lod = inst_info.has_lod ? IR::U32{arg} : IR::U32{};
|
||||
const auto ms = has_ms ? IR::U32{arg} : IR::U32{};
|
||||
|
||||
/// Applies the buffer sharp's destination swizzle to a formatted texture buffer access.
///
/// The binding index is read from argument 0 of `inst` and looked up in
/// `info.texture_buffers`. Invalid sharps are left untouched.
///  - StoreBufferFormatF32: argument 2 (the stored value) is replaced with its swizzled form.
///  - LoadBufferFormatF32: a new formatted load is emitted through an emitter positioned at
///    `inst`, swizzled, and every use of `inst` is redirected to the swizzled value.
/// Any other opcode is ignored.
void PatchTextureBufferInterpretation(IR::Block& block, IR::Inst& inst, Info& info) {
    const auto resource = info.texture_buffers[inst.Arg(0).U32()];
    const auto sharp = resource.GetSharp(info);
    if (!sharp.Valid()) {
        // Don't need to swizzle invalid buffer.
        return;
    }

    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    switch (inst.GetOpcode()) {
    case IR::Opcode::StoreBufferFormatF32: {
        inst.SetArg(2, ApplySwizzle(ir, inst.Arg(2), sharp.DstSelect()));
        break;
    }
    case IR::Opcode::LoadBufferFormatF32: {
        const auto flags = inst.Flags<IR::BufferInstInfo>();
        const auto loaded = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), flags);
        inst.ReplaceUsesWith(ApplySwizzle(ir, loaded, sharp.DstSelect()));
        break;
    }
    default:
        break;
    }
}
|
||||
|
||||
/// Applies the image sharp's destination swizzle to a storage image access.
///
/// The binding index is the low 16 bits of argument 0 of `inst`. Images that are invalid or
/// that are not used as storage are left untouched.
///  - ImageWrite: argument 4 (the texel) is replaced with its swizzled form.
///  - ImageRead: a new read is emitted through an emitter positioned at `inst`, swizzled, and
///    every use of `inst` is redirected to the swizzled value.
/// Any other opcode is ignored.
void PatchImageInterpretation(IR::Block& block, IR::Inst& inst, Info& info) {
    const auto resource = info.images[inst.Arg(0).U32() & 0xFFFF];
    const auto sharp = resource.GetSharp(info);
    const bool needs_swizzle = sharp.Valid() && resource.IsStorage(sharp);
    if (!needs_swizzle) {
        // Don't need to swizzle invalid or non-storage image.
        return;
    }

    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    switch (inst.GetOpcode()) {
    case IR::Opcode::ImageWrite: {
        inst.SetArg(4, ApplySwizzle(ir, inst.Arg(4), sharp.DstSelect()));
        break;
    }
    case IR::Opcode::ImageRead: {
        const auto flags = inst.Flags<IR::TextureInstInfo>();
        // Arguments 2 and 3 may be empty; forward an empty U32 in that case.
        const auto raw_lod = inst.Arg(2);
        const auto raw_ms = inst.Arg(3);
        const IR::U32 lod = raw_lod.IsEmpty() ? IR::U32{} : IR::U32{raw_lod};
        const IR::U32 ms = raw_ms.IsEmpty() ? IR::U32{} : IR::U32{raw_ms};
        const auto texel = ir.ImageRead(inst.Arg(0), inst.Arg(1), lod, ms, flags);
        inst.ReplaceUsesWith(ApplySwizzle(ir, texel, sharp.DstSelect()));
        break;
    }
    default:
        break;
    }
}
|
||||
|
||||
void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
// Insert gds binding in the shader if it doesn't exist already.
|
||||
// The buffer is used for append/consume counters.
|
||||
constexpr static AmdGpu::Buffer GdsSharp{.base_address = 1};
|
||||
const u32 binding = descriptors.Add(BufferResource{
|
||||
.used_types = IR::Type::U32,
|
||||
.inline_cbuf = GdsSharp,
|
||||
.is_gds_buffer = true,
|
||||
.is_written = true,
|
||||
});
|
||||
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
const auto is_storage = image_res.IsStorage(image);
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageRead) {
|
||||
auto texel = ir.ImageRead(handle, coords, lod, ms, inst_info);
|
||||
if (is_storage) {
|
||||
// Storage image requires shader swizzle.
|
||||
texel = ApplySwizzle(ir, texel, image.DstSelect());
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
const auto converted =
|
||||
ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
|
||||
inst.ReplaceUsesWith(converted);
|
||||
} else {
|
||||
inst.SetArg(1, coords);
|
||||
if (inst.GetOpcode() == IR::Opcode::ImageWrite) {
|
||||
inst.SetArg(2, lod);
|
||||
inst.SetArg(3, ms);
|
||||
|
||||
// Attempt to deduce the GDS address of counter at compile time.
|
||||
const u32 gds_addr = [&] {
|
||||
const IR::Value& gds_offset = inst.Arg(0);
|
||||
if (gds_offset.IsImmediate()) {
|
||||
// Nothing to do, offset is known.
|
||||
return gds_offset.U32() & 0xFFFF;
|
||||
auto texel = inst.Arg(4);
|
||||
if (is_storage) {
|
||||
// Storage image requires shader swizzle.
|
||||
texel = ApplySwizzle(ir, texel, image.DstSelect());
|
||||
}
|
||||
const auto converted =
|
||||
ApplyWriteNumberConversionVec4(ir, texel, image.GetNumberConversion());
|
||||
inst.SetArg(4, converted);
|
||||
}
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to track M0 source");
|
||||
|
||||
// M0 must be set by some user data register.
|
||||
const IR::Inst* prod = gds_offset.InstRecursive();
|
||||
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
|
||||
u32 m0_val = info.user_data[ud_reg] >> 16;
|
||||
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
m0_val += prod->Arg(1).U32();
|
||||
}
|
||||
return m0_val & 0xFFFF;
|
||||
}();
|
||||
|
||||
// Patch instruction.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
|
||||
inst.SetArg(1, ir.Imm32(binding));
|
||||
}
|
||||
}
|
||||
|
||||
void ResourceTrackingPass(IR::Program& program) {
|
||||
// Iterate resource instructions and patch them after finding the sharp.
|
||||
auto& info = program.info;
|
||||
|
||||
// Pass 1: Track resource sharps
|
||||
Descriptors descriptors{info};
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (IsBufferInstruction(inst)) {
|
||||
PatchBufferInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
if (IsImageInstruction(inst)) {
|
||||
PatchImageInstruction(*block, inst, info, descriptors);
|
||||
continue;
|
||||
}
|
||||
if (IsDataRingInstruction(inst)) {
|
||||
PatchDataRingInstruction(*block, inst, info, descriptors);
|
||||
PatchBufferSharp(*block, inst, info, descriptors);
|
||||
} else if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferSharp(*block, inst, info, descriptors);
|
||||
} else if (IsImageInstruction(inst)) {
|
||||
PatchImageSharp(*block, inst, info, descriptors);
|
||||
} else if (IsDataRingInstruction(inst)) {
|
||||
PatchDataRingAccess(*block, inst, info, descriptors);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Second pass to reinterpret format read/write where needed, since we now know
|
||||
// the bindings and their properties.
|
||||
|
||||
// Pass 2: Patch instruction args
|
||||
for (IR::Block* const block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferInterpretation(*block, inst, info);
|
||||
continue;
|
||||
}
|
||||
if (IsImageInstruction(inst)) {
|
||||
PatchImageInterpretation(*block, inst, info);
|
||||
if (IsBufferInstruction(inst)) {
|
||||
PatchBufferArgs(*block, inst, info);
|
||||
} else if (IsTextureBufferInstruction(inst)) {
|
||||
PatchTextureBufferArgs(*block, inst, info);
|
||||
} else if (IsImageInstruction(inst)) {
|
||||
PatchImageArgs(*block, inst, info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4,7 +4,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
@ -21,4 +21,66 @@ inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::Comp
|
||||
return swizzled;
|
||||
}
|
||||
|
||||
/// Applies a number conversion in the read direction.
|
||||
inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value,
|
||||
const AmdGpu::NumberConversion& conversion) {
|
||||
switch (conversion) {
|
||||
case AmdGpu::NumberConversion::None:
|
||||
return value;
|
||||
case AmdGpu::NumberConversion::UintToUscaled:
|
||||
return ir.ConvertUToF(32, 32, ir.BitCast<U32>(value));
|
||||
case AmdGpu::NumberConversion::SintToSscaled:
|
||||
return ir.ConvertSToF(32, 32, ir.BitCast<U32>(value));
|
||||
case AmdGpu::NumberConversion::UnormToUbnorm:
|
||||
// Convert 0...1 to -1...1
|
||||
return ir.FPSub(ir.FPMul(value, ir.Imm32(2.f)), ir.Imm32(1.f));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies ApplyReadNumberConversion to each of the four components of `value`.
///
/// @return `value` unchanged when `conversion` is None, otherwise a new four-component
///         composite built from the converted components.
inline Value ApplyReadNumberConversionVec4(IREmitter& ir, const Value& value,
                                           const AmdGpu::NumberConversion& conversion) {
    if (conversion != AmdGpu::NumberConversion::None) {
        const auto convert_lane = [&](const Value& lane) {
            return ApplyReadNumberConversion(ir, F32{lane}, conversion);
        };
        // One statement per lane keeps the emitted instructions in x, y, z, w order.
        const auto x = convert_lane(ir.CompositeExtract(value, 0));
        const auto y = convert_lane(ir.CompositeExtract(value, 1));
        const auto z = convert_lane(ir.CompositeExtract(value, 2));
        const auto w = convert_lane(ir.CompositeExtract(value, 3));
        return ir.CompositeConstruct(x, y, z, w);
    }
    return value;
}
|
||||
|
||||
/// Applies a number conversion in the write direction.
|
||||
inline F32 ApplyWriteNumberConversion(IREmitter& ir, const F32& value,
|
||||
const AmdGpu::NumberConversion& conversion) {
|
||||
switch (conversion) {
|
||||
case AmdGpu::NumberConversion::None:
|
||||
return value;
|
||||
case AmdGpu::NumberConversion::UintToUscaled:
|
||||
// Need to return float type to maintain IR semantics.
|
||||
return ir.BitCast<F32>(U32{ir.ConvertFToU(32, value)});
|
||||
case AmdGpu::NumberConversion::SintToSscaled:
|
||||
// Need to return float type to maintain IR semantics.
|
||||
return ir.BitCast<F32>(U32{ir.ConvertFToS(32, value)});
|
||||
case AmdGpu::NumberConversion::UnormToUbnorm:
|
||||
// Convert -1...1 to 0...1
|
||||
return ir.FPDiv(ir.FPAdd(value, ir.Imm32(1.f)), ir.Imm32(2.f));
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies ApplyWriteNumberConversion to each of the four components of `value`.
///
/// @return `value` unchanged when `conversion` is None, otherwise a new four-component
///         composite built from the converted components.
inline Value ApplyWriteNumberConversionVec4(IREmitter& ir, const Value& value,
                                            const AmdGpu::NumberConversion& conversion) {
    if (conversion != AmdGpu::NumberConversion::None) {
        const auto convert_lane = [&](const Value& lane) {
            return ApplyWriteNumberConversion(ir, F32{lane}, conversion);
        };
        // One statement per lane keeps the emitted instructions in x, y, z, w order.
        const auto x = convert_lane(ir.CompositeExtract(value, 0));
        const auto y = convert_lane(ir.CompositeExtract(value, 1));
        const auto z = convert_lane(ir.CompositeExtract(value, 2));
        const auto w = convert_lane(ir.CompositeExtract(value, 3));
        return ir.CompositeConstruct(x, y, z, w);
    }
    return value;
}
|
||||
|
||||
} // namespace Shader::IR
|
||||
|
@ -180,6 +180,7 @@ struct FragmentRuntimeInfo {
|
||||
std::array<PsInput, 32> inputs;
|
||||
struct PsColorBuffer {
|
||||
AmdGpu::NumberFormat num_format;
|
||||
AmdGpu::NumberConversion num_conversion;
|
||||
AmdGpu::CompMapping swizzle;
|
||||
|
||||
auto operator<=>(const PsColorBuffer&) const noexcept = default;
|
||||
|
@ -32,6 +32,7 @@ struct BufferSpecialization {
|
||||
/// Per-binding properties of a texture buffer that take part in shader specialization.
/// All members participate in the defaulted comparison below.
struct TextureBufferSpecialization {
|
||||
// Filled from AmdGpu::IsInteger(sharp.GetNumberFmt()).
bool is_integer = false;
|
||||
// Filled from sharp.DstSelect().
AmdGpu::CompMapping dst_select{};
|
||||
// Filled from sharp.GetNumberConversion().
AmdGpu::NumberConversion num_conversion{};
|
||||
|
||||
auto operator<=>(const TextureBufferSpecialization&) const = default;
|
||||
};
|
||||
@ -41,6 +42,7 @@ struct ImageSpecialization {
|
||||
bool is_integer = false;
|
||||
bool is_storage = false;
|
||||
AmdGpu::CompMapping dst_select{};
|
||||
AmdGpu::NumberConversion num_conversion{};
|
||||
|
||||
auto operator<=>(const ImageSpecialization&) const = default;
|
||||
};
|
||||
@ -107,6 +109,7 @@ struct StageSpecialization {
|
||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||
spec.dst_select = sharp.DstSelect();
|
||||
spec.num_conversion = sharp.GetNumberConversion();
|
||||
});
|
||||
ForEachSharp(binding, images, info->images,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||
@ -116,6 +119,7 @@ struct StageSpecialization {
|
||||
if (spec.is_storage) {
|
||||
spec.dst_select = sharp.DstSelect();
|
||||
}
|
||||
spec.num_conversion = sharp.GetNumberConversion();
|
||||
});
|
||||
ForEachSharp(binding, fmasks, info->fmasks,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||
|
@ -454,7 +454,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
case PM4ItOpcode::DrawIndirect: {
|
||||
const auto* draw_indirect = reinterpret_cast<const PM4CmdDrawIndirect*>(header);
|
||||
const auto offset = draw_indirect->data_offset;
|
||||
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
||||
const auto size = sizeof(DrawIndirectArgs);
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
|
||||
@ -462,7 +461,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
if (rasterizer) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndirect", cmd_address));
|
||||
rasterizer->DrawIndirect(false, ib_address, offset, size, 1, 0);
|
||||
rasterizer->DrawIndirect(false, indirect_args_addr, offset, size, 1, 0);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
@ -471,7 +470,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
const auto* draw_index_indirect =
|
||||
reinterpret_cast<const PM4CmdDrawIndexIndirect*>(header);
|
||||
const auto offset = draw_index_indirect->data_offset;
|
||||
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
||||
const auto size = sizeof(DrawIndexedIndirectArgs);
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
|
||||
@ -480,7 +478,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(
|
||||
fmt::format("dcb:{}:DrawIndexIndirect", cmd_address));
|
||||
rasterizer->DrawIndirect(true, ib_address, offset, size, 1, 0);
|
||||
rasterizer->DrawIndirect(true, indirect_args_addr, offset, size, 1, 0);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
@ -489,7 +487,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
const auto* draw_index_indirect =
|
||||
reinterpret_cast<const PM4CmdDrawIndexIndirectMulti*>(header);
|
||||
const auto offset = draw_index_indirect->data_offset;
|
||||
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDump(base_addr, reinterpret_cast<uintptr_t>(header), regs);
|
||||
}
|
||||
@ -497,9 +494,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(
|
||||
fmt::format("dcb:{}:DrawIndexIndirectCountMulti", cmd_address));
|
||||
rasterizer->DrawIndirect(true, ib_address, offset, draw_index_indirect->stride,
|
||||
draw_index_indirect->count,
|
||||
draw_index_indirect->countAddr);
|
||||
rasterizer->DrawIndirect(
|
||||
true, indirect_args_addr, offset, draw_index_indirect->stride,
|
||||
draw_index_indirect->count, draw_index_indirect->countAddr);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
@ -528,7 +525,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
|
||||
auto& cs_program = GetCsRegs();
|
||||
const auto offset = dispatch_indirect->data_offset;
|
||||
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
|
||||
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
|
||||
@ -538,7 +534,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(
|
||||
fmt::format("dcb:{}:DispatchIndirect", cmd_address));
|
||||
rasterizer->DispatchIndirect(ib_address, offset, size);
|
||||
rasterizer->DispatchIndirect(indirect_args_addr, offset, size);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
@ -562,7 +558,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
case PM4ItOpcode::SetBase: {
|
||||
const auto* set_base = reinterpret_cast<const PM4CmdSetBase*>(header);
|
||||
ASSERT(set_base->base_index == PM4CmdSetBase::BaseIndex::DrawIndexIndirPatchTable);
|
||||
mapped_queues[GfxQueueId].indirect_args_addr = set_base->Address<u64>();
|
||||
indirect_args_addr = set_base->Address<u64>();
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::EventWrite: {
|
||||
@ -823,10 +819,10 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
|
||||
break;
|
||||
}
|
||||
case PM4ItOpcode::DispatchIndirect: {
|
||||
const auto* dispatch_indirect = reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
|
||||
const auto* dispatch_indirect =
|
||||
reinterpret_cast<const PM4CmdDispatchIndirectMec*>(header);
|
||||
auto& cs_program = GetCsRegs();
|
||||
const auto offset = dispatch_indirect->data_offset;
|
||||
const auto ib_address = mapped_queues[vqid].indirect_args_addr;
|
||||
const auto ib_address = dispatch_indirect->Address<VAddr>();
|
||||
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
|
||||
if (DebugState.DumpingCurrentReg()) {
|
||||
DebugState.PushRegsDumpCompute(base_addr, reinterpret_cast<uintptr_t>(header),
|
||||
@ -835,7 +831,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
|
||||
if (rasterizer && (cs_program.dispatch_initiator & 1)) {
|
||||
const auto cmd_address = reinterpret_cast<const void*>(header);
|
||||
rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address));
|
||||
rasterizer->DispatchIndirect(ib_address, offset, size);
|
||||
rasterizer->DispatchIndirect(ib_address, 0, size);
|
||||
rasterizer->ScopeMarkerEnd();
|
||||
}
|
||||
break;
|
||||
|
@ -20,9 +20,9 @@
|
||||
#include "common/types.h"
|
||||
#include "common/unique_function.h"
|
||||
#include "shader_recompiler/params.h"
|
||||
#include "types.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
|
||||
namespace Vulkan {
|
||||
class Rasterizer;
|
||||
@ -814,7 +814,9 @@ struct Liverpool {
|
||||
BitField<26, 1, u32> fmask_compression_disable_ci;
|
||||
BitField<27, 1, u32> fmask_compress_1frag_only;
|
||||
BitField<28, 1, u32> dcc_enable;
|
||||
BitField<29, 1, u32> cmask_addr_type;
|
||||
BitField<29, 2, u32> cmask_addr_type;
|
||||
/// Neo-mode only
|
||||
BitField<31, 1, u32> alt_tile_mode;
|
||||
|
||||
u32 u32all;
|
||||
} info;
|
||||
@ -900,6 +902,10 @@ struct Liverpool {
|
||||
: info.number_type.Value());
|
||||
}
|
||||
|
||||
/// Returns the number conversion that MapNumberConversion derives from this object's
/// `info.number_type` field.
[[nodiscard]] NumberConversion GetNumberConversion() const {
|
||||
return MapNumberConversion(info.number_type);
|
||||
}
|
||||
|
||||
[[nodiscard]] CompMapping Swizzle() const {
|
||||
// clang-format off
|
||||
static constexpr std::array<std::array<CompMapping, 4>, 4> mrt_swizzles{{
|
||||
@ -936,7 +942,7 @@ struct Liverpool {
|
||||
const auto swap_idx = static_cast<u32>(info.comp_swap.Value());
|
||||
const auto components_idx = NumComponents(info.format) - 1;
|
||||
const auto mrt_swizzle = mrt_swizzles[swap_idx][components_idx];
|
||||
return RemapComponents(info.format, mrt_swizzle);
|
||||
return RemapSwizzle(info.format, mrt_swizzle);
|
||||
}
|
||||
};
|
||||
|
||||
@ -1477,11 +1483,12 @@ private:
|
||||
std::vector<u32> ccb_buffer;
|
||||
std::queue<Task::Handle> submits{};
|
||||
ComputeProgram cs_state{};
|
||||
VAddr indirect_args_addr{};
|
||||
};
|
||||
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
|
||||
u32 num_mapped_queues{1u}; // GFX is always available
|
||||
|
||||
VAddr indirect_args_addr{};
|
||||
|
||||
struct ConstantEngine {
|
||||
void Reset() {
|
||||
ce_count = 0;
|
||||
|
@ -100,7 +100,7 @@ std::string_view NameOf(NumberFormat fmt) {
|
||||
return "Srgb";
|
||||
case NumberFormat::Ubnorm:
|
||||
return "Ubnorm";
|
||||
case NumberFormat::UbnromNz:
|
||||
case NumberFormat::UbnormNz:
|
||||
return "UbnormNz";
|
||||
case NumberFormat::Ubint:
|
||||
return "Ubint";
|
||||
|
@ -204,6 +204,11 @@ struct PM4CmdSetData {
|
||||
static constexpr u32* SetShReg(u32* cmdbuf, Args... data) {
|
||||
return WritePacket<PM4ItOpcode::SetShReg>(cmdbuf, type, data...);
|
||||
}
|
||||
|
||||
/// Writes a SetUconfigReg packet into `cmdbuf` by forwarding the shader type and `data...`
/// to WritePacket, and returns whatever WritePacket returns.
/// NOTE(review): presumably the returned pointer is the position after the written packet,
/// as with the sibling SetShReg helper; confirm against WritePacket.
template <PM4ShaderType type = PM4ShaderType::ShaderGraphics, typename... Args>
|
||||
static constexpr u32* SetUconfigReg(u32* cmdbuf, Args... data) {
|
||||
return WritePacket<PM4ItOpcode::SetUconfigReg>(cmdbuf, type, data...);
|
||||
}
|
||||
};
|
||||
|
||||
struct PM4CmdNop {
|
||||
@ -791,6 +796,18 @@ struct PM4CmdDispatchIndirect {
|
||||
u32 dispatch_initiator; ///< Dispatch Initiator Register
|
||||
};
|
||||
|
||||
struct PM4CmdDispatchIndirectMec {
|
||||
PM4Type3Header header;
|
||||
u32 address0;
|
||||
u32 address1;
|
||||
u32 dispatch_initiator; ///< Dispatch Initiator Register
|
||||
|
||||
template <typename T>
|
||||
T Address() const {
|
||||
return std::bit_cast<T>(address0 | (u64(address1 & 0xffff) << 32u));
|
||||
}
|
||||
};
|
||||
|
||||
struct DrawIndirectArgs {
|
||||
u32 vertex_count_per_instance;
|
||||
u32 instance_count;
|
||||
|
@ -11,96 +11,6 @@
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
/// Selector for one output component of a CompMapping.
/// Zero and One produce the constants 0 and 1; Red..Alpha pick input components 0..3.
/// Values 2 and 3 are not defined here.
enum class CompSwizzle : u32 {
|
||||
Zero = 0,
|
||||
One = 1,
|
||||
Red = 4,
|
||||
Green = 5,
|
||||
Blue = 6,
|
||||
Alpha = 7,
|
||||
};
|
||||
|
||||
struct CompMapping {
|
||||
CompSwizzle r : 3;
|
||||
CompSwizzle g : 3;
|
||||
CompSwizzle b : 3;
|
||||
CompSwizzle a : 3;
|
||||
|
||||
auto operator<=>(const CompMapping& other) const = default;
|
||||
|
||||
template <typename T>
|
||||
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
|
||||
return {
|
||||
ApplySingle(data, r),
|
||||
ApplySingle(data, g),
|
||||
ApplySingle(data, b),
|
||||
ApplySingle(data, a),
|
||||
};
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
|
||||
switch (swizzle) {
|
||||
case CompSwizzle::Zero:
|
||||
return T(0);
|
||||
case CompSwizzle::One:
|
||||
return T(1);
|
||||
case CompSwizzle::Red:
|
||||
return data[0];
|
||||
case CompSwizzle::Green:
|
||||
return data[1];
|
||||
case CompSwizzle::Blue:
|
||||
return data[2];
|
||||
case CompSwizzle::Alpha:
|
||||
return data[3];
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Replaces a data format with its reversed-component-order counterpart where one is
/// listed here (11_11_10, 10_10_10_2, 5_5_5_1); every other format is returned unchanged.
inline DataFormat RemapDataFormat(const DataFormat format) {
    if (format == DataFormat::Format11_11_10) {
        return DataFormat::Format10_11_11;
    }
    if (format == DataFormat::Format10_10_10_2) {
        return DataFormat::Format2_10_10_10;
    }
    if (format == DataFormat::Format5_5_5_1) {
        return DataFormat::Format1_5_5_5;
    }
    return format;
}
|
||||
|
||||
/// Identity mapping: returns `format` unchanged.
inline NumberFormat RemapNumberFormat(const NumberFormat format) {
|
||||
return format;
|
||||
}
|
||||
|
||||
/// Adjusts a component mapping for the data formats that RemapDataFormat replaces with a
/// reversed-order format, so the selectors line up with the replacement format.
///  - Format11_11_10: the r and b selectors are exchanged.
///  - Format10_10_10_2 / Format5_5_5_1: all four selectors are reversed (r<->a, g<->b).
/// Every other format returns `components` unchanged.
inline CompMapping RemapComponents(const DataFormat format, const CompMapping components) {
    const bool swap_red_blue = format == DataFormat::Format11_11_10;
    const bool reverse_all =
        format == DataFormat::Format10_10_10_2 || format == DataFormat::Format5_5_5_1;

    if (swap_red_blue) {
        return CompMapping{
            .r = components.b,
            .g = components.g,
            .b = components.r,
            .a = components.a,
        };
    }
    if (reverse_all) {
        return CompMapping{
            .r = components.a,
            .g = components.b,
            .b = components.g,
            .a = components.r,
        };
    }
    return components;
}
|
||||
|
||||
// Table 8.5 Buffer Resource Descriptor [Sea Islands Series Instruction Set Architecture]
|
||||
struct Buffer {
|
||||
u64 base_address : 44;
|
||||
@ -140,7 +50,7 @@ struct Buffer {
|
||||
.b = CompSwizzle(dst_sel_z),
|
||||
.a = CompSwizzle(dst_sel_w),
|
||||
};
|
||||
return RemapComponents(DataFormat(data_format), dst_sel);
|
||||
return RemapSwizzle(DataFormat(data_format), dst_sel);
|
||||
}
|
||||
|
||||
NumberFormat GetNumberFmt() const noexcept {
|
||||
@ -151,6 +61,10 @@ struct Buffer {
|
||||
return RemapDataFormat(DataFormat(data_format));
|
||||
}
|
||||
|
||||
/// Returns the number conversion that MapNumberConversion derives from this buffer's
/// `num_format` field.
NumberConversion GetNumberConversion() const noexcept {
|
||||
return MapNumberConversion(NumberFormat(num_format));
|
||||
}
|
||||
|
||||
/// Returns the raw `stride` field of this buffer descriptor.
u32 GetStride() const noexcept {
|
||||
return stride;
|
||||
}
|
||||
@ -263,7 +177,15 @@ struct Image {
|
||||
u64 min_lod_warn : 12;
|
||||
u64 counter_bank_id : 8;
|
||||
u64 lod_hw_cnt_en : 1;
|
||||
u64 : 43;
|
||||
/// Neo-mode only
|
||||
u64 compression_en : 1;
|
||||
/// Neo-mode only
|
||||
u64 alpha_is_on_msb : 1;
|
||||
/// Neo-mode only
|
||||
u64 color_transform : 1;
|
||||
/// Neo-mode only
|
||||
u64 alt_tile_mode : 1;
|
||||
u64 : 39;
|
||||
|
||||
static constexpr Image Null() {
|
||||
Image image{};
|
||||
@ -297,7 +219,7 @@ struct Image {
|
||||
.b = CompSwizzle(dst_sel_z),
|
||||
.a = CompSwizzle(dst_sel_w),
|
||||
};
|
||||
return RemapComponents(DataFormat(data_format), dst_sel);
|
||||
return RemapSwizzle(DataFormat(data_format), dst_sel);
|
||||
}
|
||||
|
||||
u32 Pitch() const {
|
||||
@ -346,6 +268,10 @@ struct Image {
|
||||
return RemapNumberFormat(NumberFormat(num_format));
|
||||
}
|
||||
|
||||
/// Returns the number conversion that MapNumberConversion derives from this image's
/// `num_format` field.
NumberConversion GetNumberConversion() const noexcept {
|
||||
return MapNumberConversion(NumberFormat(num_format));
|
||||
}
|
||||
|
||||
TilingMode GetTilingMode() const {
|
||||
if (tiling_index >= 0 && tiling_index <= 7) {
|
||||
return tiling_index == 5 ? TilingMode::Texture_MicroTiled
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
#include <string_view>
|
||||
#include <fmt/format.h>
|
||||
#include "common/assert.h"
|
||||
#include "common/types.h"
|
||||
|
||||
namespace AmdGpu {
|
||||
@ -177,11 +178,130 @@ enum class NumberFormat : u32 {
|
||||
Float = 7,
|
||||
Srgb = 9,
|
||||
Ubnorm = 10,
|
||||
UbnromNz = 11,
|
||||
UbnormNz = 11,
|
||||
Ubint = 12,
|
||||
Ubscaled = 13,
|
||||
};
|
||||
|
||||
/// Selector for one output component of a CompMapping.
/// Zero and One produce the constants 0 and 1; Red..Alpha pick input components 0..3.
/// Values 2 and 3 are not defined here.
enum class CompSwizzle : u32 {
|
||||
Zero = 0,
|
||||
One = 1,
|
||||
Red = 4,
|
||||
Green = 5,
|
||||
Blue = 6,
|
||||
Alpha = 7,
|
||||
};
|
||||
|
||||
/// Extra numeric conversion applied in shader code when a resource is read or written.
/// Each name gives the read direction; writes apply the inverse.
enum class NumberConversion : u32 {
|
||||
// No conversion; values pass through unchanged.
None,
|
||||
// Read: unsigned integer bits converted to float. Write: float converted to unsigned integer.
UintToUscaled,
|
||||
// Read: signed integer bits converted to float. Write: float converted to signed integer.
SintToSscaled,
|
||||
// Read: 0...1 mapped to -1...1. Write: -1...1 mapped to 0...1.
UnormToUbnorm,
|
||||
};
|
||||
|
||||
struct CompMapping {
|
||||
CompSwizzle r : 3;
|
||||
CompSwizzle g : 3;
|
||||
CompSwizzle b : 3;
|
||||
CompSwizzle a : 3;
|
||||
|
||||
auto operator<=>(const CompMapping& other) const = default;
|
||||
|
||||
template <typename T>
|
||||
[[nodiscard]] std::array<T, 4> Apply(const std::array<T, 4>& data) const {
|
||||
return {
|
||||
ApplySingle(data, r),
|
||||
ApplySingle(data, g),
|
||||
ApplySingle(data, b),
|
||||
ApplySingle(data, a),
|
||||
};
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {
|
||||
switch (swizzle) {
|
||||
case CompSwizzle::Zero:
|
||||
return T(0);
|
||||
case CompSwizzle::One:
|
||||
return T(1);
|
||||
case CompSwizzle::Red:
|
||||
return data[0];
|
||||
case CompSwizzle::Green:
|
||||
return data[1];
|
||||
case CompSwizzle::Blue:
|
||||
return data[2];
|
||||
case CompSwizzle::Alpha:
|
||||
return data[3];
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Substitutes the three packed formats 11_11_10, 10_10_10_2 and 5_5_5_1 with
/// their reversed-order counterparts; every other format is returned unchanged.
inline DataFormat RemapDataFormat(const DataFormat format) {
    if (format == DataFormat::Format11_11_10) {
        return DataFormat::Format10_11_11;
    }
    if (format == DataFormat::Format10_10_10_2) {
        return DataFormat::Format2_10_10_10;
    }
    if (format == DataFormat::Format5_5_5_1) {
        return DataFormat::Format1_5_5_5;
    }
    return format;
}
|
||||
|
||||
/// Replaces Uscaled, Sscaled and Ubnorm with Uint, Sint and Unorm respectively;
/// all other number formats pass through untouched. MapNumberConversion reports
/// the matching conversion tag for the same three inputs.
inline NumberFormat RemapNumberFormat(const NumberFormat format) {
    NumberFormat remapped = format;
    switch (format) {
    case NumberFormat::Uscaled:
        remapped = NumberFormat::Uint;
        break;
    case NumberFormat::Sscaled:
        remapped = NumberFormat::Sint;
        break;
    case NumberFormat::Ubnorm:
        remapped = NumberFormat::Unorm;
        break;
    default:
        break;
    }
    return remapped;
}
|
||||
|
||||
/// Adjusts a swizzle for the formats that RemapDataFormat replaces with a
/// reversed-order counterpart. For 11_11_10 the r and b selectors trade places;
/// for 10_10_10_2 and 5_5_5_1 all four selectors are reversed. Any other
/// format leaves the swizzle as given.
inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizzle) {
    if (format == DataFormat::Format11_11_10) {
        return CompMapping{
            .r = swizzle.b,
            .g = swizzle.g,
            .b = swizzle.r,
            .a = swizzle.a,
        };
    }
    if (format == DataFormat::Format10_10_10_2 || format == DataFormat::Format5_5_5_1) {
        return CompMapping{
            .r = swizzle.a,
            .g = swizzle.b,
            .b = swizzle.g,
            .a = swizzle.r,
        };
    }
    return swizzle;
}
|
||||
|
||||
/// Reports which conversion tag belongs to a number format: Uscaled, Sscaled
/// and Ubnorm (the formats RemapNumberFormat substitutes) each get a dedicated
/// tag, everything else yields NumberConversion::None.
inline NumberConversion MapNumberConversion(const NumberFormat format) {
    if (format == NumberFormat::Uscaled) {
        return NumberConversion::UintToUscaled;
    }
    if (format == NumberFormat::Sscaled) {
        return NumberConversion::SintToSscaled;
    }
    if (format == NumberFormat::Ubnorm) {
        return NumberConversion::UnormToUbnorm;
    }
    return NumberConversion::None;
}
|
||||
|
||||
} // namespace AmdGpu
|
||||
|
||||
template <>
|
||||
|
@ -119,19 +119,23 @@ public:
|
||||
return buffer;
|
||||
}
|
||||
|
||||
std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlagBits2 dst_acess_mask,
|
||||
vk::PipelineStageFlagBits2 dst_stage) {
|
||||
std::optional<vk::BufferMemoryBarrier2> GetBarrier(
|
||||
vk::Flags<vk::AccessFlagBits2> dst_acess_mask, vk::PipelineStageFlagBits2 dst_stage,
|
||||
u32 offset = 0) {
|
||||
if (dst_acess_mask == access_mask && stage == dst_stage) {
|
||||
return {};
|
||||
}
|
||||
|
||||
DEBUG_ASSERT(offset < size_bytes);
|
||||
|
||||
auto barrier = vk::BufferMemoryBarrier2{
|
||||
.srcStageMask = stage,
|
||||
.srcAccessMask = access_mask,
|
||||
.dstStageMask = dst_stage,
|
||||
.dstAccessMask = dst_acess_mask,
|
||||
.buffer = buffer.buffer,
|
||||
.size = size_bytes,
|
||||
.offset = offset,
|
||||
.size = size_bytes - offset,
|
||||
};
|
||||
access_mask = dst_acess_mask;
|
||||
stage = dst_stage;
|
||||
@ -150,8 +154,10 @@ public:
|
||||
Vulkan::Scheduler* scheduler;
|
||||
MemoryUsage usage;
|
||||
UniqueBuffer buffer;
|
||||
vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone};
|
||||
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone};
|
||||
vk::Flags<vk::AccessFlagBits2> access_mask{
|
||||
vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
|
||||
vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite};
|
||||
vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eAllCommands};
|
||||
};
|
||||
|
||||
class StreamBuffer : public Buffer {
|
||||
|
@ -34,21 +34,10 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
|
||||
|
||||
// Ensure the first slot is used for the null buffer
|
||||
const auto null_id =
|
||||
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1);
|
||||
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 16);
|
||||
ASSERT(null_id.index == 0);
|
||||
const vk::Buffer& null_buffer = slot_buffers[null_id].buffer;
|
||||
Vulkan::SetObjectName(instance.GetDevice(), null_buffer, "Null Buffer");
|
||||
|
||||
const vk::BufferViewCreateInfo null_view_ci = {
|
||||
.buffer = null_buffer,
|
||||
.format = vk::Format::eR8Unorm,
|
||||
.offset = 0,
|
||||
.range = VK_WHOLE_SIZE,
|
||||
};
|
||||
const auto [null_view_result, null_view] = instance.GetDevice().createBufferView(null_view_ci);
|
||||
ASSERT_MSG(null_view_result == vk::Result::eSuccess, "Failed to create null buffer view.");
|
||||
null_buffer_view = null_view;
|
||||
Vulkan::SetObjectName(instance.GetDevice(), null_buffer_view, "Null Buffer View");
|
||||
}
|
||||
|
||||
BufferCache::~BufferCache() = default;
|
||||
@ -479,43 +468,36 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
|
||||
};
|
||||
scheduler.EndRendering();
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
const std::array pre_barriers = {
|
||||
vk::BufferMemoryBarrier2{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferRead,
|
||||
.buffer = overlap.Handle(),
|
||||
.offset = 0,
|
||||
.size = overlap.SizeBytes(),
|
||||
},
|
||||
};
|
||||
const std::array post_barriers = {
|
||||
vk::BufferMemoryBarrier2{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferRead,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryWrite,
|
||||
.buffer = overlap.Handle(),
|
||||
.offset = 0,
|
||||
.size = overlap.SizeBytes(),
|
||||
},
|
||||
vk::BufferMemoryBarrier2{
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||
.buffer = new_buffer.Handle(),
|
||||
.offset = dst_base_offset,
|
||||
.size = overlap.SizeBytes(),
|
||||
},
|
||||
};
|
||||
|
||||
boost::container::static_vector<vk::BufferMemoryBarrier2, 2> pre_barriers{};
|
||||
if (auto src_barrier = overlap.GetBarrier(vk::AccessFlagBits2::eTransferRead,
|
||||
vk::PipelineStageFlagBits2::eTransfer)) {
|
||||
pre_barriers.push_back(*src_barrier);
|
||||
}
|
||||
if (auto dst_barrier =
|
||||
new_buffer.GetBarrier(vk::AccessFlagBits2::eTransferWrite,
|
||||
vk::PipelineStageFlagBits2::eTransfer, dst_base_offset)) {
|
||||
pre_barriers.push_back(*dst_barrier);
|
||||
}
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = 1,
|
||||
.bufferMemoryBarrierCount = static_cast<u32>(pre_barriers.size()),
|
||||
.pBufferMemoryBarriers = pre_barriers.data(),
|
||||
});
|
||||
|
||||
cmdbuf.copyBuffer(overlap.Handle(), new_buffer.Handle(), copy);
|
||||
|
||||
boost::container::static_vector<vk::BufferMemoryBarrier2, 2> post_barriers{};
|
||||
if (auto src_barrier =
|
||||
overlap.GetBarrier(vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||
vk::PipelineStageFlagBits2::eAllCommands)) {
|
||||
post_barriers.push_back(*src_barrier);
|
||||
}
|
||||
if (auto dst_barrier = new_buffer.GetBarrier(
|
||||
vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
|
||||
vk::PipelineStageFlagBits2::eAllCommands, dst_base_offset)) {
|
||||
post_barriers.push_back(*dst_barrier);
|
||||
}
|
||||
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
|
||||
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
|
||||
.bufferMemoryBarrierCount = static_cast<u32>(post_barriers.size()),
|
||||
@ -626,7 +608,8 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
|
||||
const auto cmdbuf = scheduler.CommandBuffer();
|
||||
const vk::BufferMemoryBarrier2 pre_barrier = {
|
||||
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
|
||||
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
|
||||
vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite,
|
||||
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
|
||||
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
|
||||
.buffer = buffer.Handle(),
|
||||
@ -660,7 +643,7 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
|
||||
FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize;
|
||||
TextureCache::BaseDesc desc{};
|
||||
desc.info.guest_address = device_addr;
|
||||
desc.info.guest_size_bytes = size;
|
||||
desc.info.guest_size = size;
|
||||
const ImageId image_id = texture_cache.FindImage(desc, find_flags);
|
||||
if (!image_id) {
|
||||
return false;
|
||||
|
@ -71,10 +71,6 @@ public:
|
||||
return slot_buffers[id];
|
||||
}
|
||||
|
||||
[[nodiscard]] vk::BufferView& NullBufferView() {
|
||||
return null_buffer_view;
|
||||
}
|
||||
|
||||
/// Invalidates any buffer in the logical page range.
|
||||
void InvalidateMemory(VAddr device_addr, u64 size);
|
||||
|
||||
@ -160,7 +156,6 @@ private:
|
||||
std::shared_mutex mutex;
|
||||
Common::SlotVector<Buffer> slot_buffers;
|
||||
RangeSet gpu_modified_ranges;
|
||||
vk::BufferView null_buffer_view;
|
||||
MemoryTracker memory_tracker;
|
||||
PageTable page_table;
|
||||
};
|
||||
|
@ -2,13 +2,14 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
set(SHADER_FILES
|
||||
detile_m8x1.comp
|
||||
detile_m8x2.comp
|
||||
detile_m32x1.comp
|
||||
detile_m32x2.comp
|
||||
detile_m32x4.comp
|
||||
detile_macro32x1.comp
|
||||
detile_macro32x2.comp
|
||||
detilers/macro_32bpp.comp
|
||||
detilers/macro_64bpp.comp
|
||||
detilers/macro_8bpp.comp
|
||||
detilers/micro_128bpp.comp
|
||||
detilers/micro_16bpp.comp
|
||||
detilers/micro_32bpp.comp
|
||||
detilers/micro_64bpp.comp
|
||||
detilers/micro_8bpp.comp
|
||||
fs_tri.vert
|
||||
post_process.frag
|
||||
)
|
||||
|
@ -87,7 +87,7 @@ void main() {
|
||||
uint offs = slice_offs + tile_offs + (idx * BPP / 8);
|
||||
|
||||
uint p0 = in_data[(offs >> 2) + 0];
|
||||
uint p1 = in_data[(offs >> 2) + 1];
|
||||
uint p1 = in_data[(offs >> 2) + 1];
|
||||
out_data[2 * gl_GlobalInvocationID.x + 0] = p0;
|
||||
out_data[2 * gl_GlobalInvocationID.x + 1] = p1;
|
||||
out_data[2 * gl_GlobalInvocationID.x + 1] = p1;
|
||||
}
|
101
src/video_core/host_shaders/detilers/macro_8bpp.comp
Normal file
101
src/video_core/host_shaders/detilers/macro_8bpp.comp
Normal file
@ -0,0 +1,101 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#version 450
|
||||
|
||||
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout(std430, binding = 0) buffer input_buf {
|
||||
uint in_data[];
|
||||
};
|
||||
layout(std430, binding = 1) buffer output_buf {
|
||||
uint out_data[];
|
||||
};
|
||||
|
||||
layout(push_constant) uniform image_info {
|
||||
uint num_levels;
|
||||
uint pitch;
|
||||
uint height;
|
||||
uint c0;
|
||||
uint c1;
|
||||
} info;
|
||||
|
||||
const uint lut_8bpp[][16] = {
|
||||
{
|
||||
0x05040100, 0x45444140,
|
||||
0x07060302, 0x47464342,
|
||||
0x0d0c0908, 0x4d4c4948,
|
||||
0x0f0e0b0a, 0x4f4e4b4a,
|
||||
0x85848180, 0xc5c4c1c0,
|
||||
0x87868382, 0xc7c6c3c2,
|
||||
0x8d8c8988, 0xcdccc9c8,
|
||||
0x8f8e8b8a, 0xcfcecbca,
|
||||
},
|
||||
{
|
||||
0x15141110, 0x55545150,
|
||||
0x17161312, 0x57565352,
|
||||
0x1d1c1918, 0x5d5c5958,
|
||||
0x1f1e1b1a, 0x5f5e5b5a,
|
||||
0x95949190, 0xd5d4d1d0,
|
||||
0x97969392, 0xd7d6d3d2,
|
||||
0x9d9c9998, 0xdddcd9d8,
|
||||
0x9f9e9b9a, 0xdfdedbda,
|
||||
},
|
||||
{
|
||||
0x25242120, 0x65646160,
|
||||
0x27262322, 0x67666362,
|
||||
0x2d2c2928, 0x6d6c6968,
|
||||
0x2f2e2b2a, 0x6f6e6b6a,
|
||||
0xa5a4a1a0, 0xe5e4e1e0,
|
||||
0xa7a6a3a2, 0xe7e6e3e2,
|
||||
0xadaca9a8, 0xedece9e8,
|
||||
0xafaeabaa, 0xefeeebea,
|
||||
},
|
||||
{
|
||||
0x35343130, 0x75747170,
|
||||
0x37363332, 0x77767372,
|
||||
0x3d3c3938, 0x7d7c7978,
|
||||
0x3f3e3b3a, 0x7f7e7b7a,
|
||||
0xb5b4b1b0, 0xf5f4f1f0,
|
||||
0xb7b6b3b2, 0xf7f6f3f2,
|
||||
0xbdbcb9b8, 0xfdfcf9f8,
|
||||
0xbfbebbba, 0xfffefbfa,
|
||||
},
|
||||
};
|
||||
|
||||
#define MICRO_TILE_DIM (8)
|
||||
#define MICRO_TILE_SZ (256)
|
||||
#define TEXELS_PER_ELEMENT (1)
|
||||
#define BPP (8)
|
||||
|
||||
shared uint scratch[16];
|
||||
|
||||
// Processes one 8-bit texel per invocation. Four consecutive invocations share
// one entry of `scratch`, each contributing one byte, and the first of the four
// writes the assembled dword to `out_data`.
void main() {
    // Invocations 4k..4k+3 of the workgroup cooperate on scratch[k].
    uint slot = gl_LocalInvocationID.x >> 2u;
    // Bit position of this invocation's byte inside the shared dword.
    uint byte_ofs = (gl_LocalInvocationID.x & 3u) * 8u;

    atomicAnd(scratch[slot], 0);
    // The clear must be complete for every cooperating invocation before any
    // of them starts OR-ing its byte in; otherwise a late clear can wipe a byte.
    memoryBarrierShared();
    barrier();

    // Linear invocation index -> (x, y, z) using the pitch/height push constants.
    uint x = gl_GlobalInvocationID.x % info.pitch;
    uint y = (gl_GlobalInvocationID.x / info.pitch) % info.height;
    uint z = gl_GlobalInvocationID.x / (info.pitch * info.height);

    // Position inside the 8x8 micro tile; the low two bits of z pick the table.
    uint col = bitfieldExtract(x, 0, 3);
    uint row = bitfieldExtract(y, 0, 3);
    uint lut = bitfieldExtract(z, 0, 2);
    // Each table dword packs four byte-sized indices; select this invocation's.
    uint idx_dw = lut_8bpp[lut][(col + row * MICRO_TILE_DIM) >> 2u];
    uint idx = bitfieldExtract(idx_dw >> byte_ofs, 0, 8);

    // NOTE(review): c0 / c1 look like tiles-per-row and tiles-per-slice-group
    // strides supplied by the host - confirm against the dispatching code.
    uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ;
    uint tile_row = y / MICRO_TILE_DIM;
    uint tile_column = x / MICRO_TILE_DIM;
    uint tile_offs = ((tile_row * info.c0) + tile_column) * MICRO_TILE_SZ;
    uint offs = (slice_offs + tile_offs) + (idx * BPP / 8);

    uint p0 = in_data[offs >> 2u];
    // Only the byte position within the fetched dword may be used as the shift
    // amount; shifting a 32-bit value by 32 or more is undefined in GLSL.
    uint byte = bitfieldExtract(p0 >> ((offs & 3u) * 8u), 0, 8);
    atomicOr(scratch[slot], byte << byte_ofs);

    // All four bytes must have landed before the dword is read back.
    memoryBarrierShared();
    barrier();

    if (byte_ofs == 0u) {
        out_data[gl_GlobalInvocationID.x >> 2u] = scratch[slot];
    }
}
|
@ -447,7 +447,7 @@ static constexpr vk::FormatFeatureFlags2 GetNumberFormatFeatureFlags(
|
||||
case AmdGpu::NumberFormat::Srgb:
|
||||
return ImageRead | Mrt;
|
||||
case AmdGpu::NumberFormat::Ubnorm:
|
||||
case AmdGpu::NumberFormat::UbnromNz:
|
||||
case AmdGpu::NumberFormat::UbnormNz:
|
||||
case AmdGpu::NumberFormat::Ubint:
|
||||
case AmdGpu::NumberFormat::Ubscaled:
|
||||
return ImageRead;
|
||||
@ -468,6 +468,7 @@ static constexpr SurfaceFormatInfo CreateSurfaceFormatInfo(const AmdGpu::DataFor
|
||||
}
|
||||
|
||||
std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
// Uscaled, Sscaled, and Ubnorm formats are automatically remapped and handled in shader.
|
||||
static constexpr std::array formats{
|
||||
// Invalid
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Unorm,
|
||||
@ -490,7 +491,7 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eUndefined),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubnorm,
|
||||
vk::Format::eUndefined),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::UbnromNz,
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::UbnormNz,
|
||||
vk::Format::eUndefined),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatInvalid, AmdGpu::NumberFormat::Ubint,
|
||||
vk::Format::eUndefined),
|
||||
@ -501,10 +502,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eR8Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR8Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR8Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR8Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR8Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8, AmdGpu::NumberFormat::Sint,
|
||||
@ -516,10 +513,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eR16Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR16Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR16Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR16Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR16Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16, AmdGpu::NumberFormat::Sint,
|
||||
@ -531,10 +524,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eR8G8Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR8G8Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR8G8Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR8G8Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR8G8Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8, AmdGpu::NumberFormat::Sint,
|
||||
@ -553,10 +542,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eR16G16Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR16G16Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR16G16Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR16G16Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR16G16Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16, AmdGpu::NumberFormat::Sint,
|
||||
@ -573,10 +558,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eA2B10G10R10UnormPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eA2B10G10R10SnormPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eA2B10G10R10UscaledPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eA2B10G10R10SscaledPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eA2B10G10R10UintPack32),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format2_10_10_10, AmdGpu::NumberFormat::Sint,
|
||||
@ -586,10 +567,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eR8G8B8A8Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR8G8B8A8Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uscaled,
|
||||
vk::Format::eR8G8B8A8Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sscaled,
|
||||
vk::Format::eR8G8B8A8Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR8G8B8A8Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format8_8_8_8, AmdGpu::NumberFormat::Sint,
|
||||
@ -608,10 +585,6 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
|
||||
vk::Format::eR16G16B16A16Unorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Snorm,
|
||||
vk::Format::eR16G16B16A16Snorm),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16,
|
||||
AmdGpu::NumberFormat::Uscaled, vk::Format::eR16G16B16A16Uscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16,
|
||||
AmdGpu::NumberFormat::Sscaled, vk::Format::eR16G16B16A16Sscaled),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Uint,
|
||||
vk::Format::eR16G16B16A16Uint),
|
||||
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format16_16_16_16, AmdGpu::NumberFormat::Sint,
|
||||
|
@ -18,6 +18,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
||||
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} {
|
||||
auto& info = stages[int(Shader::LogicalStage::Compute)];
|
||||
info = &info_;
|
||||
const auto debug_str = GetDebugString();
|
||||
|
||||
const vk::PipelineShaderStageCreateInfo shader_ci = {
|
||||
.stage = vk::ShaderStageFlagBits::eCompute,
|
||||
@ -89,8 +90,9 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
||||
.bindingCount = static_cast<u32>(bindings.size()),
|
||||
.pBindings = bindings.data(),
|
||||
};
|
||||
const auto device = instance.GetDevice();
|
||||
auto [descriptor_set_result, descriptor_set] =
|
||||
instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci);
|
||||
device.createDescriptorSetLayoutUnique(desc_layout_ci);
|
||||
ASSERT_MSG(descriptor_set_result == vk::Result::eSuccess,
|
||||
"Failed to create compute descriptor set layout: {}",
|
||||
vk::to_string(descriptor_set_result));
|
||||
@ -107,6 +109,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
||||
ASSERT_MSG(layout_result == vk::Result::eSuccess,
|
||||
"Failed to create compute pipeline layout: {}", vk::to_string(layout_result));
|
||||
pipeline_layout = std::move(layout);
|
||||
SetObjectName(device, *pipeline_layout, "Compute PipelineLayout {}", debug_str);
|
||||
|
||||
const vk::ComputePipelineCreateInfo compute_pipeline_ci = {
|
||||
.stage = shader_ci,
|
||||
@ -117,6 +120,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
|
||||
ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create compute pipeline: {}",
|
||||
vk::to_string(pipeline_result));
|
||||
pipeline = std::move(pipe);
|
||||
SetObjectName(device, *pipeline, "Compute Pipeline {}", debug_str);
|
||||
}
|
||||
|
||||
ComputePipeline::~ComputePipeline() = default;
|
||||
|
@ -8,7 +8,6 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/io_file.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv_quad_rect.h"
|
||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
@ -16,6 +15,7 @@
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
@ -36,6 +36,7 @@ GraphicsPipeline::GraphicsPipeline(
|
||||
const vk::Device device = instance.GetDevice();
|
||||
std::ranges::copy(infos, stages.begin());
|
||||
BuildDescSetLayout();
|
||||
const auto debug_str = GetDebugString();
|
||||
|
||||
const vk::PushConstantRange push_constants = {
|
||||
.stageFlags = gp_stage_flags,
|
||||
@ -54,6 +55,7 @@ GraphicsPipeline::GraphicsPipeline(
|
||||
ASSERT_MSG(layout_result == vk::Result::eSuccess,
|
||||
"Failed to create graphics pipeline layout: {}", vk::to_string(layout_result));
|
||||
pipeline_layout = std::move(layout);
|
||||
SetObjectName(device, *pipeline_layout, "Graphics PipelineLayout {}", debug_str);
|
||||
|
||||
boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
|
||||
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
|
||||
@ -322,6 +324,7 @@ GraphicsPipeline::GraphicsPipeline(
|
||||
ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create graphics pipeline: {}",
|
||||
vk::to_string(pipeline_result));
|
||||
pipeline = std::move(pipe);
|
||||
SetObjectName(device, *pipeline, "Graphics Pipeline {}", debug_str);
|
||||
}
|
||||
|
||||
GraphicsPipeline::~GraphicsPipeline() = default;
|
||||
|
@ -32,6 +32,7 @@ struct GraphicsPipelineKey {
|
||||
u32 num_color_attachments;
|
||||
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
|
||||
std::array<AmdGpu::NumberFormat, Liverpool::NumColorBuffers> color_num_formats;
|
||||
std::array<AmdGpu::NumberConversion, Liverpool::NumColorBuffers> color_num_conversions;
|
||||
std::array<AmdGpu::CompMapping, Liverpool::NumColorBuffers> color_swizzles;
|
||||
vk::Format depth_format;
|
||||
vk::Format stencil_format;
|
||||
|
@ -92,13 +92,15 @@ std::string GetReadableVersion(u32 version) {
|
||||
Instance::Instance(bool enable_validation, bool enable_crash_diagnostic)
|
||||
: instance{CreateInstance(Frontend::WindowSystemType::Headless, enable_validation,
|
||||
enable_crash_diagnostic)},
|
||||
physical_devices{EnumeratePhysicalDevices(instance)} {}
|
||||
physical_devices{EnumeratePhysicalDevices(instance)},
|
||||
crash_diagnostic{enable_crash_diagnostic} {}
|
||||
|
||||
Instance::Instance(Frontend::WindowSDL& window, s32 physical_device_index,
|
||||
bool enable_validation /*= false*/, bool enable_crash_diagnostic /*= false*/)
|
||||
: instance{CreateInstance(window.GetWindowInfo().type, enable_validation,
|
||||
enable_crash_diagnostic)},
|
||||
physical_devices{EnumeratePhysicalDevices(instance)} {
|
||||
physical_devices{EnumeratePhysicalDevices(instance)},
|
||||
crash_diagnostic{enable_crash_diagnostic} {
|
||||
if (enable_validation) {
|
||||
debug_callback = CreateDebugCallback(*instance);
|
||||
}
|
||||
|
@ -81,7 +81,7 @@ public:
|
||||
|
||||
/// Returns true when a known debugging tool is attached.
|
||||
bool HasDebuggingToolAttached() const {
|
||||
return has_renderdoc || has_nsight_graphics;
|
||||
return crash_diagnostic || has_renderdoc || has_nsight_graphics;
|
||||
}
|
||||
|
||||
/// Returns true if anisotropic filtering is supported
|
||||
@ -338,6 +338,7 @@ private:
|
||||
u32 subgroup_size{};
|
||||
bool tooling_info{};
|
||||
bool debug_utils_supported{};
|
||||
bool crash_diagnostic{};
|
||||
bool has_nsight_graphics{};
|
||||
bool has_renderdoc{};
|
||||
};
|
||||
|
@ -168,6 +168,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
||||
for (u32 i = 0; i < Shader::MaxColorBuffers; i++) {
|
||||
info.fs_info.color_buffers[i] = {
|
||||
.num_format = graphics_key.color_num_formats[i],
|
||||
.num_conversion = graphics_key.color_num_conversions[i],
|
||||
.swizzle = graphics_key.color_swizzles[i],
|
||||
};
|
||||
}
|
||||
@ -302,6 +303,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
key.num_color_attachments = 0;
|
||||
key.color_formats.fill(vk::Format::eUndefined);
|
||||
key.color_num_formats.fill(AmdGpu::NumberFormat::Unorm);
|
||||
key.color_num_conversions.fill(AmdGpu::NumberConversion::None);
|
||||
key.blend_controls.fill({});
|
||||
key.write_masks.fill({});
|
||||
key.color_swizzles.fill({});
|
||||
@ -330,6 +332,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
key.color_formats[remapped_cb] =
|
||||
LiverpoolToVK::SurfaceFormat(col_buf.GetDataFmt(), col_buf.GetNumberFmt());
|
||||
key.color_num_formats[remapped_cb] = col_buf.GetNumberFmt();
|
||||
key.color_num_conversions[remapped_cb] = col_buf.GetNumberConversion();
|
||||
key.color_swizzles[remapped_cb] = col_buf.Swizzle();
|
||||
}
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_common.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
@ -55,4 +56,19 @@ void Pipeline::BindResources(DescriptorWrites& set_writes, const BufferBarriers&
|
||||
cmdbuf.bindDescriptorSets(bind_point, *pipeline_layout, 0, desc_set, {});
|
||||
}
|
||||
|
||||
std::string Pipeline::GetDebugString() const {
|
||||
std::string stage_desc;
|
||||
for (const auto& stage : stages) {
|
||||
if (stage) {
|
||||
const auto shader_name = PipelineCache::GetShaderName(stage->stage, stage->pgm_hash);
|
||||
if (stage_desc.empty()) {
|
||||
stage_desc = shader_name;
|
||||
} else {
|
||||
stage_desc = fmt::format("{},{}", stage_desc, shader_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
return stage_desc;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -61,6 +61,8 @@ public:
|
||||
const Shader::PushData& push_data) const;
|
||||
|
||||
protected:
|
||||
[[nodiscard]] std::string GetDebugString() const;
|
||||
|
||||
const Instance& instance;
|
||||
Scheduler& scheduler;
|
||||
DescriptorHeap& desc_heap;
|
||||
|
@ -427,7 +427,7 @@ bool Presenter::ShowSplash(Frame* frame /*= nullptr*/) {
|
||||
VideoCore::Extent3D{splash->GetImageInfo().width, splash->GetImageInfo().height, 1};
|
||||
info.pitch = splash->GetImageInfo().width;
|
||||
info.guest_address = VAddr(splash->GetImageData().data());
|
||||
info.guest_size_bytes = splash->GetImageData().size();
|
||||
info.guest_size = splash->GetImageData().size();
|
||||
info.mips_layout.emplace_back(splash->GetImageData().size(),
|
||||
splash->GetImageInfo().width,
|
||||
splash->GetImageInfo().height, 0);
|
||||
|
@ -537,6 +537,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
||||
}
|
||||
|
||||
// Second pass to re-bind buffers that were updated after binding
|
||||
auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID);
|
||||
for (u32 i = 0; i < buffer_bindings.size(); i++) {
|
||||
const auto& [buffer_id, vsharp] = buffer_bindings[i];
|
||||
const auto& desc = stage.buffers[i];
|
||||
@ -548,7 +549,6 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
||||
} else if (instance.IsNullDescriptorSupported()) {
|
||||
buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
|
||||
} else {
|
||||
auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID);
|
||||
buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE);
|
||||
}
|
||||
} else {
|
||||
@ -582,17 +582,19 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
||||
++binding.buffer;
|
||||
}
|
||||
|
||||
const auto null_buffer_view =
|
||||
instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView();
|
||||
for (u32 i = 0; i < texbuffer_bindings.size(); i++) {
|
||||
const auto& [buffer_id, vsharp] = texbuffer_bindings[i];
|
||||
const auto& desc = stage.texture_buffers[i];
|
||||
vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view);
|
||||
// Fallback format for null buffer view; never used in valid buffer case.
|
||||
const auto data_fmt = vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid
|
||||
? vsharp.GetDataFmt()
|
||||
: AmdGpu::DataFormat::Format8;
|
||||
const u32 fmt_stride = AmdGpu::NumBits(data_fmt) >> 3;
|
||||
vk::BufferView buffer_view;
|
||||
if (buffer_id) {
|
||||
const u32 alignment = instance.TexelBufferMinAlignment();
|
||||
const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(
|
||||
vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id);
|
||||
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
|
||||
const u32 buf_stride = vsharp.GetStride();
|
||||
ASSERT_MSG(buf_stride % fmt_stride == 0,
|
||||
"Texel buffer stride must match format stride");
|
||||
@ -600,9 +602,8 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
||||
const u32 adjust = offset - offset_aligned;
|
||||
ASSERT(adjust % fmt_stride == 0);
|
||||
push_data.AddTexelOffset(binding.buffer, buf_stride / fmt_stride, adjust / fmt_stride);
|
||||
buffer_view =
|
||||
vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written,
|
||||
vsharp.GetDataFmt(), vsharp.GetNumberFmt());
|
||||
buffer_view = vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust,
|
||||
desc.is_written, data_fmt, vsharp.GetNumberFmt());
|
||||
if (auto barrier =
|
||||
vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
|
||||
: vk::AccessFlagBits2::eShaderRead,
|
||||
@ -612,6 +613,11 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
||||
if (desc.is_written) {
|
||||
texture_cache.InvalidateMemoryFromGPU(vsharp.base_address, vsharp.GetSize());
|
||||
}
|
||||
} else if (instance.IsNullDescriptorSupported()) {
|
||||
buffer_view = VK_NULL_HANDLE;
|
||||
} else {
|
||||
buffer_view =
|
||||
null_buffer.View(0, fmt_stride, desc.is_written, data_fmt, vsharp.GetNumberFmt());
|
||||
}
|
||||
|
||||
set_writes.push_back({
|
||||
@ -621,7 +627,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
|
||||
: vk::DescriptorType::eUniformTexelBuffer,
|
||||
.pTexelBufferView = &buffer_view,
|
||||
.pTexelBufferView = &buffer_views.emplace_back(buffer_view),
|
||||
});
|
||||
++binding.buffer;
|
||||
}
|
||||
|
@ -108,7 +108,7 @@ private:
|
||||
std::pair<VideoCore::ImageId, VideoCore::TextureCache::RenderTargetDesc>, 8>
|
||||
cb_descs;
|
||||
std::optional<std::pair<VideoCore::ImageId, VideoCore::TextureCache::DepthTargetDesc>> db_desc;
|
||||
boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
|
||||
boost::container::static_vector<vk::DescriptorImageInfo, 64> image_infos;
|
||||
boost::container::static_vector<vk::BufferView, 8> buffer_views;
|
||||
boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
|
||||
boost::container::static_vector<VideoCore::ImageId, 64> bound_images;
|
||||
@ -121,7 +121,7 @@ private:
|
||||
using TexBufferBindingInfo = std::pair<VideoCore::BufferId, AmdGpu::Buffer>;
|
||||
boost::container::static_vector<TexBufferBindingInfo, 32> texbuffer_bindings;
|
||||
using ImageBindingInfo = std::pair<VideoCore::ImageId, VideoCore::TextureCache::TextureDesc>;
|
||||
boost::container::static_vector<ImageBindingInfo, 32> image_bindings;
|
||||
boost::container::static_vector<ImageBindingInfo, 64> image_bindings;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
@ -153,7 +153,8 @@ vk::DescriptorSet DescriptorHeap::Commit(vk::DescriptorSetLayout set_layout) {
|
||||
}
|
||||
|
||||
// The pool has run out. Record current tick and place it in pending list.
|
||||
ASSERT_MSG(result == vk::Result::eErrorOutOfPoolMemory,
|
||||
ASSERT_MSG(result == vk::Result::eErrorOutOfPoolMemory ||
|
||||
result == vk::Result::eErrorFragmentedPool,
|
||||
"Unexpected error during descriptor set allocation {}", vk::to_string(result));
|
||||
pending_pools.emplace_back(curr_pool, master_semaphore->CurrentTick());
|
||||
if (const auto [pool, tick] = pending_pools.front(); master_semaphore->IsFree(tick)) {
|
||||
|
@ -210,7 +210,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
|
||||
|
||||
Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {}x{}x{} {:#x}:{:#x}",
|
||||
info.size.width, info.size.height, info.size.depth, info.guest_address,
|
||||
info.guest_size_bytes);
|
||||
info.guest_size);
|
||||
}
|
||||
|
||||
boost::container::small_vector<vk::ImageMemoryBarrier2, 32> Image::GetBarriers(
|
||||
|
@ -80,7 +80,7 @@ struct Image {
|
||||
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
|
||||
const VAddr overlap_end = overlap_cpu_addr + overlap_size;
|
||||
const auto image_addr = info.guest_address;
|
||||
const auto image_end = info.guest_address + info.guest_size_bytes;
|
||||
const auto image_end = info.guest_address + info.guest_size;
|
||||
return image_addr < overlap_end && overlap_cpu_addr < image_end;
|
||||
}
|
||||
|
||||
|
@ -3,8 +3,10 @@
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/config.h"
|
||||
#include "core/libraries/kernel/process.h"
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
#include "video_core/texture_cache/image_info.h"
|
||||
#include "video_core/texture_cache/tile.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
@ -45,195 +47,6 @@ static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
// clang-format off
|
||||
// The table of macro tiles parameters for given tiling index (row) and bpp (column)
|
||||
static constexpr std::array macro_tile_extents_x1{
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 04
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 07
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0A
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0B
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0C
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0E
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0F
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 10
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 11
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 12
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
|
||||
};
|
||||
|
||||
static constexpr std::array macro_tile_extents_x2{
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 04
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 07
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0A
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0B
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0C
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0E
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0F
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 10
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 11
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 12
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
|
||||
};
|
||||
|
||||
static constexpr std::array macro_tile_extents_x4{
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 04
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 07
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0A
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0B
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0C
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0E
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0F
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 10
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 11
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 12
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
|
||||
};
|
||||
|
||||
static constexpr std::array macro_tile_extents_x8{
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 04
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 07
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0A
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0B
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0C
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0E
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0F
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 10
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 11
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 12
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
|
||||
};
|
||||
|
||||
static constexpr std::array macro_tile_extents{
|
||||
macro_tile_extents_x1,
|
||||
macro_tile_extents_x2,
|
||||
macro_tile_extents_x4,
|
||||
macro_tile_extents_x8,
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
static constexpr std::pair micro_tile_extent{8u, 8u};
|
||||
static constexpr auto hw_pipe_interleave = 256u;
|
||||
|
||||
static constexpr std::pair<u32, u32> GetMacroTileExtents(u32 tiling_idx, u32 bpp, u32 num_samples) {
|
||||
ASSERT(num_samples <= 8);
|
||||
const auto row = tiling_idx * 5;
|
||||
const auto column = std::bit_width(bpp) - 4; // bpps are 8, 16, 32, 64, 128
|
||||
return (macro_tile_extents[std::log2(num_samples)])[row + column];
|
||||
}
|
||||
|
||||
static constexpr std::pair<u32, size_t> ImageSizeLinearAligned(u32 pitch, u32 height, u32 bpp,
|
||||
u32 num_samples) {
|
||||
const auto pitch_align = std::max(8u, 64u / ((bpp + 7) / 8));
|
||||
auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
|
||||
const auto height_aligned = height;
|
||||
size_t log_sz = pitch_aligned * height_aligned * num_samples;
|
||||
const auto slice_align = std::max(64u, 256u / ((bpp + 7) / 8));
|
||||
while (log_sz % slice_align) {
|
||||
pitch_aligned += pitch_align;
|
||||
log_sz = pitch_aligned * height_aligned * num_samples;
|
||||
}
|
||||
return {pitch_aligned, (log_sz * bpp + 7) / 8};
|
||||
}
|
||||
|
||||
static constexpr std::pair<u32, size_t> ImageSizeMicroTiled(u32 pitch, u32 height, u32 bpp,
|
||||
u32 num_samples) {
|
||||
const auto& [pitch_align, height_align] = micro_tile_extent;
|
||||
auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
|
||||
const auto height_aligned = (height + height_align - 1) & ~(height_align - 1);
|
||||
size_t log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8;
|
||||
while (log_sz % 256) {
|
||||
pitch_aligned += 8;
|
||||
log_sz = (pitch_aligned * height_aligned * bpp * num_samples + 7) / 8;
|
||||
}
|
||||
return {pitch_aligned, log_sz};
|
||||
}
|
||||
|
||||
static constexpr std::pair<u32, size_t> ImageSizeMacroTiled(u32 pitch, u32 height, u32 bpp,
|
||||
u32 num_samples, u32 tiling_idx,
|
||||
u32 mip_n) {
|
||||
const auto& [pitch_align, height_align] = GetMacroTileExtents(tiling_idx, bpp, num_samples);
|
||||
ASSERT(pitch_align != 0 && height_align != 0);
|
||||
bool downgrade_to_micro = false;
|
||||
if (mip_n > 0) {
|
||||
const bool is_less_than_tile = pitch < pitch_align || height < height_align;
|
||||
// TODO: threshold check
|
||||
downgrade_to_micro = is_less_than_tile;
|
||||
}
|
||||
|
||||
if (downgrade_to_micro) {
|
||||
return ImageSizeMicroTiled(pitch, height, bpp, num_samples);
|
||||
}
|
||||
|
||||
const auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
|
||||
const auto height_aligned = (height + height_align - 1) & ~(height_align - 1);
|
||||
const auto log_sz = pitch_aligned * height_aligned * num_samples;
|
||||
return {pitch_aligned, (log_sz * bpp + 7) / 8};
|
||||
}
|
||||
|
||||
ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group,
|
||||
VAddr cpu_address) noexcept {
|
||||
const auto& attrib = group.attrib;
|
||||
@ -250,15 +63,15 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group,
|
||||
|
||||
guest_address = cpu_address;
|
||||
if (!props.is_tiled) {
|
||||
guest_size_bytes = pitch * size.height * 4;
|
||||
guest_size = pitch * size.height * 4;
|
||||
} else {
|
||||
if (Config::isNeoMode()) {
|
||||
guest_size_bytes = pitch * ((size.height + 127) & (~127)) * 4;
|
||||
if (Libraries::Kernel::sceKernelIsNeoMode()) {
|
||||
guest_size = pitch * ((size.height + 127) & (~127)) * 4;
|
||||
} else {
|
||||
guest_size_bytes = pitch * ((size.height + 63) & (~63)) * 4;
|
||||
guest_size = pitch * ((size.height + 63) & (~63)) * 4;
|
||||
}
|
||||
}
|
||||
mips_layout.emplace_back(guest_size_bytes, pitch, 0);
|
||||
mips_layout.emplace_back(guest_size, pitch, 0);
|
||||
}
|
||||
|
||||
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||
@ -279,9 +92,10 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
||||
|
||||
guest_address = buffer.Address();
|
||||
const auto color_slice_sz = buffer.GetColorSliceSize();
|
||||
guest_size_bytes = color_slice_sz * buffer.NumSlices();
|
||||
guest_size = color_slice_sz * buffer.NumSlices();
|
||||
mips_layout.emplace_back(color_slice_sz, pitch, 0);
|
||||
tiling_idx = static_cast<u32>(buffer.attrib.tile_mode_index.Value());
|
||||
alt_tile = Libraries::Kernel::sceKernelIsNeoMode() && buffer.info.alt_tile_mode;
|
||||
}
|
||||
|
||||
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices,
|
||||
@ -303,7 +117,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
|
||||
|
||||
guest_address = buffer.Address();
|
||||
const auto depth_slice_sz = buffer.GetDepthSliceSize();
|
||||
guest_size_bytes = depth_slice_sz * num_slices;
|
||||
guest_size = depth_slice_sz * num_slices;
|
||||
mips_layout.emplace_back(depth_slice_sz, pitch, 0);
|
||||
}
|
||||
|
||||
@ -333,13 +147,14 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
|
||||
|
||||
mips_layout.reserve(resources.levels);
|
||||
tiling_idx = image.tiling_index;
|
||||
alt_tile = Libraries::Kernel::sceKernelIsNeoMode() && image.alt_tile_mode;
|
||||
UpdateSize();
|
||||
}
|
||||
|
||||
void ImageInfo::UpdateSize() {
|
||||
mips_layout.clear();
|
||||
MipInfo mip_info{};
|
||||
guest_size_bytes = 0;
|
||||
guest_size = 0;
|
||||
for (auto mip = 0u; mip < resources.levels; ++mip) {
|
||||
auto bpp = num_bits;
|
||||
auto mip_w = pitch >> mip;
|
||||
@ -384,7 +199,7 @@ void ImageInfo::UpdateSize() {
|
||||
case AmdGpu::TilingMode::Depth_MacroTiled: {
|
||||
ASSERT(!props.is_block);
|
||||
std::tie(mip_info.pitch, mip_info.size) =
|
||||
ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, tiling_idx, mip);
|
||||
ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, tiling_idx, mip, alt_tile);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
@ -392,11 +207,11 @@ void ImageInfo::UpdateSize() {
|
||||
}
|
||||
}
|
||||
mip_info.size *= mip_d;
|
||||
mip_info.offset = guest_size_bytes;
|
||||
mip_info.offset = guest_size;
|
||||
mips_layout.emplace_back(mip_info);
|
||||
guest_size_bytes += mip_info.size;
|
||||
guest_size += mip_info.size;
|
||||
}
|
||||
guest_size_bytes *= resources.layers;
|
||||
guest_size *= resources.layers;
|
||||
}
|
||||
|
||||
int ImageInfo::IsMipOf(const ImageInfo& info) const {
|
||||
@ -468,18 +283,18 @@ int ImageInfo::IsSliceOf(const ImageInfo& info) const {
|
||||
}
|
||||
|
||||
// Check for size alignment.
|
||||
const bool slice_size = info.guest_size_bytes / info.resources.layers;
|
||||
if (guest_size_bytes % slice_size != 0) {
|
||||
const bool slice_size = info.guest_size / info.resources.layers;
|
||||
if (guest_size % slice_size != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Ensure that address is aligned too.
|
||||
const auto addr_diff = guest_address - info.guest_address;
|
||||
if ((addr_diff % guest_size_bytes) != 0) {
|
||||
if ((addr_diff % guest_size) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return addr_diff / guest_size_bytes;
|
||||
return addr_diff / guest_size;
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -84,8 +84,9 @@ struct ImageInfo {
|
||||
};
|
||||
boost::container::small_vector<MipInfo, 14> mips_layout;
|
||||
VAddr guest_address{0};
|
||||
u32 guest_size_bytes{0};
|
||||
u32 guest_size{0};
|
||||
u32 tiling_idx{0}; // TODO: merge with existing!
|
||||
bool alt_tile{false};
|
||||
|
||||
VAddr stencil_addr{0};
|
||||
u32 stencil_size{0};
|
||||
|
@ -3,7 +3,9 @@
|
||||
|
||||
#include <optional>
|
||||
#include <xxhash.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/debug.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/page_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
@ -34,7 +36,7 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
|
||||
Vulkan::SetObjectName(instance.GetDevice(), null_image, "Null Image");
|
||||
img.flags = ImageFlagBits::Empty;
|
||||
img.track_addr = img.info.guest_address;
|
||||
img.track_addr_end = img.info.guest_address + img.info.guest_size_bytes;
|
||||
img.track_addr_end = img.info.guest_address + img.info.guest_size;
|
||||
|
||||
ImageViewInfo view_info;
|
||||
const auto null_view_id =
|
||||
@ -50,7 +52,7 @@ void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) {
|
||||
if (image.hash == 0) {
|
||||
// Initialize hash
|
||||
const u8* addr = std::bit_cast<u8*>(image.info.guest_address);
|
||||
image.hash = XXH3_64bits(addr, image.info.guest_size_bytes);
|
||||
image.hash = XXH3_64bits(addr, image.info.guest_size);
|
||||
}
|
||||
image.flags |= ImageFlagBits::MaybeCpuDirty;
|
||||
UntrackImage(image_id);
|
||||
@ -63,7 +65,7 @@ void TextureCache::InvalidateMemory(VAddr addr, size_t size) {
|
||||
const auto pages_end = PageManager::GetNextPageAddr(addr + size - 1);
|
||||
ForEachImageInRegion(pages_start, pages_end - pages_start, [&](ImageId image_id, Image& image) {
|
||||
const auto image_begin = image.info.guest_address;
|
||||
const auto image_end = image.info.guest_address + image.info.guest_size_bytes;
|
||||
const auto image_end = image.info.guest_address + image.info.guest_size;
|
||||
if (image_begin < end && addr < image_end) {
|
||||
// Start or end of the modified region is in the image, or the image is entirely within
|
||||
// the modified region, so the image was definitely accessed by this page fault.
|
||||
@ -201,7 +203,7 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
|
||||
}
|
||||
|
||||
if (image_info.pixel_format != tex_cache_image.info.pixel_format ||
|
||||
image_info.guest_size_bytes <= tex_cache_image.info.guest_size_bytes) {
|
||||
image_info.guest_size <= tex_cache_image.info.guest_size) {
|
||||
auto result_id = merged_image_id ? merged_image_id : cache_image_id;
|
||||
const auto& result_image = slot_images[result_id];
|
||||
return {
|
||||
@ -302,7 +304,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) {
|
||||
|
||||
std::scoped_lock lock{mutex};
|
||||
boost::container::small_vector<ImageId, 8> image_ids;
|
||||
ForEachImageInRegion(info.guest_address, info.guest_size_bytes,
|
||||
ForEachImageInRegion(info.guest_address, info.guest_size,
|
||||
[&](ImageId image_id, Image& image) { image_ids.push_back(image_id); });
|
||||
|
||||
ImageId image_id{};
|
||||
@ -313,8 +315,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) {
|
||||
if (cache_image.info.guest_address != info.guest_address) {
|
||||
continue;
|
||||
}
|
||||
if (False(flags & FindFlags::RelaxSize) &&
|
||||
cache_image.info.guest_size_bytes != info.guest_size_bytes) {
|
||||
if (False(flags & FindFlags::RelaxSize) && cache_image.info.guest_size != info.guest_size) {
|
||||
continue;
|
||||
}
|
||||
if (False(flags & FindFlags::RelaxDim) && cache_image.info.size != info.size) {
|
||||
@ -455,7 +456,7 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
|
||||
if (!stencil_id) {
|
||||
ImageInfo info{};
|
||||
info.guest_address = desc.info.stencil_addr;
|
||||
info.guest_size_bytes = desc.info.stencil_size;
|
||||
info.guest_size = desc.info.stencil_size;
|
||||
info.size = desc.info.size;
|
||||
stencil_id = slot_images.insert(instance, scheduler, info);
|
||||
RegisterImage(stencil_id);
|
||||
@ -468,6 +469,9 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
|
||||
}
|
||||
|
||||
void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) {
|
||||
RENDERER_TRACE;
|
||||
TRACE_HINT(fmt::format("{:x}:{:x}", image.info.guest_address, image.info.guest_size));
|
||||
|
||||
if (False(image.flags & ImageFlagBits::Dirty)) {
|
||||
return;
|
||||
}
|
||||
@ -543,7 +547,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
|
||||
|
||||
const auto cmdbuf = sched_ptr->CommandBuffer();
|
||||
const VAddr image_addr = image.info.guest_address;
|
||||
const size_t image_size = image.info.guest_size_bytes;
|
||||
const size_t image_size = image.info.guest_size;
|
||||
const auto [vk_buffer, buf_offset] =
|
||||
buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty);
|
||||
|
||||
@ -612,7 +616,7 @@ void TextureCache::RegisterImage(ImageId image_id) {
|
||||
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
|
||||
"Trying to register an already registered image");
|
||||
image.flags |= ImageFlagBits::Registered;
|
||||
ForEachPage(image.info.guest_address, image.info.guest_size_bytes,
|
||||
ForEachPage(image.info.guest_address, image.info.guest_size,
|
||||
[this, image_id](u64 page) { page_table[page].push_back(image_id); });
|
||||
}
|
||||
|
||||
@ -621,7 +625,7 @@ void TextureCache::UnregisterImage(ImageId image_id) {
|
||||
ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
|
||||
"Trying to unregister an already unregistered image");
|
||||
image.flags &= ~ImageFlagBits::Registered;
|
||||
ForEachPage(image.info.guest_address, image.info.guest_size_bytes, [this, image_id](u64 page) {
|
||||
ForEachPage(image.info.guest_address, image.info.guest_size, [this, image_id](u64 page) {
|
||||
const auto page_it = page_table.find(page);
|
||||
if (page_it == nullptr) {
|
||||
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PageShift);
|
||||
@ -640,7 +644,7 @@ void TextureCache::UnregisterImage(ImageId image_id) {
|
||||
void TextureCache::TrackImage(ImageId image_id) {
|
||||
auto& image = slot_images[image_id];
|
||||
const auto image_begin = image.info.guest_address;
|
||||
const auto image_end = image.info.guest_address + image.info.guest_size_bytes;
|
||||
const auto image_end = image.info.guest_address + image.info.guest_size;
|
||||
if (image_begin == image.track_addr && image_end == image.track_addr_end) {
|
||||
return;
|
||||
}
|
||||
@ -649,7 +653,7 @@ void TextureCache::TrackImage(ImageId image_id) {
|
||||
// Re-track the whole image
|
||||
image.track_addr = image_begin;
|
||||
image.track_addr_end = image_end;
|
||||
tracker.UpdatePagesCachedCount(image_begin, image.info.guest_size_bytes, 1);
|
||||
tracker.UpdatePagesCachedCount(image_begin, image.info.guest_size, 1);
|
||||
} else {
|
||||
if (image_begin < image.track_addr) {
|
||||
TrackImageHead(image_id);
|
||||
@ -674,7 +678,7 @@ void TextureCache::TrackImageHead(ImageId image_id) {
|
||||
|
||||
void TextureCache::TrackImageTail(ImageId image_id) {
|
||||
auto& image = slot_images[image_id];
|
||||
const auto image_end = image.info.guest_address + image.info.guest_size_bytes;
|
||||
const auto image_end = image.info.guest_address + image.info.guest_size;
|
||||
if (image_end == image.track_addr_end) {
|
||||
return;
|
||||
}
|
||||
@ -719,7 +723,7 @@ void TextureCache::UntrackImageHead(ImageId image_id) {
|
||||
|
||||
void TextureCache::UntrackImageTail(ImageId image_id) {
|
||||
auto& image = slot_images[image_id];
|
||||
const auto image_end = image.info.guest_address + image.info.guest_size_bytes;
|
||||
const auto image_end = image.info.guest_address + image.info.guest_size;
|
||||
if (!image.IsTracked() || image.track_addr_end < image_end) {
|
||||
return;
|
||||
}
|
||||
|
347
src/video_core/texture_cache/tile.h
Normal file
347
src/video_core/texture_cache/tile.h
Normal file
@ -0,0 +1,347 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/types.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
// clang-format off
|
||||
// The tables of macro tile parameters for a given tiling index (row) and bpp (column)
|
||||
/* Calculation:
|
||||
* - Inputs:
|
||||
* TileMode, BytesPerPixel, NumFragments
|
||||
* - Constants:
|
||||
* MicroTileWidth = 8, MicroTileHeight = 8,
|
||||
* Tile Mode LUTs: IsDepth(), IsPrt(), TileThickness(), TileSplit(), SampleSplit(), NumPipes()
|
||||
* Macro Tile Mode LUTs: BankWidth(), BankHeight(), NumBanks(), MacroTileAspect()
|
||||
* - Determine the macro tile mode:
|
||||
* TileBytes = MicroTileWidth * MicroTileHeight * TileThickness(TileMode) * BytesPerPixel
|
||||
* TileSplit = min(IsDepth(TileMode) ? TileSplit(TileMode) : max(TileBytes * SampleSplit(TileMode), 256), NumFragments * TileBytes, 1024)
|
||||
* MacroTileModeIndex = log2(TileSplit / 64)
|
||||
* MacroTileMode = IsPrt(TileMode) ? MacroTileModeIndex + 8 : MacroTileModeIndex
|
||||
* - Calculate macro tile width and height:
|
||||
* Width = NumPipes(TileMode) * BankWidth(MacroTileMode) * MicroTileWidth * MacroTileAspect(MacroTileMode, AltTileMode)
|
||||
* Height = NumBanks(MacroTileMode, AltTileMode) * BankHeight(MacroTileMode, AltTileMode) * MicroTileHeight / MacroTileAspect(MacroTileMode, AltTileMode)
|
||||
*/
|
||||
|
||||
constexpr std::array macro_tile_extents_x1{
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 04
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 07
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0A
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0B
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0C
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0E
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 0F
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 10
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 11
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, // 12
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
|
||||
};
|
||||
|
||||
constexpr std::array macro_tile_extents_x2{
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 04
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 07
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0A
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0B
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0C
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0E
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0F
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 10
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 11
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 12
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
|
||||
};
|
||||
|
||||
constexpr std::array macro_tile_extents_x4{
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 04
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 07
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0A
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0B
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0C
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0E
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0F
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 10
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 11
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 12
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
|
||||
};
|
||||
|
||||
constexpr std::array macro_tile_extents_x8{
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 01
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 02
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 03
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 04
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 07
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0A
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0B
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0C
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0E
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 0F
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 10
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 11
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 12
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 14
|
||||
std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 15
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 16
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 17
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 18
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 19
|
||||
std::pair{128u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, std::pair{64u, 64u}, // 1A
|
||||
};
|
||||
|
||||
constexpr std::array macro_tile_extents_alt_x1{
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 01
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 02
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 03
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 04
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, // 07
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0A
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, // 0B
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, // 0C
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0E
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, // 0F
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, // 10
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, // 11
|
||||
std::pair{256u, 256u}, std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, // 12
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 14
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 15
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 16
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 17
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 18
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 19
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 1A
|
||||
};
|
||||
|
||||
constexpr std::array macro_tile_extents_alt_x2{
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 01
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 02
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 03
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 04
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 07
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0A
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 0B
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 0C
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0E
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0F
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 10
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 11
|
||||
std::pair{256u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 12
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 14
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 15
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 16
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 17
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 18
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 19
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 1A
|
||||
};
|
||||
|
||||
constexpr std::array macro_tile_extents_alt_x4{
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 01
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 02
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 03
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 04
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 07
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0A
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 0B
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 0C
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0E
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0F
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 10
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 11
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 12
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 14
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 15
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 16
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 17
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 18
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 19
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 1A
|
||||
};
|
||||
|
||||
constexpr std::array macro_tile_extents_alt_x8{
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 00
|
||||
std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, std::pair{256u, 128u}, // 01
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 02
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 03
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 04
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 05
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, // 06
|
||||
std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 07
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 08
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 09
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0A
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 0B
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 0C
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 0D
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0E
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 0F
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 10
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 11
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 12
|
||||
std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, std::pair{0u, 0u}, // 13
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 14
|
||||
std::pair{128u, 128u}, std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 15
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 16
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 17
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 32u}, std::pair{128u, 32u}, std::pair{128u, 32u}, // 18
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 19
|
||||
std::pair{128u, 128u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, std::pair{128u, 64u}, // 1A
|
||||
};
|
||||
|
||||
constexpr std::array macro_tile_extents{
|
||||
macro_tile_extents_x1,
|
||||
macro_tile_extents_x2,
|
||||
macro_tile_extents_x4,
|
||||
macro_tile_extents_x8,
|
||||
};
|
||||
|
||||
constexpr std::array macro_tile_extents_alt{
|
||||
macro_tile_extents_alt_x1,
|
||||
macro_tile_extents_alt_x2,
|
||||
macro_tile_extents_alt_x4,
|
||||
macro_tile_extents_alt_x8,
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
constexpr std::pair micro_tile_extent{8u, 8u};
|
||||
constexpr auto hw_pipe_interleave = 256u;
|
||||
|
||||
// Looks up the macro tile extents for a tiling mode in the macro_tile_extents /
// macro_tile_extents_alt tables.
//
// tiling_idx  - selects the table row; each row holds 5 consecutive entries, one per bpp.
// bpp         - bits per pixel; expected to be 8, 16, 32, 64 or 128 so that it maps to
//               column 0..4 of the row.
// num_samples - sample count, at most 8; selects the x1/x2/x4/x8 table.
// alt         - when true the "alt" table set is used instead of the regular one.
//
// Returns the table entry as a pair; ImageSizeMacroTiled consumes it as
// {pitch alignment, height alignment}. Some rows contain only {0u, 0u} entries, so callers
// that divide or mask by the result need to check for that.
constexpr std::pair<u32, u32> GetMacroTileExtents(u32 tiling_idx, u32 bpp, u32 num_samples,
                                                  bool alt) {
    ASSERT(num_samples <= 8);
    // floor(log2(num_samples)) in pure integer arithmetic. This keeps the function usable in
    // constant expressions (std::log2 is not constexpr before C++26) and avoids converting
    // -inf to an unsigned value when num_samples is 0; a count of 0 selects the x1 table.
    const u32 samples_log =
        num_samples > 1 ? static_cast<u32>(std::bit_width(num_samples)) - 1 : 0;
    const auto row = tiling_idx * 5;
    const auto column = std::bit_width(bpp) - 4; // bpps are 8, 16, 32, 64, 128
    return (alt ? macro_tile_extents_alt : macro_tile_extents)[samples_log][row + column];
}
|
||||
|
||||
// Computes the padded pitch and the total size in bytes of a linearly laid out surface.
//
// pitch, height - surface dimensions, counted in elements of `bpp` bits each.
// bpp           - bits per element; must be non-zero because the byte size derived from it
//                 is used as a divisor.
// num_samples   - number of samples stored per element.
//
// Returns {aligned pitch, size in bytes}. The height is used as-is; only the pitch is padded.
constexpr std::pair<u32, size_t> ImageSizeLinearAligned(u32 pitch, u32 height, u32 bpp,
                                                        u32 num_samples) {
    const auto bytes_per_element = (bpp + 7) / 8;
    // The mask-based round-up below relies on pitch_align being a power of two.
    // NOTE(review): that holds for 8/16/32/64/128 bpp - confirm no other bpp reaches here.
    const auto pitch_align = std::max(8u, 64u / bytes_per_element);
    auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
    const auto height_aligned = height;
    // Widen to size_t before multiplying so the element count cannot wrap around in 32 bits.
    const size_t elements_per_column = static_cast<size_t>(height_aligned) * num_samples;
    size_t log_sz = pitch_aligned * elements_per_column;
    const auto slice_align = std::max(64u, 256u / bytes_per_element);
    // Grow the pitch one alignment step at a time until the element count of the whole
    // slice is a multiple of slice_align.
    while (log_sz % slice_align) {
        pitch_aligned += pitch_align;
        log_sz = pitch_aligned * elements_per_column;
    }
    // Convert the element count to bytes, rounding up to a whole byte.
    return {pitch_aligned, (log_sz * bpp + 7) / 8};
}
|
||||
|
||||
/// Computes the aligned pitch and byte size of a micro tiled surface.
///
/// Pitch and height are rounded up to `micro_tile_extent`; the pitch is then grown one
/// micro tile at a time until the byte size is a multiple of 256.
///
/// @param pitch       Requested pitch, in elements.
/// @param height      Surface height, in rows.
/// @param bpp         Bits per element.
/// @param num_samples Number of samples per element.
/// @return {aligned pitch, size in bytes}.
constexpr std::pair<u32, size_t> ImageSizeMicroTiled(u32 pitch, u32 height, u32 bpp,
                                                     u32 num_samples) {
    const auto& [pitch_align, height_align] = micro_tile_extent;
    auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
    const auto height_aligned = (height + height_align - 1) & ~(height_align - 1);

    // Bits contributed by one unit of pitch. Computed in size_t because the total bit count
    // (pitch * height * bpp * samples) exceeds 32 bits for large surfaces.
    const size_t bits_per_pitch_unit = static_cast<size_t>(height_aligned) * bpp * num_samples;
    size_t log_sz = (pitch_aligned * bits_per_pitch_unit + 7) / 8;
    while (log_sz % 256) {
        pitch_aligned += 8;
        log_sz = (pitch_aligned * bits_per_pitch_unit + 7) / 8;
    }
    return {pitch_aligned, log_sz};
}
|
||||
|
||||
/// Computes the aligned pitch and byte size of a macro tiled surface.
///
/// Non-base mip levels that are smaller than one macro tile in either dimension are sized
/// as micro tiled surfaces instead.
///
/// @param pitch       Requested pitch, in elements.
/// @param height      Surface height, in rows.
/// @param bpp         Bits per element.
/// @param num_samples Number of samples per element.
/// @param tiling_idx  Tiling index used to look up the macro tile extents.
/// @param mip_n       Mip level; level 0 is never downgraded to micro tiling.
/// @param alt         Selects the alternative macro tile extent tables.
/// @return {aligned pitch, size in bytes}.
constexpr std::pair<u32, size_t> ImageSizeMacroTiled(u32 pitch, u32 height, u32 bpp,
                                                     u32 num_samples, u32 tiling_idx, u32 mip_n,
                                                     bool alt) {
    const auto& [pitch_align, height_align] =
        GetMacroTileExtents(tiling_idx, bpp, num_samples, alt);
    ASSERT(pitch_align != 0 && height_align != 0);

    // TODO: threshold check
    const bool downgrade_to_micro = mip_n > 0 && (pitch < pitch_align || height < height_align);
    if (downgrade_to_micro) {
        return ImageSizeMicroTiled(pitch, height, bpp, num_samples);
    }

    const auto pitch_aligned = (pitch + pitch_align - 1) & ~(pitch_align - 1);
    const auto height_aligned = (height + height_align - 1) & ~(height_align - 1);
    // Accumulate in size_t: the element count times bpp overflows 32 bits for large surfaces
    // (for example 8192x8192 at 64 bpp is exactly 2^32 bits).
    const size_t log_sz = static_cast<size_t>(pitch_aligned) * height_aligned * num_samples;
    return {pitch_aligned, (log_sz * bpp + 7) / 8};
}
|
||||
|
||||
} // namespace VideoCore
|
@ -8,110 +8,47 @@
|
||||
#include "video_core/texture_cache/image_view.h"
|
||||
#include "video_core/texture_cache/tile_manager.h"
|
||||
|
||||
#include "video_core/host_shaders/detile_m32x1_comp.h"
|
||||
#include "video_core/host_shaders/detile_m32x2_comp.h"
|
||||
#include "video_core/host_shaders/detile_m32x4_comp.h"
|
||||
#include "video_core/host_shaders/detile_m8x1_comp.h"
|
||||
#include "video_core/host_shaders/detile_m8x2_comp.h"
|
||||
#include "video_core/host_shaders/detile_macro32x1_comp.h"
|
||||
#include "video_core/host_shaders/detile_macro32x2_comp.h"
|
||||
#include "video_core/host_shaders/detilers/macro_32bpp_comp.h"
|
||||
#include "video_core/host_shaders/detilers/macro_64bpp_comp.h"
|
||||
#include "video_core/host_shaders/detilers/macro_8bpp_comp.h"
|
||||
#include "video_core/host_shaders/detilers/micro_128bpp_comp.h"
|
||||
#include "video_core/host_shaders/detilers/micro_16bpp_comp.h"
|
||||
#include "video_core/host_shaders/detilers/micro_32bpp_comp.h"
|
||||
#include "video_core/host_shaders/detilers/micro_64bpp_comp.h"
|
||||
#include "video_core/host_shaders/detilers/micro_8bpp_comp.h"
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
// #include <boost/container/static_vector.hpp>
|
||||
#include <magic_enum/magic_enum.hpp>
|
||||
#include <vk_mem_alloc.h>
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/// Maps an image format to the unsigned-integer format of the same texel/block size that is
/// used when detiling (8, 16, 32, 64 or 128 bits per element).
///
/// @param format Original image format.
/// @return The demoted format, or `format` unchanged when no mapping exists. An unmapped
///         format is reported through LOG_ERROR the first time it is seen.
static vk::Format DemoteImageFormatForDetiling(vk::Format format) {
    switch (format) {
    // 8 bits per element
    case vk::Format::eR8Uint:
    case vk::Format::eR8Unorm:
    case vk::Format::eR8Snorm:
        return vk::Format::eR8Uint;
    // 16 bits per element
    case vk::Format::eR4G4B4A4UnormPack16:
    case vk::Format::eB5G6R5UnormPack16:
    case vk::Format::eR5G5B5A1UnormPack16:
    case vk::Format::eR8G8Unorm:
    case vk::Format::eR16Sfloat:
    case vk::Format::eR16Uint:
    case vk::Format::eR16Unorm:
    case vk::Format::eD16Unorm:
        return vk::Format::eR8G8Uint;
    // 32 bits per element
    case vk::Format::eR8G8B8A8Srgb:
    case vk::Format::eB8G8R8A8Srgb:
    case vk::Format::eB8G8R8A8Unorm:
    case vk::Format::eR8G8B8A8Unorm:
    case vk::Format::eR8G8B8A8Snorm:
    case vk::Format::eR8G8B8A8Uint:
    case vk::Format::eR32Sfloat:
    case vk::Format::eD32Sfloat:
    case vk::Format::eR32Uint:
    case vk::Format::eR16G16Sfloat:
    case vk::Format::eR16G16Unorm:
    case vk::Format::eR16G16Snorm:
    case vk::Format::eB10G11R11UfloatPack32:
    case vk::Format::eA2B10G10R10UnormPack32:
        return vk::Format::eR32Uint;
    // 64 bits per element or block
    case vk::Format::eBc1RgbaSrgbBlock:
    case vk::Format::eBc1RgbaUnormBlock:
    case vk::Format::eBc4UnormBlock:
    case vk::Format::eR32G32Sfloat:
    case vk::Format::eR32G32Uint:
    case vk::Format::eR16G16B16A16Unorm:
    case vk::Format::eR16G16B16A16Uint:
    case vk::Format::eR16G16B16A16Sfloat:
        return vk::Format::eR32G32Uint;
    // 128 bits per element or block
    case vk::Format::eBc2SrgbBlock:
    case vk::Format::eBc2UnormBlock:
    case vk::Format::eBc3SrgbBlock:
    case vk::Format::eBc3UnormBlock:
    case vk::Format::eBc5UnormBlock:
    case vk::Format::eBc5SnormBlock:
    case vk::Format::eBc7SrgbBlock:
    case vk::Format::eBc7UnormBlock:
    case vk::Format::eBc6HUfloatBlock:
    case vk::Format::eR32G32B32A32Uint:
    case vk::Format::eR32G32B32A32Sfloat:
        return vk::Format::eR32G32B32A32Uint;
    default:
        break;
    }

    // Log missing formats only once to avoid spamming the log.
    // Extension formats have enum values far above MaxFormatIndex, so they cannot be used as
    // an index directly; all of them share the one extra slot at the end of the table.
    static constexpr size_t MaxFormatIndex = 256;
    static std::array<bool, MaxFormatIndex + 1> logged_formats{};
    const u32 raw_value = u32(format);
    const size_t slot = raw_value < MaxFormatIndex ? raw_value : MaxFormatIndex;
    if (!logged_formats[slot]) {
        LOG_ERROR(Render_Vulkan, "Unexpected format for demotion {}", vk::to_string(format));
        logged_formats[slot] = true;
    }
    return format;
}
|
||||
|
||||
const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
|
||||
const auto format = DemoteImageFormatForDetiling(info.pixel_format);
|
||||
|
||||
const auto bpp = info.num_bits * (info.props.is_block ? 16 : 1);
|
||||
switch (info.tiling_mode) {
|
||||
case AmdGpu::TilingMode::Texture_MicroTiled:
|
||||
switch (format) {
|
||||
case vk::Format::eR8Uint:
|
||||
return &detilers[DetilerType::Micro8x1];
|
||||
case vk::Format::eR8G8Uint:
|
||||
return &detilers[DetilerType::Micro8x2];
|
||||
case vk::Format::eR32Uint:
|
||||
return &detilers[DetilerType::Micro32x1];
|
||||
case vk::Format::eR32G32Uint:
|
||||
return &detilers[DetilerType::Micro32x2];
|
||||
case vk::Format::eR32G32B32A32Uint:
|
||||
return &detilers[DetilerType::Micro32x4];
|
||||
switch (bpp) {
|
||||
case 8:
|
||||
return &detilers[DetilerType::Micro8];
|
||||
case 16:
|
||||
return &detilers[DetilerType::Micro16];
|
||||
case 32:
|
||||
return &detilers[DetilerType::Micro32];
|
||||
case 64:
|
||||
return &detilers[DetilerType::Micro64];
|
||||
case 128:
|
||||
return &detilers[DetilerType::Micro128];
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
case AmdGpu::TilingMode::Texture_Volume:
|
||||
switch (format) {
|
||||
case vk::Format::eR32Uint:
|
||||
return &detilers[DetilerType::Macro32x1];
|
||||
case vk::Format::eR32G32Uint:
|
||||
return &detilers[DetilerType::Macro32x2];
|
||||
switch (bpp) {
|
||||
case 8:
|
||||
return &detilers[DetilerType::Macro8];
|
||||
case 32:
|
||||
return &detilers[DetilerType::Macro32];
|
||||
case 64:
|
||||
return &detilers[DetilerType::Macro64];
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
@ -131,10 +68,10 @@ struct DetilerParams {
|
||||
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
|
||||
: instance{instance}, scheduler{scheduler} {
|
||||
static const std::array detiler_shaders{
|
||||
HostShaders::DETILE_M8X1_COMP, HostShaders::DETILE_M8X2_COMP,
|
||||
HostShaders::DETILE_M32X1_COMP, HostShaders::DETILE_M32X2_COMP,
|
||||
HostShaders::DETILE_M32X4_COMP, HostShaders::DETILE_MACRO32X1_COMP,
|
||||
HostShaders::DETILE_MACRO32X2_COMP,
|
||||
HostShaders::MICRO_8BPP_COMP, HostShaders::MICRO_16BPP_COMP,
|
||||
HostShaders::MICRO_32BPP_COMP, HostShaders::MICRO_64BPP_COMP,
|
||||
HostShaders::MICRO_128BPP_COMP, HostShaders::MACRO_8BPP_COMP,
|
||||
HostShaders::MACRO_32BPP_COMP, HostShaders::MACRO_64BPP_COMP,
|
||||
};
|
||||
|
||||
boost::container::static_vector<vk::DescriptorSetLayoutBinding, 2> bindings{
|
||||
@ -275,7 +212,7 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
|
||||
return {in_buffer, in_offset};
|
||||
}
|
||||
|
||||
const u32 image_size = info.guest_size_bytes;
|
||||
const u32 image_size = info.guest_size;
|
||||
|
||||
// Prepare output buffer
|
||||
auto out_buffer = AllocBuffer(image_size, true);
|
||||
@ -323,7 +260,6 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
|
||||
params.height = info.size.height;
|
||||
if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) {
|
||||
ASSERT(info.resources.levels == 1);
|
||||
ASSERT(info.num_bits >= 32);
|
||||
const auto tiles_per_row = info.pitch / 8u;
|
||||
const auto tiles_per_slice = tiles_per_row * ((info.size.height + 7u) / 8u);
|
||||
params.sizes[0] = tiles_per_row;
|
||||
|
@ -12,14 +12,15 @@ class TextureCache;
|
||||
struct ImageInfo;
|
||||
|
||||
// Identifies a detiler; GetDetiler uses these values as indices into `detilers`.
// Enumerators are numbered consecutively from 0 in declaration order.
enum DetilerType : u32 {
    // Micro tiled variants
    Micro8x1, Micro8x2, Micro32x1, Micro32x2, Micro32x4,
    Micro8, Micro16, Micro32, Micro64, Micro128,

    // Macro tiled variants
    Macro32x1, Macro32x2,
    Macro8, Macro32, Macro64,

    Max // NOTE(review): presumably the number of detiler types - confirm against users
};
|
||||
|
Loading…
Reference in New Issue
Block a user