Merge remote-tracking branch 'upstream/main'

This commit is contained in:
rainmakerv2 2024-12-05 13:05:25 +08:00
commit 28ada0425e
39 changed files with 615 additions and 339 deletions

View File

@ -69,7 +69,7 @@ if (NOT TARGET ZLIB::ZLIB)
FetchContent_MakeAvailable(ZLIB) FetchContent_MakeAvailable(ZLIB)
add_library(ZLIB::ZLIB ALIAS zlib) add_library(ZLIB::ZLIB ALIAS zlib)
# libpng expects this variable to exist after its find_package(ZLIB) # libpng expects this variable to exist after its find_package(ZLIB)
get_target_property(ZLIB_INCLUDE_DIRS zlib INTERFACE_INCLUDE_DIRECTORIES) set(ZLIB_INCLUDE_DIRS "${FETCHCONTENT_BASE_DIR}/zlib-build")
endif() endif()
# SDL3 # SDL3

View File

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <cinttypes>
#include <imgui.h> #include <imgui.h>
#include <magic_enum.hpp> #include <magic_enum.hpp>
@ -23,7 +24,7 @@ bool MemoryMapViewer::Iterator::DrawLine() {
return DrawLine(); return DrawLine();
} }
TableNextColumn(); TableNextColumn();
Text("%zX", m.base); Text("%" PRIXPTR, m.base);
TableNextColumn(); TableNextColumn();
Text("%zX", m.size); Text("%zX", m.size);
TableNextColumn(); TableNextColumn();
@ -48,9 +49,9 @@ bool MemoryMapViewer::Iterator::DrawLine() {
return DrawLine(); return DrawLine();
} }
TableNextColumn(); TableNextColumn();
Text("%llX", m.base); Text("%" PRIXPTR, m.base);
TableNextColumn(); TableNextColumn();
Text("%llX", m.size); Text("%zX", m.size);
TableNextColumn(); TableNextColumn();
auto type = static_cast<::Libraries::Kernel::MemoryTypes>(m.memory_type); auto type = static_cast<::Libraries::Kernel::MemoryTypes>(m.memory_type);
Text("%s", magic_enum::enum_name(type).data()); Text("%s", magic_enum::enum_name(type).data());

View File

@ -44,10 +44,14 @@ public:
openEvent.param.rect.y = m_param.ime.posy; openEvent.param.rect.y = m_param.ime.posy;
} else { } else {
openEvent.param.resource_id_array.userId = 1; openEvent.param.resource_id_array.userId = 1;
openEvent.param.resource_id_array.resource_id[0] = 1; openEvent.param.resource_id_array.resourceId[0] = 1;
} }
Execute(nullptr, &openEvent, true); // Are we supposed to call the event handler on init with
// ADD_OSK?
if (!ime_mode && False(m_param.key.option & OrbisImeKeyboardOption::AddOsk)) {
Execute(nullptr, &openEvent, true);
}
if (ime_mode) { if (ime_mode) {
g_ime_state = ImeState(&m_param.ime); g_ime_state = ImeState(&m_param.ime);
@ -56,6 +60,11 @@ public:
} }
s32 Update(OrbisImeEventHandler handler) { s32 Update(OrbisImeEventHandler handler) {
if (!m_ime_mode) {
/* We don't handle any events for ImeKeyboard */
return ORBIS_OK;
}
std::unique_lock lock{g_ime_state.queue_mutex}; std::unique_lock lock{g_ime_state.queue_mutex};
while (!g_ime_state.event_queue.empty()) { while (!g_ime_state.event_queue.empty()) {
@ -85,6 +94,16 @@ public:
} }
} }
// Forwards a text replacement request to the global IME state object.
// `text` and `length` are passed through untouched; the visible caller
// (sceImeSetText) rejects a null `text` before calling this method.
// Always returns ORBIS_OK.
// NOTE(review): this writes to g_ime_state regardless of whether this handler
// was opened in IME or keyboard mode - confirm that is intended.
s32 SetText(const char16_t* text, u32 length) {
g_ime_state.SetText(text, length);
return ORBIS_OK;
}
// Forwards the caret index from `caret` to the global IME state object.
// `caret` is dereferenced without a null check here; the visible caller
// (sceImeSetCaret) returns ORBIS_IME_ERROR_INVALID_ADDRESS for a null pointer
// before reaching this method.
// Always returns ORBIS_OK.
s32 SetCaret(const OrbisImeCaret* caret) {
g_ime_state.SetCaret(caret->index);
return ORBIS_OK;
}
bool IsIme() { bool IsIme() {
return m_ime_mode; return m_ime_mode;
} }
@ -98,6 +117,7 @@ private:
}; };
static std::unique_ptr<ImeHandler> g_ime_handler; static std::unique_ptr<ImeHandler> g_ime_handler;
static std::unique_ptr<ImeHandler> g_keyboard_handler;
int PS4_SYSV_ABI FinalizeImeModule() { int PS4_SYSV_ABI FinalizeImeModule() {
LOG_ERROR(Lib_Ime, "(STUBBED) called"); LOG_ERROR(Lib_Ime, "(STUBBED) called");
@ -130,9 +150,6 @@ s32 PS4_SYSV_ABI sceImeClose() {
if (!g_ime_handler) { if (!g_ime_handler) {
return ORBIS_IME_ERROR_NOT_OPENED; return ORBIS_IME_ERROR_NOT_OPENED;
} }
if (!g_ime_handler->IsIme()) {
return ORBIS_IME_ERROR_NOT_OPENED;
}
g_ime_handler.release(); g_ime_handler.release();
g_ime_ui = ImeUi(); g_ime_ui = ImeUi();
@ -233,14 +250,11 @@ s32 PS4_SYSV_ABI sceImeGetPanelSize(const OrbisImeParam* param, u32* width, u32*
// Closes the keyboard handler held in g_keyboard_handler.
//
// userId is accepted for ABI compatibility and is not inspected here.
// Returns ORBIS_IME_ERROR_NOT_OPENED when no keyboard handler is currently
// open, ORBIS_OK otherwise.
s32 PS4_SYSV_ABI sceImeKeyboardClose(s32 userId) {
    LOG_INFO(Lib_Ime, "(STUBBED) called");

    if (!g_keyboard_handler) {
        return ORBIS_IME_ERROR_NOT_OPENED;
    }

    // reset() destroys the owned ImeHandler. release() only gives up ownership
    // and discards the returned pointer, which leaked the handler on every close.
    g_keyboard_handler.reset();
    return ORBIS_OK;
}
@ -255,18 +269,17 @@ int PS4_SYSV_ABI sceImeKeyboardGetResourceId() {
} }
s32 PS4_SYSV_ABI sceImeKeyboardOpen(s32 userId, const OrbisImeKeyboardParam* param) { s32 PS4_SYSV_ABI sceImeKeyboardOpen(s32 userId, const OrbisImeKeyboardParam* param) {
LOG_ERROR(Lib_Ime, "(STUBBED) called"); LOG_INFO(Lib_Ime, "called");
if (!param) { if (!param) {
return ORBIS_IME_ERROR_INVALID_ADDRESS; return ORBIS_IME_ERROR_INVALID_ADDRESS;
} }
if (g_ime_handler) { if (g_keyboard_handler) {
return ORBIS_IME_ERROR_BUSY; return ORBIS_IME_ERROR_BUSY;
} }
// g_ime_handler = std::make_unique<ImeHandler>(param); g_keyboard_handler = std::make_unique<ImeHandler>(param);
// return ORBIS_OK; return ORBIS_OK;
return ORBIS_IME_ERROR_CONNECTION_FAILED; // Fixup
} }
int PS4_SYSV_ABI sceImeKeyboardOpenInternal() { int PS4_SYSV_ABI sceImeKeyboardOpenInternal() {
@ -287,16 +300,14 @@ int PS4_SYSV_ABI sceImeKeyboardUpdate() {
s32 PS4_SYSV_ABI sceImeOpen(const OrbisImeParam* param, const void* extended) { s32 PS4_SYSV_ABI sceImeOpen(const OrbisImeParam* param, const void* extended) {
LOG_INFO(Lib_Ime, "called"); LOG_INFO(Lib_Ime, "called");
if (!g_ime_handler) { if (!param) {
g_ime_handler = std::make_unique<ImeHandler>(param); return ORBIS_IME_ERROR_INVALID_ADDRESS;
} else { }
if (g_ime_handler->IsIme()) { if (g_ime_handler) {
return ORBIS_IME_ERROR_BUSY; return ORBIS_IME_ERROR_BUSY;
}
g_ime_handler->Init((void*)param, true);
} }
g_ime_handler = std::make_unique<ImeHandler>(param);
return ORBIS_OK; return ORBIS_OK;
} }
@ -322,13 +333,29 @@ int PS4_SYSV_ABI sceImeSetCandidateIndex() {
} }
int PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret) { int PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret) {
LOG_ERROR(Lib_Ime, "(STUBBED) called"); LOG_TRACE(Lib_Ime, "called");
return ORBIS_OK;
if (!g_ime_handler) {
return ORBIS_IME_ERROR_NOT_OPENED;
}
if (!caret) {
return ORBIS_IME_ERROR_INVALID_ADDRESS;
}
return g_ime_handler->SetCaret(caret);
} }
int PS4_SYSV_ABI sceImeSetText() { s32 PS4_SYSV_ABI sceImeSetText(const char16_t* text, u32 length) {
LOG_ERROR(Lib_Ime, "(STUBBED) called"); LOG_TRACE(Lib_Ime, "called");
return ORBIS_OK;
if (!g_ime_handler) {
return ORBIS_IME_ERROR_NOT_OPENED;
}
if (!text) {
return ORBIS_IME_ERROR_INVALID_ADDRESS;
}
return g_ime_handler->SetText(text, length);
} }
int PS4_SYSV_ABI sceImeSetTextGeometry() { int PS4_SYSV_ABI sceImeSetTextGeometry() {
@ -337,13 +364,19 @@ int PS4_SYSV_ABI sceImeSetTextGeometry() {
} }
// Dispatches pending events to `handler` for every open IME object.
//
// Both the IME handler and the keyboard handler are polled when present.
// Returns ORBIS_IME_ERROR_NOT_OPENED only when neither of them is open,
// ORBIS_OK otherwise.
s32 PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler) {
    // Fail only if there is nothing to update at all. The previous `||` check
    // reported NOT_OPENED whenever just one of the two handlers was open, even
    // though that handler had already been updated.
    if (!g_ime_handler && !g_keyboard_handler) {
        return ORBIS_IME_ERROR_NOT_OPENED;
    }

    if (g_ime_handler) {
        g_ime_handler->Update(handler);
    }

    if (g_keyboard_handler) {
        g_keyboard_handler->Update(handler);
    }

    return ORBIS_OK;
}
int PS4_SYSV_ABI sceImeVshClearPreedit() { int PS4_SYSV_ABI sceImeVshClearPreedit() {

View File

@ -26,6 +26,24 @@ enum class OrbisImeKeyboardOption : u32 {
}; };
DECLARE_ENUM_FLAG_OPERATORS(OrbisImeKeyboardOption) DECLARE_ENUM_FLAG_OPERATORS(OrbisImeKeyboardOption)
// Bit flags stored in OrbisImeParam::option. Each enumerator occupies a single
// bit; bits 7 and 10 are not assigned in this list.
enum class OrbisImeOption : u32 {
    DEFAULT = 0,
    MULTILINE = 1u << 0,
    NO_AUTO_CAPITALIZATION = 1u << 1,
    PASSWORD = 1u << 2,
    LANGUAGES_FORCED = 1u << 3,
    EXT_KEYBOARD = 1u << 4,
    NO_LEARNING = 1u << 5,
    FIXED_POSITION = 1u << 6,
    DISABLE_RESUME = 1u << 8,
    DISABLE_AUTO_SPACE = 1u << 9,
    DISABLE_POSITION_ADJUSTMENT = 1u << 11,
    EXPANDED_PREEDIT_BUFFER = 1u << 12,
    USE_JAPANESE_EISUU_KEY_AS_CAPSLOCK = 1u << 13,
    USE_2K_COORDINATES = 1u << 14,
};
DECLARE_ENUM_FLAG_OPERATORS(OrbisImeOption)
struct OrbisImeKeyboardParam { struct OrbisImeKeyboardParam {
OrbisImeKeyboardOption option; OrbisImeKeyboardOption option;
s8 reserved1[4]; s8 reserved1[4];
@ -41,9 +59,9 @@ struct OrbisImeParam {
OrbisImeEnterLabel enter_label; OrbisImeEnterLabel enter_label;
OrbisImeInputMethod input_method; OrbisImeInputMethod input_method;
OrbisImeTextFilter filter; OrbisImeTextFilter filter;
u32 option; OrbisImeOption option;
u32 max_text_length; u32 maxTextLength;
char16_t* input_text_buffer; char16_t* inputTextBuffer;
float posx; float posx;
float posy; float posy;
OrbisImeHorizontalAlignment horizontal_alignment; OrbisImeHorizontalAlignment horizontal_alignment;
@ -93,7 +111,7 @@ int PS4_SYSV_ABI sceImeOpenInternal();
void PS4_SYSV_ABI sceImeParamInit(OrbisImeParam* param); void PS4_SYSV_ABI sceImeParamInit(OrbisImeParam* param);
int PS4_SYSV_ABI sceImeSetCandidateIndex(); int PS4_SYSV_ABI sceImeSetCandidateIndex();
s32 PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret); s32 PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret);
int PS4_SYSV_ABI sceImeSetText(); s32 PS4_SYSV_ABI sceImeSetText(const char16_t* text, u32 length);
int PS4_SYSV_ABI sceImeSetTextGeometry(); int PS4_SYSV_ABI sceImeSetTextGeometry();
s32 PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler); s32 PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler);
int PS4_SYSV_ABI sceImeVshClearPreedit(); int PS4_SYSV_ABI sceImeVshClearPreedit();

View File

@ -142,7 +142,7 @@ struct OrbisImeKeycode {
struct OrbisImeKeyboardResourceIdArray { struct OrbisImeKeyboardResourceIdArray {
s32 userId; s32 userId;
u32 resource_id[6]; u32 resourceId[5];
}; };
enum class OrbisImeCaretMovementDirection : u32 { enum class OrbisImeCaretMovementDirection : u32 {

View File

@ -16,7 +16,7 @@ ImeState::ImeState(const OrbisImeParam* param) {
} }
work_buffer = param->work; work_buffer = param->work;
text_buffer = param->input_text_buffer; text_buffer = param->inputTextBuffer;
std::size_t text_len = std::char_traits<char16_t>::length(text_buffer); std::size_t text_len = std::char_traits<char16_t>::length(text_buffer);
if (!ConvertOrbisToUTF8(text_buffer, text_len, current_text.begin(), if (!ConvertOrbisToUTF8(text_buffer, text_len, current_text.begin(),
@ -26,15 +26,13 @@ ImeState::ImeState(const OrbisImeParam* param) {
} }
ImeState::ImeState(ImeState&& other) noexcept ImeState::ImeState(ImeState&& other) noexcept
: input_changed(other.input_changed), work_buffer(other.work_buffer), : work_buffer(other.work_buffer), text_buffer(other.text_buffer),
text_buffer(other.text_buffer), current_text(std::move(other.current_text)), current_text(std::move(other.current_text)), event_queue(std::move(other.event_queue)) {
event_queue(std::move(other.event_queue)) {
other.text_buffer = nullptr; other.text_buffer = nullptr;
} }
ImeState& ImeState::operator=(ImeState&& other) noexcept { ImeState& ImeState::operator=(ImeState&& other) noexcept {
if (this != &other) { if (this != &other) {
input_changed = other.input_changed;
work_buffer = other.work_buffer; work_buffer = other.work_buffer;
text_buffer = other.text_buffer; text_buffer = other.text_buffer;
current_text = std::move(other.current_text); current_text = std::move(other.current_text);
@ -63,6 +61,10 @@ void ImeState::SendCloseEvent() {
SendEvent(&closeEvent); SendEvent(&closeEvent);
} }
// TODO: not implemented yet. The body is empty, so `text` and `length` are
// ignored and the stored text is left unchanged.
void ImeState::SetText(const char16_t* text, u32 length) {}
// TODO: not implemented yet. The body is empty, so `position` is ignored.
void ImeState::SetCaret(u32 position) {}
bool ImeState::ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len, bool ImeState::ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len,
char* utf8_text, std::size_t utf8_text_len) { char* utf8_text, std::size_t utf8_text_len) {
std::fill(utf8_text, utf8_text + utf8_text_len, '\0'); std::fill(utf8_text, utf8_text + utf8_text_len, '\0');
@ -180,9 +182,8 @@ void ImeUi::DrawInputText() {
if (first_render) { if (first_render) {
SetKeyboardFocusHere(); SetKeyboardFocusHere();
} }
if (InputTextEx("##ImeInput", nullptr, state->current_text.begin(), ime_param->max_text_length, if (InputTextEx("##ImeInput", nullptr, state->current_text.begin(), ime_param->maxTextLength,
input_size, ImGuiInputTextFlags_CallbackAlways, InputTextCallback, this)) { input_size, ImGuiInputTextFlags_CallbackAlways, InputTextCallback, this)) {
state->input_changed = true;
} }
} }
@ -190,6 +191,39 @@ int ImeUi::InputTextCallback(ImGuiInputTextCallbackData* data) {
ImeUi* ui = static_cast<ImeUi*>(data->UserData); ImeUi* ui = static_cast<ImeUi*>(data->UserData);
ASSERT(ui); ASSERT(ui);
static std::string lastText;
std::string currentText(data->Buf, data->BufTextLen);
if (currentText != lastText) {
OrbisImeEditText eventParam{};
eventParam.str = reinterpret_cast<char16_t*>(ui->ime_param->work);
eventParam.caret_index = data->CursorPos;
eventParam.area_num = 1;
eventParam.text_area[0].mode = 1; // Edit mode
eventParam.text_area[0].index = data->CursorPos;
eventParam.text_area[0].length = data->BufTextLen;
if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen, eventParam.str,
ui->ime_param->maxTextLength)) {
LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8");
return 0;
}
if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen,
ui->ime_param->inputTextBuffer,
ui->ime_param->maxTextLength)) {
LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8");
return 0;
}
OrbisImeEvent event{};
event.id = OrbisImeEventId::UpdateText;
event.param.text = eventParam;
lastText = currentText;
ui->state->SendEvent(&event);
}
static int lastCaretPos = -1; static int lastCaretPos = -1;
if (lastCaretPos == -1) { if (lastCaretPos == -1) {
lastCaretPos = data->CursorPos; lastCaretPos = data->CursorPos;
@ -209,39 +243,6 @@ int ImeUi::InputTextCallback(ImGuiInputTextCallbackData* data) {
ui->state->SendEvent(&event); ui->state->SendEvent(&event);
} }
static std::string lastText;
std::string currentText(data->Buf, data->BufTextLen);
if (currentText != lastText) {
OrbisImeEditText eventParam{};
eventParam.str = reinterpret_cast<char16_t*>(ui->ime_param->work);
eventParam.caret_index = data->CursorPos;
eventParam.area_num = 1;
eventParam.text_area[0].mode = 1; // Edit mode
eventParam.text_area[0].index = data->CursorPos;
eventParam.text_area[0].length = data->BufTextLen;
if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen, eventParam.str,
ui->ime_param->max_text_length)) {
LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8");
return 0;
}
if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen,
ui->ime_param->input_text_buffer,
ui->ime_param->max_text_length)) {
LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8");
return 0;
}
OrbisImeEvent event{};
event.id = OrbisImeEventId::UpdateText;
event.param.text = eventParam;
lastText = currentText;
ui->state->SendEvent(&event);
}
return 0; return 0;
} }

View File

@ -22,10 +22,7 @@ class ImeState {
friend class ImeHandler; friend class ImeHandler;
friend class ImeUi; friend class ImeUi;
bool input_changed = false;
void* work_buffer{}; void* work_buffer{};
char16_t* text_buffer{}; char16_t* text_buffer{};
// A character can hold up to 4 bytes in UTF-8 // A character can hold up to 4 bytes in UTF-8
@ -43,6 +40,9 @@ public:
void SendEnterEvent(); void SendEnterEvent();
void SendCloseEvent(); void SendCloseEvent();
void SetText(const char16_t* text, u32 length);
void SetCaret(u32 position);
private: private:
bool ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len, char* utf8_text, bool ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len, char* utf8_text,
std::size_t native_text_len); std::size_t native_text_len);

View File

@ -203,7 +203,7 @@ int PS4_SYSV_ABI _sigprocmask() {
} }
int PS4_SYSV_ABI posix_getpagesize() { int PS4_SYSV_ABI posix_getpagesize() {
return 4096; return 16_KB;
} }
void RegisterKernel(Core::Loader::SymbolsResolver* sym) { void RegisterKernel(Core::Loader::SymbolsResolver* sym) {

View File

@ -281,7 +281,7 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt
/* Create thread */ /* Create thread */
new_thread->native_thr = Core::Thread(); new_thread->native_thr = Core::Thread();
int ret = new_thread->native_thr.Create(RunThread, new_thread); int ret = new_thread->native_thr.Create(RunThread, new_thread, &new_thread->attr);
ASSERT_MSG(ret == 0, "Failed to create thread with error {}", ret); ASSERT_MSG(ret == 0, "Failed to create thread with error {}", ret);
if (ret) { if (ret) {
*thread = nullptr; *thread = nullptr;

View File

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include "libraries/kernel/threads/pthread.h"
#include "thread.h" #include "thread.h"
#ifdef _WIN64 #ifdef _WIN64
@ -15,7 +16,7 @@ Thread::Thread() : native_handle{0} {}
Thread::~Thread() {} Thread::~Thread() {}
int Thread::Create(ThreadFunc func, void* arg) { int Thread::Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr) {
#ifdef _WIN64 #ifdef _WIN64
native_handle = CreateThread(nullptr, 0, (LPTHREAD_START_ROUTINE)func, arg, 0, nullptr); native_handle = CreateThread(nullptr, 0, (LPTHREAD_START_ROUTINE)func, arg, 0, nullptr);
return native_handle ? 0 : -1; return native_handle ? 0 : -1;
@ -23,6 +24,7 @@ int Thread::Create(ThreadFunc func, void* arg) {
pthread_t* pthr = reinterpret_cast<pthread_t*>(&native_handle); pthread_t* pthr = reinterpret_cast<pthread_t*>(&native_handle);
pthread_attr_t pattr; pthread_attr_t pattr;
pthread_attr_init(&pattr); pthread_attr_init(&pattr);
pthread_attr_setstack(&pattr, attr->stackaddr_attr, attr->stacksize_attr);
return pthread_create(pthr, &pattr, (PthreadFunc)func, arg); return pthread_create(pthr, &pattr, (PthreadFunc)func, arg);
#endif #endif
} }

View File

@ -5,6 +5,10 @@
#include "common/types.h" #include "common/types.h"
namespace Libraries::Kernel {
struct PthreadAttr;
} // namespace Libraries::Kernel
namespace Core { namespace Core {
class Thread { class Thread {
@ -15,7 +19,7 @@ public:
Thread(); Thread();
~Thread(); ~Thread();
int Create(ThreadFunc func, void* arg); int Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr);
void Exit(); void Exit();
uintptr_t GetHandle() { uintptr_t GetHandle() {

View File

@ -206,7 +206,7 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) {
return main; return main;
} }
void SetupCapabilities(const Info& info, EmitContext& ctx) { void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) {
ctx.AddCapability(spv::Capability::Image1D); ctx.AddCapability(spv::Capability::Image1D);
ctx.AddCapability(spv::Capability::Sampled1D); ctx.AddCapability(spv::Capability::Sampled1D);
ctx.AddCapability(spv::Capability::ImageQuery); ctx.AddCapability(spv::Capability::ImageQuery);
@ -251,6 +251,10 @@ void SetupCapabilities(const Info& info, EmitContext& ctx) {
if (info.stage == Stage::Geometry) { if (info.stage == Stage::Geometry) {
ctx.AddCapability(spv::Capability::Geometry); ctx.AddCapability(spv::Capability::Geometry);
} }
if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) {
ctx.AddExtension("SPV_KHR_fragment_shader_barycentric");
ctx.AddCapability(spv::Capability::FragmentBarycentricKHR);
}
} }
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
@ -342,7 +346,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
EmitContext ctx{profile, runtime_info, program.info, binding}; EmitContext ctx{profile, runtime_info, program.info, binding};
const Id main{DefineMain(ctx, program)}; const Id main{DefineMain(ctx, program)};
DefineEntryPoint(program, ctx, main); DefineEntryPoint(program, ctx, main);
SetupCapabilities(program.info, ctx); SetupCapabilities(program.info, profile, ctx);
SetupFloatMode(ctx, profile, runtime_info, main); SetupFloatMode(ctx, profile, runtime_info, main);
PatchPhiNodes(program, ctx); PatchPhiNodes(program, ctx);
binding.user_data += program.info.ud_mask.NumRegs(); binding.user_data += program.info.ud_mask.NumRegs();

View File

@ -171,54 +171,38 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value)); rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
} }
// Emits a load of one float component of an input attribute for the geometry
// stage (EmitGetAttribute delegates here when ctx.info.stage is Stage::Geometry).
//
// attr  - attribute to read; only Position0 and Param* attributes are handled.
// comp  - component selected inside the vec4.
// index - element selected in the per-attribute input array
//         (presumably the vertex of the input primitive - TODO confirm).
//
// Returns the id of the loaded F32 scalar. Any other attribute hits UNREACHABLE().
Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
if (IR::IsPosition(attr)) {
ASSERT(attr == IR::Attribute::Position0);
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
// Address gl_in[index], member 0, as a vec4.
const auto pointer{
ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
// Narrow the vec4 pointer to the requested component and load it.
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
}
if (IR::IsParam(attr)) {
// Param attributes are numbered consecutively starting at Param0.
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
const auto param = ctx.input_params.at(param_id).id;
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
// Address param[index] as a vec4.
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
// Same component narrowing as in the position path above.
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
}
UNREACHABLE();
}
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
if (ctx.info.stage == Stage::Geometry) { if (ctx.info.stage == Stage::Geometry) {
if (IR::IsPosition(attr)) { return EmitGetAttributeForGeometry(ctx, attr, comp, index);
ASSERT(attr == IR::Attribute::Position0);
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index),
ctx.ConstU32(0u))};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
}
if (IR::IsParam(attr)) {
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
const auto param = ctx.input_params.at(param_id).id;
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
}
UNREACHABLE();
} }
if (IR::IsParam(attr)) { if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& param{ctx.input_params.at(index)}; const auto& param{ctx.input_params.at(index)};
if (param.buffer_handle < 0) { if (param.buffer_handle >= 0) {
if (!ValidId(param.id)) {
// Attribute is disabled or varying component is not written
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
}
Id result;
if (param.is_default) {
result = ctx.OpCompositeExtract(param.component_type, param.id, comp);
} else if (param.num_components > 1) {
const Id pointer{
ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
result = ctx.OpLoad(param.component_type, pointer);
} else {
result = ctx.OpLoad(param.component_type, param.id);
}
if (param.is_integer) {
result = ctx.OpBitcast(ctx.F32[1], result);
}
return result;
} else {
const auto step_rate = EmitReadStepRate(ctx, param.id.value); const auto step_rate = EmitReadStepRate(ctx, param.id.value);
const auto offset = ctx.OpIAdd( const auto offset = ctx.OpIAdd(
ctx.U32[1], ctx.U32[1],
@ -229,7 +213,26 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
ctx.ConstU32(comp)); ctx.ConstU32(comp));
return EmitReadConstBuffer(ctx, param.buffer_handle, offset); return EmitReadConstBuffer(ctx, param.buffer_handle, offset);
} }
Id result;
if (param.is_loaded) {
// Attribute is either default or manually interpolated. The id points to an already
// loaded vector.
result = ctx.OpCompositeExtract(param.component_type, param.id, comp);
} else if (param.num_components > 1) {
// Attribute is a vector and we need to access a specific component.
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
result = ctx.OpLoad(param.component_type, pointer);
} else {
// Attribute is a single float or integer, simply load it.
result = ctx.OpLoad(param.component_type, param.id);
}
if (param.is_integer) {
result = ctx.OpBitcast(ctx.F32[1], result);
}
return result;
} }
switch (attr) { switch (attr) {
case IR::Attribute::FragCoord: { case IR::Attribute::FragCoord: {
const Id coord = ctx.OpLoad( const Id coord = ctx.OpLoad(

View File

@ -187,7 +187,8 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool has_mips) { Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool has_mips) {
const auto& texture = ctx.images[handle & 0xFFFF]; const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id image = ctx.OpLoad(texture.image_type, texture.id);
const auto type = ctx.info.images[handle & 0xFFFF].type; const auto sharp = ctx.info.images[handle & 0xFFFF].GetSharp(ctx.info);
const auto type = sharp.GetBoundType();
const Id zero = ctx.u32_zero_value; const Id zero = ctx.u32_zero_value;
const auto mips{[&] { return has_mips ? ctx.OpImageQueryLevels(ctx.U32[1], image) : zero; }}; const auto mips{[&] { return has_mips ? ctx.OpImageQueryLevels(ctx.U32[1], image) : zero; }};
const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa && !texture.is_storage}; const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa && !texture.is_storage};

View File

@ -8,6 +8,9 @@
namespace Shader::Backend::SPIRV { namespace Shader::Backend::SPIRV {
void EmitPrologue(EmitContext& ctx) { void EmitPrologue(EmitContext& ctx) {
if (ctx.stage == Stage::Fragment) {
ctx.DefineInterpolatedAttribs();
}
ctx.DefineBufferOffsets(); ctx.DefineBufferOffsets();
} }

View File

@ -4,6 +4,7 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/div_ceil.h" #include "common/div_ceil.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/ir/passes/srt.h" #include "shader_recompiler/ir/passes/srt.h"
#include "video_core/amdgpu/types.h" #include "video_core/amdgpu/types.h"
@ -155,18 +156,12 @@ void EmitContext::DefineInterfaces() {
} }
const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
switch (fmt) { switch (GetNumberClass(fmt)) {
case AmdGpu::NumberFormat::Float: case AmdGpu::NumberClass::Float:
case AmdGpu::NumberFormat::Unorm:
case AmdGpu::NumberFormat::Snorm:
case AmdGpu::NumberFormat::SnormNz:
case AmdGpu::NumberFormat::Sscaled:
case AmdGpu::NumberFormat::Uscaled:
case AmdGpu::NumberFormat::Srgb:
return ctx.F32; return ctx.F32;
case AmdGpu::NumberFormat::Sint: case AmdGpu::NumberClass::Sint:
return ctx.S32; return ctx.S32;
case AmdGpu::NumberFormat::Uint: case AmdGpu::NumberClass::Uint:
return ctx.U32; return ctx.U32;
default: default:
break; break;
@ -176,18 +171,12 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id,
u32 num_components, bool output) { u32 num_components, bool output) {
switch (fmt) { switch (GetNumberClass(fmt)) {
case AmdGpu::NumberFormat::Float: case AmdGpu::NumberClass::Float:
case AmdGpu::NumberFormat::Unorm:
case AmdGpu::NumberFormat::Snorm:
case AmdGpu::NumberFormat::SnormNz:
case AmdGpu::NumberFormat::Sscaled:
case AmdGpu::NumberFormat::Uscaled:
case AmdGpu::NumberFormat::Srgb:
return {id, output ? output_f32 : input_f32, F32[1], num_components, false}; return {id, output ? output_f32 : input_f32, F32[1], num_components, false};
case AmdGpu::NumberFormat::Uint: case AmdGpu::NumberClass::Uint:
return {id, output ? output_u32 : input_u32, U32[1], num_components, true}; return {id, output ? output_u32 : input_u32, U32[1], num_components, true};
case AmdGpu::NumberFormat::Sint: case AmdGpu::NumberClass::Sint:
return {id, output ? output_s32 : input_s32, S32[1], num_components, true}; return {id, output ? output_s32 : input_s32, S32[1], num_components, true};
default: default:
break; break;
@ -222,6 +211,36 @@ void EmitContext::DefineBufferOffsets() {
} }
} }
// Replaces fragment shader inputs with manually interpolated values.
//
// Does nothing unless profile.needs_manual_interpolation is set. For every
// fragment input that is neither flat nor already loaded, the input is read as
// an array of three vec4 values and combined with the barycentric coordinates
// loaded from gl_bary_coord_id. The resulting vec4 id overwrites params.id and
// the entry is marked is_loaded, so later reads extract components from the
// computed vector instead of loading from the input variable.
void EmitContext::DefineInterpolatedAttribs() {
if (!profile.needs_manual_interpolation) {
return;
}
// Iterate all input attributes, load them and manually interpolate with barycentric
// coordinates.
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
const auto& input = runtime_info.fs_info.inputs[i];
const u32 semantic = input.param_index;
auto& params = input_params[semantic];
// Flat inputs are not interpolated; loaded ones already hold a final vector.
if (input.is_flat || params.is_loaded) {
continue;
}
// Load the three vec4 values of this attribute
// (presumably one per triangle vertex - TODO confirm).
const Id p_array{OpLoad(TypeArray(F32[4], ConstU32(3U)), params.id)};
const Id p0{OpCompositeExtract(F32[4], p_array, 0U)};
const Id p1{OpCompositeExtract(F32[4], p_array, 1U)};
const Id p2{OpCompositeExtract(F32[4], p_array, 2U)};
// Edge deltas relative to p0.
const Id p10{OpFSub(F32[4], p1, p0)};
const Id p20{OpFSub(F32[4], p2, p0)};
const Id bary_coord{OpLoad(F32[3], gl_bary_coord_id)};
const Id bary_coord_y{OpCompositeExtract(F32[1], bary_coord, 1)};
const Id bary_coord_z{OpCompositeExtract(F32[1], bary_coord, 2)};
const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)};
const Id p20_z{OpVectorTimesScalar(F32[4], p20, bary_coord_z)};
// result = p0 + (p1 - p0) * bary.y + (p2 - p0) * bary.z
params.id = OpFAdd(F32[4], p0, OpFAdd(F32[4], p10_y, p20_z));
Name(params.id, fmt::format("fs_in_attr{}", semantic));
params.is_loaded = true;
}
}
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
switch (default_value) { switch (default_value) {
case 0: case 0:
@ -250,33 +269,42 @@ void EmitContext::DefineInputs() {
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input); instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
for (const auto& input : info.vs_inputs) { const auto fetch_shader = Gcn::ParseFetchShader(info);
ASSERT(input.binding < IR::NumParams); if (!fetch_shader) {
const Id type{GetAttributeType(*this, input.fmt)[4]}; break;
if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 || }
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) { for (const auto& attrib : fetch_shader->attributes) {
ASSERT(attrib.semantic < IR::NumParams);
const auto sharp = attrib.GetSharp(info);
const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]};
if (attrib.UsesStepRates()) {
const u32 rate_idx = const u32 rate_idx =
input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0 attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::OverStepRate0 ? 0
: 1; : 1;
const u32 num_components = AmdGpu::NumComponents(sharp.GetDataFmt());
const auto buffer =
std::ranges::find_if(info.buffers, [&attrib](const auto& buffer) {
return buffer.instance_attrib == attrib.semantic;
});
// Note that we pass index rather than Id // Note that we pass index rather than Id
input_params[input.binding] = { input_params[attrib.semantic] = SpirvAttribute{
rate_idx, .id = rate_idx,
input_u32, .pointer_type = input_u32,
U32[1], .component_type = U32[1],
input.num_components, .num_components = std::min<u16>(attrib.num_elements, num_components),
true, .is_integer = true,
false, .is_loaded = false,
input.instance_data_buf, .buffer_handle = int(buffer - info.buffers.begin()),
}; };
} else { } else {
Id id{DefineInput(type, input.binding)}; Id id{DefineInput(type, attrib.semantic)};
if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) { if (attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::Plain) {
Name(id, fmt::format("vs_instance_attr{}", input.binding)); Name(id, fmt::format("vs_instance_attr{}", attrib.semantic));
} else { } else {
Name(id, fmt::format("vs_in_attr{}", input.binding)); Name(id, fmt::format("vs_in_attr{}", attrib.semantic));
} }
input_params[input.binding] = GetAttributeInfo(input.fmt, id, 4, false); input_params[attrib.semantic] =
GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
interfaces.push_back(id); interfaces.push_back(id);
} }
} }
@ -286,6 +314,10 @@ void EmitContext::DefineInputs() {
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output); frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
if (profile.needs_manual_interpolation) {
gl_bary_coord_id =
DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
}
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) { for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
const auto& input = runtime_info.fs_info.inputs[i]; const auto& input = runtime_info.fs_info.inputs[i];
const u32 semantic = input.param_index; const u32 semantic = input.param_index;
@ -299,14 +331,21 @@ void EmitContext::DefineInputs() {
const IR::Attribute param{IR::Attribute::Param0 + input.param_index}; const IR::Attribute param{IR::Attribute::Param0 + input.param_index};
const u32 num_components = info.loads.NumComponents(param); const u32 num_components = info.loads.NumComponents(param);
const Id type{F32[num_components]}; const Id type{F32[num_components]};
const Id id{DefineInput(type, semantic)}; Id attr_id{};
if (input.is_flat) { if (profile.needs_manual_interpolation && !input.is_flat) {
Decorate(id, spv::Decoration::Flat); attr_id = DefineInput(TypeArray(type, ConstU32(3U)), semantic);
Decorate(attr_id, spv::Decoration::PerVertexKHR);
Name(attr_id, fmt::format("fs_in_attr{}_p", semantic));
} else {
attr_id = DefineInput(type, semantic);
Name(attr_id, fmt::format("fs_in_attr{}", semantic));
}
if (input.is_flat) {
Decorate(attr_id, spv::Decoration::Flat);
} }
Name(id, fmt::format("fs_in_attr{}", semantic));
input_params[semantic] = input_params[semantic] =
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, false); GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
interfaces.push_back(id); interfaces.push_back(attr_id);
} }
break; break;
case Stage::Compute: case Stage::Compute:
@ -512,9 +551,10 @@ void EmitContext::DefineBuffers() {
void EmitContext::DefineTextureBuffers() { void EmitContext::DefineTextureBuffers() {
for (const auto& desc : info.texture_buffers) { for (const auto& desc : info.texture_buffers) {
const bool is_integer = const auto sharp = desc.GetSharp(info);
desc.nfmt == AmdGpu::NumberFormat::Uint || desc.nfmt == AmdGpu::NumberFormat::Sint; const auto nfmt = sharp.GetNumberFmt();
const VectorIds& sampled_type{GetAttributeType(*this, desc.nfmt)}; const bool is_integer = AmdGpu::IsInteger(nfmt);
const VectorIds& sampled_type{GetAttributeType(*this, nfmt)};
const u32 sampled = desc.is_written ? 2 : 1; const u32 sampled = desc.is_written ? 2 : 1;
const Id image_type{TypeImage(sampled_type[1], spv::Dim::Buffer, false, false, false, const Id image_type{TypeImage(sampled_type[1], spv::Dim::Buffer, false, false, false,
sampled, spv::ImageFormat::Unknown)}; sampled, spv::ImageFormat::Unknown)};
@ -609,10 +649,11 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
} }
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
const auto image = ctx.info.ReadUdSharp<AmdGpu::Image>(desc.sharp_idx); const auto image = desc.GetSharp(ctx.info);
const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown; const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown;
const auto type = image.GetBoundType();
const u32 sampled = desc.is_storage ? 2 : 1; const u32 sampled = desc.is_storage ? 2 : 1;
switch (desc.type) { switch (type) {
case AmdGpu::ImageType::Color1D: case AmdGpu::ImageType::Color1D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, sampled, format); return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, sampled, format);
case AmdGpu::ImageType::Color1DArray: case AmdGpu::ImageType::Color1DArray:
@ -631,14 +672,15 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
default: default:
break; break;
} }
throw InvalidArgument("Invalid texture type {}", desc.type); throw InvalidArgument("Invalid texture type {}", type);
} }
void EmitContext::DefineImagesAndSamplers() { void EmitContext::DefineImagesAndSamplers() {
for (const auto& image_desc : info.images) { for (const auto& image_desc : info.images) {
const bool is_integer = image_desc.nfmt == AmdGpu::NumberFormat::Uint || const auto sharp = image_desc.GetSharp(info);
image_desc.nfmt == AmdGpu::NumberFormat::Sint; const auto nfmt = sharp.GetNumberFmt();
const VectorIds& data_types = GetAttributeType(*this, image_desc.nfmt); const bool is_integer = AmdGpu::IsInteger(nfmt);
const VectorIds& data_types = GetAttributeType(*this, nfmt);
const Id sampled_type = data_types[1]; const Id sampled_type = data_types[1];
const Id image_type{ImageType(*this, image_desc, sampled_type)}; const Id image_type{ImageType(*this, image_desc, sampled_type)};
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};

View File

@ -42,7 +42,9 @@ public:
~EmitContext(); ~EmitContext();
Id Def(const IR::Value& value); Id Def(const IR::Value& value);
void DefineBufferOffsets(); void DefineBufferOffsets();
void DefineInterpolatedAttribs();
[[nodiscard]] Id DefineInput(Id type, u32 location) { [[nodiscard]] Id DefineInput(Id type, u32 location) {
const Id input_id{DefineVar(type, spv::StorageClass::Input)}; const Id input_id{DefineVar(type, spv::StorageClass::Input)};
@ -197,6 +199,9 @@ public:
Id shared_memory_u32_type{}; Id shared_memory_u32_type{};
Id interpolate_func{};
Id gl_bary_coord_id{};
struct TextureDefinition { struct TextureDefinition {
const VectorIds* data_types; const VectorIds* data_types;
Id id; Id id;
@ -241,7 +246,7 @@ public:
Id component_type; Id component_type;
u32 num_components; u32 num_components;
bool is_integer{}; bool is_integer{};
bool is_default{}; bool is_loaded{};
s32 buffer_handle{-1}; s32 buffer_handle{-1};
}; };
std::array<SpirvAttribute, IR::NumParams> input_params{}; std::array<SpirvAttribute, IR::NumParams> input_params{};

View File

@ -34,8 +34,14 @@ namespace Shader::Gcn {
* We take the reverse way, extract the original input semantics from these instructions. * We take the reverse way, extract the original input semantics from these instructions.
**/ **/
FetchShaderData ParseFetchShader(const u32* code, u32* out_size) { std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
FetchShaderData data{}; if (!info.has_fetch_shader) {
return std::nullopt;
}
const u32* code;
std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code));
FetchShaderData data{.code = code};
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max()); GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder; GcnDecodeContext decoder;
@ -49,7 +55,7 @@ FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
u32 semantic_index = 0; u32 semantic_index = 0;
while (!code_slice.atEnd()) { while (!code_slice.atEnd()) {
const auto inst = decoder.decodeInstruction(code_slice); const auto inst = decoder.decodeInstruction(code_slice);
*out_size += inst.length; data.size += inst.length;
if (inst.opcode == Opcode::S_SETPC_B64) { if (inst.opcode == Opcode::S_SETPC_B64) {
break; break;

View File

@ -3,26 +3,80 @@
#pragma once #pragma once
#include <ranges>
#include <vector> #include <vector>
#include "common/types.h" #include "common/types.h"
#include "shader_recompiler/info.h"
namespace Shader::Gcn { namespace Shader::Gcn {
struct VertexAttribute { struct VertexAttribute {
enum InstanceIdType : u8 {
None = 0,
OverStepRate0 = 1,
OverStepRate1 = 2,
Plain = 3,
};
u8 semantic; ///< Semantic index of the attribute u8 semantic; ///< Semantic index of the attribute
u8 dest_vgpr; ///< Destination VGPR to load first component. u8 dest_vgpr; ///< Destination VGPR to load first component.
u8 num_elements; ///< Number of components to load u8 num_elements; ///< Number of components to load
u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V# u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V#
u8 dword_offset; ///< The dword offset of the V# that describes this attribute. u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
[[nodiscard]] InstanceIdType GetStepRate() const {
return static_cast<InstanceIdType>(instance_data);
}
[[nodiscard]] bool UsesStepRates() const {
const auto step_rate = GetStepRate();
return step_rate == OverStepRate0 || step_rate == OverStepRate1;
}
[[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
}
bool operator==(const VertexAttribute& other) const {
return semantic == other.semantic && dest_vgpr == other.dest_vgpr &&
num_elements == other.num_elements && sgpr_base == other.sgpr_base &&
dword_offset == other.dword_offset && instance_data == other.instance_data;
}
}; };
struct FetchShaderData { struct FetchShaderData {
const u32* code;
u32 size = 0;
std::vector<VertexAttribute> attributes; std::vector<VertexAttribute> attributes;
s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR
s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR
[[nodiscard]] bool UsesStepRates() const {
return std::ranges::find_if(attributes, [](const VertexAttribute& attribute) {
return attribute.UsesStepRates();
}) != attributes.end();
}
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs,
const Info& info) const {
u32 vertex_offset = regs.index_offset;
u32 instance_offset = 0;
if (vertex_offset == 0 && vertex_offset_sgpr != -1) {
vertex_offset = info.user_data[vertex_offset_sgpr];
}
if (instance_offset_sgpr != -1) {
instance_offset = info.user_data[instance_offset_sgpr];
}
return {vertex_offset, instance_offset};
}
bool operator==(const FetchShaderData& other) const {
return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr &&
instance_offset_sgpr == other.instance_offset_sgpr;
}
}; };
FetchShaderData ParseFetchShader(const u32* code, u32* out_size); std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info);
} // namespace Shader::Gcn } // namespace Shader::Gcn

View File

@ -368,13 +368,11 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
void Translator::EmitFetch(const GcnInst& inst) { void Translator::EmitFetch(const GcnInst& inst) {
// Read the pointer to the fetch shader assembly. // Read the pointer to the fetch shader assembly.
const u32 sgpr_base = inst.src[0].code; info.has_fetch_shader = true;
const u32* code; info.fetch_shader_sgpr_base = inst.src[0].code;
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
// Parse the assembly to generate a list of attributes. const auto fetch_data = ParseFetchShader(info);
u32 fetch_size{}; ASSERT(fetch_data.has_value());
const auto fetch_data = ParseFetchShader(code, &fetch_size);
if (Config::dumpShaders()) { if (Config::dumpShaders()) {
using namespace Common::FS; using namespace Common::FS;
@ -384,13 +382,10 @@ void Translator::EmitFetch(const GcnInst& inst) {
} }
const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash); const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteRaw<u8>(code, fetch_size); file.WriteRaw<u8>(fetch_data->code, fetch_data->size);
} }
info.vertex_offset_sgpr = fetch_data.vertex_offset_sgpr; for (const auto& attrib : fetch_data->attributes) {
info.instance_offset_sgpr = fetch_data.instance_offset_sgpr;
for (const auto& attrib : fetch_data.attributes) {
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic}; const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
IR::VectorReg dst_reg{attrib.dest_vgpr}; IR::VectorReg dst_reg{attrib.dest_vgpr};
@ -420,29 +415,14 @@ void Translator::EmitFetch(const GcnInst& inst) {
// In case of programmable step rates we need to fallback to instance data pulling in // In case of programmable step rates we need to fallback to instance data pulling in
// shader, so VBs should be bound as regular data buffers // shader, so VBs should be bound as regular data buffers
s32 instance_buf_handle = -1; if (attrib.UsesStepRates()) {
const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
if (step_rate == Info::VsInput::OverStepRate0 ||
step_rate == Info::VsInput::OverStepRate1) {
info.buffers.push_back({ info.buffers.push_back({
.sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4), .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
.used_types = IR::Type::F32, .used_types = IR::Type::F32,
.is_instance_data = true, .is_instance_data = true,
.instance_attrib = attrib.semantic,
}); });
instance_buf_handle = s32(info.buffers.size() - 1);
info.uses_step_rates = true;
} }
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
info.vs_inputs.push_back({
.fmt = buffer.GetNumberFmt(),
.binding = attrib.semantic,
.num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,
.dword_offset = attrib.dword_offset,
.instance_step_rate = step_rate,
.instance_data_buf = instance_buf_handle,
});
} }
} }

View File

@ -45,6 +45,7 @@ struct BufferResource {
AmdGpu::Buffer inline_cbuf; AmdGpu::Buffer inline_cbuf;
bool is_gds_buffer{}; bool is_gds_buffer{};
bool is_instance_data{}; bool is_instance_data{};
u8 instance_attrib{};
bool is_written{}; bool is_written{};
bool IsStorage(AmdGpu::Buffer buffer) const noexcept { bool IsStorage(AmdGpu::Buffer buffer) const noexcept {
@ -57,7 +58,6 @@ using BufferResourceList = boost::container::small_vector<BufferResource, 16>;
struct TextureBufferResource { struct TextureBufferResource {
u32 sharp_idx; u32 sharp_idx;
AmdGpu::NumberFormat nfmt;
bool is_written{}; bool is_written{};
constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
@ -66,8 +66,6 @@ using TextureBufferResourceList = boost::container::small_vector<TextureBufferRe
struct ImageResource { struct ImageResource {
u32 sharp_idx; u32 sharp_idx;
AmdGpu::ImageType type;
AmdGpu::NumberFormat nfmt;
bool is_storage{}; bool is_storage{};
bool is_depth{}; bool is_depth{};
bool is_atomic{}; bool is_atomic{};
@ -115,24 +113,6 @@ static_assert(sizeof(PushData) <= 128,
* Contains general information generated by the shader recompiler for an input program. * Contains general information generated by the shader recompiler for an input program.
*/ */
struct Info { struct Info {
// Describes one vertex shader input attribute.
struct VsInput {
// How the attribute is stepped. None is bound with per-vertex input rate; any other value
// is bound with per-instance input rate. The two OverStepRate modes are additionally
// handled by pulling instance data inside the shader.
enum InstanceIdType : u8 {
None = 0,
OverStepRate0 = 1,
OverStepRate1 = 2,
Plain = 3,
};
// Number format taken from the attribute's V#.
AmdGpu::NumberFormat fmt;
// Semantic index of the attribute; also used as the input location/binding.
u16 binding;
// Number of components to load (clamped to what the V# data format provides).
u16 num_components;
// SGPR base and dword offset used to read the attribute's V# from user data.
u8 sgpr_base;
u8 dword_offset;
// Step mode of the attribute (see InstanceIdType).
InstanceIdType instance_step_rate;
// Index of the instance data buffer in the buffer list, or -1 when none was reserved.
s32 instance_data_buf;
};
boost::container::static_vector<VsInput, 32> vs_inputs{};
struct AttributeFlags { struct AttributeFlags {
bool Get(IR::Attribute attrib, u32 comp = 0) const { bool Get(IR::Attribute attrib, u32 comp = 0) const {
return flags[Index(attrib)] & (1 << comp); return flags[Index(attrib)] & (1 << comp);
@ -179,9 +159,6 @@ struct Info {
CopyShaderData gs_copy_data; CopyShaderData gs_copy_data;
s8 vertex_offset_sgpr = -1;
s8 instance_offset_sgpr = -1;
BufferResourceList buffers; BufferResourceList buffers;
TextureBufferResourceList texture_buffers; TextureBufferResourceList texture_buffers;
ImageResourceList images; ImageResourceList images;
@ -208,10 +185,11 @@ struct Info {
bool uses_shared{}; bool uses_shared{};
bool uses_fp16{}; bool uses_fp16{};
bool uses_fp64{}; bool uses_fp64{};
bool uses_step_rates{};
bool translation_failed{}; // indicates that shader has unsupported instructions bool translation_failed{}; // indicates that shader has unsupported instructions
bool has_readconst{}; bool has_readconst{};
u8 mrt_mask{0u}; u8 mrt_mask{0u};
bool has_fetch_shader{false};
u32 fetch_shader_sgpr_base{0u};
explicit Info(Stage stage_, ShaderParams params) explicit Info(Stage stage_, ShaderParams params)
: stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
@ -252,18 +230,6 @@ struct Info {
bnd.user_data += ud_mask.NumRegs(); bnd.user_data += ud_mask.NumRegs();
} }
/// Returns {vertex_offset, instance_offset} for a draw.
///
/// The vertex offset is `regs.index_offset`, unless that is zero and a vertex offset SGPR is
/// known, in which case it is read from user data. The instance offset is read from user data
/// when an instance offset SGPR is known and is zero otherwise.
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs) const {
    const u32 index_offset = regs.index_offset;
    const bool vertex_from_sgpr = index_offset == 0 && vertex_offset_sgpr != -1;
    const u32 vertex_offset = vertex_from_sgpr ? user_data[vertex_offset_sgpr] : index_offset;

    const bool instance_from_sgpr = instance_offset_sgpr != -1;
    const u32 instance_offset = instance_from_sgpr ? user_data[instance_offset_sgpr] : 0;

    return {vertex_offset, instance_offset};
}
void RefreshFlatBuf() { void RefreshFlatBuf() {
flattened_ud_buf.resize(srt_info.flattened_bufsize_dw); flattened_ud_buf.resize(srt_info.flattened_bufsize_dw);
ASSERT(user_data.size() <= NumUserDataRegs); ASSERT(user_data.size() <= NumUserDataRegs);
@ -284,7 +250,12 @@ constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const
} }
/// Reads this resource's image descriptor from user data, substituting the null image
/// when the descriptor read back is not valid.
constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
    const auto sharp = info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
    // Fall back to null image if unbound.
    return sharp.Valid() ? sharp : AmdGpu::Image::Null();
}
constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept { constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {

View File

@ -381,7 +381,6 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
const auto buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp); const auto buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp);
const s32 binding = descriptors.Add(TextureBufferResource{ const s32 binding = descriptors.Add(TextureBufferResource{
.sharp_idx = sharp, .sharp_idx = sharp,
.nfmt = buffer.GetNumberFmt(),
.is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32, .is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
}); });
@ -660,11 +659,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
} }
} }
const auto type = image.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : image.GetType();
u32 image_binding = descriptors.Add(ImageResource{ u32 image_binding = descriptors.Add(ImageResource{
.sharp_idx = tsharp, .sharp_idx = tsharp,
.type = type,
.nfmt = image.GetNumberFmt(),
.is_storage = is_storage, .is_storage = is_storage,
.is_depth = bool(inst_info.is_depth), .is_depth = bool(inst_info.is_depth),
.is_atomic = IsImageAtomicInstruction(inst), .is_atomic = IsImageAtomicInstruction(inst),

View File

@ -22,8 +22,10 @@ struct Profile {
bool support_fp32_denorm_preserve{}; bool support_fp32_denorm_preserve{};
bool support_fp32_denorm_flush{}; bool support_fp32_denorm_flush{};
bool support_explicit_workgroup_layout{}; bool support_explicit_workgroup_layout{};
bool support_legacy_vertex_attributes{};
bool has_broken_spirv_clamp{}; bool has_broken_spirv_clamp{};
bool lower_left_origin_mode{}; bool lower_left_origin_mode{};
bool needs_manual_interpolation{};
u64 min_ssbo_alignment{}; u64 min_ssbo_alignment{};
}; };

View File

@ -6,12 +6,19 @@
#include <bitset> #include <bitset>
#include "common/types.h" #include "common/types.h"
#include "frontend/fetch_shader.h"
#include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
#include "shader_recompiler/ir/passes/srt.h" #include "shader_recompiler/ir/passes/srt.h"
namespace Shader { namespace Shader {
// Per-attribute specialization key for vertex shader inputs.
struct VsAttribSpecialization {
// Number class of the attribute; value-initialized by default.
AmdGpu::NumberClass num_class{};
// Defaulted three-way comparison; the specialization compare uses the resulting != on entries.
auto operator<=>(const VsAttribSpecialization&) const = default;
};
struct BufferSpecialization { struct BufferSpecialization {
u16 stride : 14; u16 stride : 14;
u16 is_storage : 1; u16 is_storage : 1;
@ -50,6 +57,8 @@ struct StageSpecialization {
const Shader::Info* info; const Shader::Info* info;
RuntimeInfo runtime_info; RuntimeInfo runtime_info;
Gcn::FetchShaderData fetch_shader_data{};
boost::container::small_vector<VsAttribSpecialization, 32> vs_attribs;
std::bitset<MaxStageResources> bitset{}; std::bitset<MaxStageResources> bitset{};
boost::container::small_vector<BufferSpecialization, 16> buffers; boost::container::small_vector<BufferSpecialization, 16> buffers;
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers; boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
@ -57,9 +66,19 @@ struct StageSpecialization {
boost::container::small_vector<FMaskSpecialization, 8> fmasks; boost::container::small_vector<FMaskSpecialization, 8> fmasks;
Backend::Bindings start{}; Backend::Bindings start{};
explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_, explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
Backend::Bindings start_) const Profile& profile_, Backend::Bindings start_)
: info{&info_}, runtime_info{runtime_info_}, start{start_} { : info{&info_}, runtime_info{runtime_info_}, start{start_} {
if (const auto fetch_shader = Gcn::ParseFetchShader(info_)) {
fetch_shader_data = *fetch_shader;
if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) {
// Specialize shader on VS input number types to follow spec.
ForEachSharp(vs_attribs, fetch_shader_data.attributes,
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt());
});
}
}
u32 binding{}; u32 binding{};
if (info->has_readconst) { if (info->has_readconst) {
binding++; binding++;
@ -75,8 +94,7 @@ struct StageSpecialization {
}); });
ForEachSharp(binding, images, info->images, ForEachSharp(binding, images, info->images,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) { [](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.type = sharp.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray spec.type = sharp.GetBoundType();
: sharp.GetType();
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
}); });
ForEachSharp(binding, fmasks, info->fmasks, ForEachSharp(binding, fmasks, info->fmasks,
@ -86,6 +104,17 @@ struct StageSpecialization {
}); });
} }
/// Appends one specialization entry per descriptor and lets `func` fill it in.
///
/// An entry is appended for every descriptor, so `spec_list` gains exactly as many
/// elements as `desc_list` has. `func(spec, desc, sharp)` is only invoked when the sharp
/// read for the descriptor is not falsy; otherwise the entry keeps its default state.
void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
    for (const auto& desc : desc_list) {
        // Reserve the slot before looking at the sharp so skipped descriptors still get one.
        auto& entry = spec_list.emplace_back();
        const auto sharp = desc.GetSharp(*info);
        const bool unbound = !sharp;
        if (!unbound) {
            func(entry, desc, sharp);
        }
    }
}
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) { void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
for (const auto& desc : desc_list) { for (const auto& desc : desc_list) {
auto& spec = spec_list.emplace_back(); auto& spec = spec_list.emplace_back();
@ -106,6 +135,14 @@ struct StageSpecialization {
if (runtime_info != other.runtime_info) { if (runtime_info != other.runtime_info) {
return false; return false;
} }
if (fetch_shader_data != other.fetch_shader_data) {
return false;
}
for (u32 i = 0; i < vs_attribs.size(); i++) {
if (vs_attribs[i] != other.vs_attribs[i]) {
return false;
}
}
u32 binding{}; u32 binding{};
if (info->has_readconst != other.info->has_readconst) { if (info->has_readconst != other.info->has_readconst) {
return false; return false;

View File

@ -10,7 +10,24 @@
namespace AmdGpu { namespace AmdGpu {
/// Coarse classification of a number format: floating point, signed or unsigned integer.
enum NumberClass {
    Float = 0,
    Sint = 1,
    Uint = 2,
};
/// Maps a number format to its class: Sint and Uint map to themselves,
/// every other format is classified as Float.
[[nodiscard]] constexpr NumberClass GetNumberClass(const NumberFormat nfmt) {
    if (nfmt == NumberFormat::Sint) {
        return Sint;
    }
    if (nfmt == NumberFormat::Uint) {
        return Uint;
    }
    return Float;
}
/// True when the number format is one of the two integer formats (Sint or Uint).
[[nodiscard]] constexpr bool IsInteger(const NumberFormat nfmt) {
    switch (nfmt) {
    case AmdGpu::NumberFormat::Sint:
    case AmdGpu::NumberFormat::Uint:
        return true;
    default:
        return false;
    }
}

View File

@ -304,6 +304,10 @@ struct Image {
const auto viewed_slice = last_array - base_array + 1; const auto viewed_slice = last_array - base_array + 1;
return GetType() == ImageType::Cube && viewed_slice < 6; return GetType() == ImageType::Cube && viewed_slice < 6;
} }
/// Image type to bind with: a partial cubemap is treated as a 2D array,
/// anything else keeps the type reported by GetType().
ImageType GetBoundType() const noexcept {
    if (IsPartialCubemap()) {
        return ImageType::Color2DArray;
    }
    return GetType();
}
}; };
static_assert(sizeof(Image) == 32); // 256bits static_assert(sizeof(Image) == 32); // 256bits

View File

@ -5,6 +5,7 @@
#include "common/alignment.h" #include "common/alignment.h"
#include "common/scope_exit.h" #include "common/scope_exit.h"
#include "common/types.h" #include "common/types.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/info.h" #include "shader_recompiler/info.h"
#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/liverpool.h"
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
@ -107,7 +108,8 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
} }
} }
bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { bool BufferCache::BindVertexBuffers(
const Shader::Info& vs_info, const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader) {
boost::container::small_vector<vk::VertexInputAttributeDescription2EXT, 16> attributes; boost::container::small_vector<vk::VertexInputAttributeDescription2EXT, 16> attributes;
boost::container::small_vector<vk::VertexInputBindingDescription2EXT, 16> bindings; boost::container::small_vector<vk::VertexInputBindingDescription2EXT, 16> bindings;
SCOPE_EXIT { SCOPE_EXIT {
@ -126,7 +128,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
} }
}; };
if (vs_info.vs_inputs.empty()) { if (!fetch_shader || fetch_shader->attributes.empty()) {
return false; return false;
} }
@ -150,30 +152,29 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
// Calculate buffers memory overlaps // Calculate buffers memory overlaps
bool has_step_rate = false; bool has_step_rate = false;
boost::container::static_vector<BufferRange, NumVertexBuffers> ranges{}; boost::container::static_vector<BufferRange, NumVertexBuffers> ranges{};
for (const auto& input : vs_info.vs_inputs) { for (const auto& attrib : fetch_shader->attributes) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || if (attrib.UsesStepRates()) {
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
has_step_rate = true; has_step_rate = true;
continue; continue;
} }
const auto& buffer = vs_info.ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset); const auto& buffer = attrib.GetSharp(vs_info);
if (buffer.GetSize() == 0) { if (buffer.GetSize() == 0) {
continue; continue;
} }
guest_buffers.emplace_back(buffer); guest_buffers.emplace_back(buffer);
ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize()); ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
attributes.push_back({ attributes.push_back({
.location = input.binding, .location = attrib.semantic,
.binding = input.binding, .binding = attrib.semantic,
.format = .format =
Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
.offset = 0, .offset = 0,
}); });
bindings.push_back({ bindings.push_back({
.binding = input.binding, .binding = attrib.semantic,
.stride = buffer.GetStride(), .stride = buffer.GetStride(),
.inputRate = input.instance_step_rate == Shader::Info::VsInput::None .inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
? vk::VertexInputRate::eVertex ? vk::VertexInputRate::eVertex
: vk::VertexInputRate::eInstance, : vk::VertexInputRate::eInstance,
.divisor = 1, .divisor = 1,
@ -236,7 +237,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) {
// Emulate QuadList primitive type with CPU made index buffer. // Emulate QuadList primitive type with CPU made index buffer.
const auto& regs = liverpool->regs; const auto& regs = liverpool->regs;
if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList) { if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList && !is_indexed) {
is_indexed = true; is_indexed = true;
// Emit indices. // Emit indices.
@ -262,6 +263,32 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) {
VAddr index_address = regs.index_base_address.Address<VAddr>(); VAddr index_address = regs.index_base_address.Address<VAddr>();
index_address += index_offset * index_size; index_address += index_offset * index_size;
if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList) {
// Convert indices.
const u32 new_index_size = regs.num_indices * index_size * 6 / 4;
const auto [data, offset] = stream_buffer.Map(new_index_size);
const auto index_ptr = reinterpret_cast<u8*>(index_address);
switch (index_type) {
case vk::IndexType::eUint16:
Vulkan::LiverpoolToVK::ConvertQuadToTriangleListIndices<u16>(data, index_ptr,
regs.num_indices);
break;
case vk::IndexType::eUint32:
Vulkan::LiverpoolToVK::ConvertQuadToTriangleListIndices<u32>(data, index_ptr,
regs.num_indices);
break;
default:
UNREACHABLE_MSG("Unsupported QuadList index type {}", vk::to_string(index_type));
break;
}
stream_buffer.Commit();
// Bind index buffer.
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindIndexBuffer(stream_buffer.Handle(), offset, index_type);
return new_index_size / index_size;
}
// Bind index buffer. // Bind index buffer.
const u32 index_buffer_size = regs.num_indices * index_size; const u32 index_buffer_size = regs.num_indices * index_size;
const auto [vk_buffer, offset] = ObtainBuffer(index_address, index_buffer_size, false); const auto [vk_buffer, offset] = ObtainBuffer(index_address, index_buffer_size, false);

View File

@ -20,8 +20,11 @@ struct Liverpool;
} }
namespace Shader { namespace Shader {
struct Info; namespace Gcn {
struct FetchShaderData;
} }
struct Info;
} // namespace Shader
namespace VideoCore { namespace VideoCore {
@ -76,7 +79,8 @@ public:
void InvalidateMemory(VAddr device_addr, u64 size); void InvalidateMemory(VAddr device_addr, u64 size);
/// Binds host vertex buffers for the current draw. /// Binds host vertex buffers for the current draw.
bool BindVertexBuffers(const Shader::Info& vs_info); bool BindVertexBuffers(const Shader::Info& vs_info,
const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader);
/// Bind host index buffer for the current draw. /// Bind host index buffer for the current draw.
u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); u32 BindIndexBuffer(bool& is_indexed, u32 index_offset);

View File

@ -726,19 +726,6 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat
return format->vk_format; return format->vk_format;
} }
/// Writes a 16-bit triangle-list index buffer that draws sequential vertices as quads.
/// Every group of four vertices (v0, v1, v2, v3) produces the six indices
/// (v0, v1, v2) and (v0, v2, v3).
/// @param out_ptr      Destination; receives six u16 indices per group of four vertices.
/// @param num_vertices Number of quad-list vertices to cover.
void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
    static constexpr u32 NumVerticesPerQuad = 4;
    u16* out_data = reinterpret_cast<u16*>(out_ptr);
    // The counter must be as wide as num_vertices. A u16 counter wraps back to zero before it
    // can reach a count above 0xFFFC, so the loop never terminates and runs off the end of
    // the destination buffer.
    for (u32 base = 0; base < num_vertices; base += NumVerticesPerQuad) {
        // Emitted indices stay 16-bit; the casts keep the original truncation explicit.
        *out_data++ = static_cast<u16>(base);
        *out_data++ = static_cast<u16>(base + 1);
        *out_data++ = static_cast<u16>(base + 2);
        *out_data++ = static_cast<u16>(base);
        *out_data++ = static_cast<u16>(base + 2);
        *out_data++ = static_cast<u16>(base + 3);
    }
}
vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
const auto comp_swap = color_buffer.info.comp_swap.Value(); const auto comp_swap = color_buffer.info.comp_swap.Value();
const auto format = color_buffer.info.format.Value(); const auto format = color_buffer.info.format.Value();

View File

@ -68,7 +68,33 @@ vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color
vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags supported_flags); vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags supported_flags);
void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices); static constexpr u16 NumVerticesPerQuad = 4;
/// Writes a 16-bit triangle-list index buffer that draws sequential vertices as quads.
/// Every group of four vertices (v0, v1, v2, v3) produces the six indices
/// (v0, v1, v2) and (v0, v2, v3).
/// @param out_ptr      Destination; receives six u16 indices per group of four vertices.
/// @param num_vertices Number of quad-list vertices to cover.
inline void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
    u16* out_data = reinterpret_cast<u16*>(out_ptr);
    // The counter must be as wide as num_vertices. A u16 counter wraps back to zero before it
    // can reach a count above 0xFFFC, so the loop never terminates and runs off the end of
    // the destination buffer.
    for (u32 base = 0; base < num_vertices; base += NumVerticesPerQuad) {
        // Emitted indices stay 16-bit; the casts keep the original truncation explicit.
        *out_data++ = static_cast<u16>(base);
        *out_data++ = static_cast<u16>(base + 1);
        *out_data++ = static_cast<u16>(base + 2);
        *out_data++ = static_cast<u16>(base);
        *out_data++ = static_cast<u16>(base + 2);
        *out_data++ = static_cast<u16>(base + 3);
    }
}
/// Expands a quad-list index buffer into a triangle-list index buffer.
/// Each complete group of four source indices (a, b, c, d) is rewritten as the six indices
/// (a, b, c) and (a, c, d).
/// @tparam T           Index element type, used for both the source and the destination.
/// @param out_ptr      Destination; receives six indices per complete source quad.
/// @param in_ptr       Source quad-list indices.
/// @param num_vertices Number of indices available at in_ptr.
template <typename T>
void ConvertQuadToTriangleListIndices(u8* out_ptr, const u8* in_ptr, u32 num_vertices) {
    T* out_data = reinterpret_cast<T*>(out_ptr);
    const T* quad = reinterpret_cast<const T*>(in_ptr);
    // Only whole quads are converted: a trailing partial group would read source indices at or
    // beyond num_vertices and emit six more outputs than a 6/4-scaled destination can hold.
    // Counting quads with a u32 also avoids the wrap-around a u16 vertex counter hits once
    // num_vertices exceeds 0xFFFC, which made the loop endless.
    const u32 num_quads = num_vertices / NumVerticesPerQuad;
    for (u32 q = 0; q < num_quads; ++q, quad += NumVerticesPerQuad) {
        *out_data++ = quad[0];
        *out_data++ = quad[1];
        *out_data++ = quad[2];
        *out_data++ = quad[0];
        *out_data++ = quad[2];
        *out_data++ = quad[3];
    }
}
static inline vk::Format PromoteFormatToDepth(vk::Format fmt) { static inline vk::Format PromoteFormatToDepth(vk::Format fmt) {
if (fmt == vk::Format::eR32Sfloat) { if (fmt == vk::Format::eR32Sfloat) {

View File

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm> #include <algorithm>
#include <utility>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
@ -10,6 +11,8 @@
#include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/resource.h"
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/texture_cache.h"
@ -20,8 +23,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_, DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_,
vk::PipelineCache pipeline_cache, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos, std::span<const Shader::Info*, MaxShaderStages> infos,
std::optional<const Shader::Gcn::FetchShaderData> fetch_shader_,
std::span<const vk::ShaderModule> modules) std::span<const vk::ShaderModule> modules)
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_} { : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_},
fetch_shader{std::move(fetch_shader_)} {
const vk::Device device = instance.GetDevice(); const vk::Device device = instance.GetDevice();
std::ranges::copy(infos, stages.begin()); std::ranges::copy(infos, stages.begin());
BuildDescSetLayout(); BuildDescSetLayout();
@ -46,32 +51,31 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings; boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes; boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
if (!instance.IsVertexInputDynamicState()) { if (fetch_shader && !instance.IsVertexInputDynamicState()) {
const auto& vs_info = stages[u32(Shader::Stage::Vertex)]; const auto& vs_info = GetStage(Shader::Stage::Vertex);
for (const auto& input : vs_info->vs_inputs) { for (const auto& attrib : fetch_shader->attributes) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || if (attrib.UsesStepRates()) {
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
// Skip attribute binding as the data will be pulled by shader // Skip attribute binding as the data will be pulled by shader
continue; continue;
} }
const auto buffer = const auto buffer = attrib.GetSharp(vs_info);
vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
if (buffer.GetSize() == 0) { if (buffer.GetSize() == 0) {
continue; continue;
} }
vertex_attributes.push_back({ vertex_attributes.push_back({
.location = input.binding, .location = attrib.semantic,
.binding = input.binding, .binding = attrib.semantic,
.format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), .format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
.offset = 0, .offset = 0,
}); });
vertex_bindings.push_back({ vertex_bindings.push_back({
.binding = input.binding, .binding = attrib.semantic,
.stride = buffer.GetStride(), .stride = buffer.GetStride(),
.inputRate = input.instance_step_rate == Shader::Info::VsInput::None .inputRate =
? vk::VertexInputRate::eVertex attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
: vk::VertexInputRate::eInstance, ? vk::VertexInputRate::eVertex
: vk::VertexInputRate::eInstance,
}); });
} }
} }

View File

@ -4,6 +4,7 @@
#include <xxhash.h> #include <xxhash.h>
#include "common/types.h" #include "common/types.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/renderer_vulkan/vk_pipeline_common.h" #include "video_core/renderer_vulkan/vk_pipeline_common.h"
@ -59,9 +60,14 @@ public:
GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache, const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> stages, std::span<const Shader::Info*, MaxShaderStages> stages,
std::optional<const Shader::Gcn::FetchShaderData> fetch_shader,
std::span<const vk::ShaderModule> modules); std::span<const vk::ShaderModule> modules);
~GraphicsPipeline(); ~GraphicsPipeline();
const std::optional<const Shader::Gcn::FetchShaderData>& GetFetchShader() const noexcept {
return fetch_shader;
}
bool IsEmbeddedVs() const noexcept { bool IsEmbeddedVs() const noexcept {
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash; return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash;
@ -94,6 +100,7 @@ private:
private: private:
GraphicsPipelineKey key; GraphicsPipelineKey key;
std::optional<const Shader::Gcn::FetchShaderData> fetch_shader{};
}; };
} // namespace Vulkan } // namespace Vulkan

View File

@ -256,6 +256,7 @@ bool Instance::CreateDevice() {
workgroup_memory_explicit_layout = workgroup_memory_explicit_layout =
add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME);
// The next two extensions are required to be available together in order to support write masks // The next two extensions are required to be available together in order to support write masks
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
@ -264,6 +265,7 @@ bool Instance::CreateDevice() {
const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME); list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME); maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2 // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
// with extensions. // with extensions.
@ -399,6 +401,12 @@ bool Instance::CreateDevice() {
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT{ vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT{
.primitiveTopologyListRestart = true, .primitiveTopologyListRestart = true,
}, },
vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR{
.fragmentShaderBarycentric = true,
},
vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
.legacyVertexAttributes = true,
},
#ifdef __APPLE__ #ifdef __APPLE__
feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(), feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(),
#endif #endif
@ -438,6 +446,12 @@ bool Instance::CreateDevice() {
if (!vertex_input_dynamic_state) { if (!vertex_input_dynamic_state) {
device_chain.unlink<vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT>(); device_chain.unlink<vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT>();
} }
if (!fragment_shader_barycentric) {
device_chain.unlink<vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR>();
}
if (!legacy_vertex_attributes) {
device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
}
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get()); auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
if (device_result != vk::Result::eSuccess) { if (device_result != vk::Result::eSuccess) {

View File

@ -143,10 +143,21 @@ public:
return maintenance5; return maintenance5;
} }
/// Returns true when VK_KHR_fragment_shader_barycentric is supported.
bool IsFragmentShaderBarycentricSupported() const {
return fragment_shader_barycentric;
}
/// Returns true when VK_EXT_primitive_topology_list_restart is supported.
bool IsListRestartSupported() const { bool IsListRestartSupported() const {
return list_restart; return list_restart;
} }
/// Returns true when VK_EXT_legacy_vertex_attributes is supported.
bool IsLegacyVertexAttributesSupported() const {
return legacy_vertex_attributes;
}
/// Returns true when geometry shaders are supported by the device /// Returns true when geometry shaders are supported by the device
bool IsGeometryStageSupported() const { bool IsGeometryStageSupported() const {
return features.geometryShader; return features.geometryShader;
@ -315,6 +326,7 @@ private:
bool null_descriptor{}; bool null_descriptor{};
bool maintenance5{}; bool maintenance5{};
bool list_restart{}; bool list_restart{};
bool legacy_vertex_attributes{};
u64 min_imported_host_pointer_alignment{}; u64 min_imported_host_pointer_alignment{};
u32 subgroup_size{}; u32 subgroup_size{};
bool tooling_info{}; bool tooling_info{};

View File

@ -169,6 +169,9 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
.support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32), .support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32), .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
.support_explicit_workgroup_layout = true, .support_explicit_workgroup_layout = true,
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
}; };
auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({}); auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});
ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}", ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}",
@ -185,7 +188,7 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
if (is_new) { if (is_new) {
it.value() = graphics_pipeline_pool.Create(instance, scheduler, desc_heap, graphics_key, it.value() = graphics_pipeline_pool.Create(instance, scheduler, desc_heap, graphics_key,
*pipeline_cache, infos, modules); *pipeline_cache, infos, fetch_shader, modules);
} }
return it->second; return it->second;
} }
@ -302,8 +305,12 @@ bool PipelineCache::RefreshGraphicsKey() {
} }
auto params = Liverpool::GetParams(*pgm); auto params = Liverpool::GetParams(*pgm);
std::tie(infos[stage_out_idx], modules[stage_out_idx], key.stage_hashes[stage_out_idx]) = std::optional<Shader::Gcn::FetchShaderData> fetch_shader_;
GetProgram(stage_in, params, binding); std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_,
key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding);
if (fetch_shader_) {
fetch_shader = fetch_shader_;
}
return true; return true;
}; };
@ -339,16 +346,14 @@ bool PipelineCache::RefreshGraphicsKey() {
} }
} }
const auto* vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)]; const auto vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)];
if (vs_info && !instance.IsVertexInputDynamicState()) { if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) {
u32 vertex_binding = 0; u32 vertex_binding = 0;
for (const auto& input : vs_info->vs_inputs) { for (const auto& attrib : fetch_shader->attributes) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || if (attrib.UsesStepRates()) {
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
continue; continue;
} }
const auto& buffer = const auto& buffer = attrib.GetSharp(*vs_info);
vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
if (buffer.GetSize() == 0) { if (buffer.GetSize() == 0) {
continue; continue;
} }
@ -392,7 +397,7 @@ bool PipelineCache::RefreshComputeKey() {
Shader::Backend::Bindings binding{}; Shader::Backend::Bindings binding{};
const auto* cs_pgm = &liverpool->regs.cs_program; const auto* cs_pgm = &liverpool->regs.cs_program;
const auto cs_params = Liverpool::GetParams(*cs_pgm); const auto cs_params = Liverpool::GetParams(*cs_pgm);
std::tie(infos[0], modules[0], compute_key) = std::tie(infos[0], modules[0], fetch_shader, compute_key) =
GetProgram(Shader::Stage::Compute, cs_params, binding); GetProgram(Shader::Stage::Compute, cs_params, binding);
return true; return true;
} }
@ -423,24 +428,26 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
return module; return module;
} }
std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram( std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>, u64>
Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding) { PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
Shader::Backend::Bindings& binding) {
const auto runtime_info = BuildRuntimeInfo(stage); const auto runtime_info = BuildRuntimeInfo(stage);
auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
if (new_program) { if (new_program) {
Program* program = program_pool.Create(stage, params); Program* program = program_pool.Create(stage, params);
auto start = binding; auto start = binding;
const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
const auto spec = Shader::StageSpecialization(program->info, runtime_info, start); const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start);
program->AddPermut(module, std::move(spec)); program->AddPermut(module, std::move(spec));
it_pgm.value() = program; it_pgm.value() = program;
return std::make_tuple(&program->info, module, HashCombine(params.hash, 0)); return std::make_tuple(&program->info, module, spec.fetch_shader_data,
HashCombine(params.hash, 0));
} }
Program* program = it_pgm->second; Program* program = it_pgm->second;
auto& info = program->info; auto& info = program->info;
info.RefreshFlatBuf(); info.RefreshFlatBuf();
const auto spec = Shader::StageSpecialization(info, runtime_info, binding); const auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding);
size_t perm_idx = program->modules.size(); size_t perm_idx = program->modules.size();
vk::ShaderModule module{}; vk::ShaderModule module{};
@ -454,7 +461,8 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
module = it->module; module = it->module;
perm_idx = std::distance(program->modules.begin(), it); perm_idx = std::distance(program->modules.begin(), it);
} }
return std::make_tuple(&info, module, HashCombine(params.hash, perm_idx)); return std::make_tuple(&info, module, spec.fetch_shader_data,
HashCombine(params.hash, perm_idx));
} }
void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,

View File

@ -47,8 +47,10 @@ public:
const ComputePipeline* GetComputePipeline(); const ComputePipeline* GetComputePipeline();
std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram( std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>,
Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding); u64>
GetProgram(Shader::Stage stage, Shader::ShaderParams params,
Shader::Backend::Bindings& binding);
private: private:
bool RefreshGraphicsKey(); bool RefreshGraphicsKey();
@ -80,6 +82,7 @@ private:
tsl::robin_map<GraphicsPipelineKey, GraphicsPipeline*> graphics_pipelines; tsl::robin_map<GraphicsPipelineKey, GraphicsPipeline*> graphics_pipelines;
std::array<const Shader::Info*, MaxShaderStages> infos{}; std::array<const Shader::Info*, MaxShaderStages> infos{};
std::array<vk::ShaderModule, MaxShaderStages> modules{}; std::array<vk::ShaderModule, MaxShaderStages> modules{};
std::optional<Shader::Gcn::FetchShaderData> fetch_shader{};
GraphicsPipelineKey graphics_key{}; GraphicsPipelineKey graphics_key{};
u64 compute_key{}; u64 compute_key{};
}; };

View File

@ -187,13 +187,14 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
} }
const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
buffer_cache.BindVertexBuffers(vs_info); const auto& fetch_shader = pipeline->GetFetchShader();
buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset);
BeginRendering(*pipeline, state); BeginRendering(*pipeline, state);
UpdateDynamicState(*pipeline); UpdateDynamicState(*pipeline);
const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(regs); const auto [vertex_offset, instance_offset] = fetch_shader->GetDrawOffsets(regs, vs_info);
const auto cmdbuf = scheduler.CommandBuffer(); const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
@ -243,7 +244,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
} }
const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
buffer_cache.BindVertexBuffers(vs_info); const auto& fetch_shader = pipeline->GetFetchShader();
buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
buffer_cache.BindIndexBuffer(is_indexed, 0); buffer_cache.BindIndexBuffer(is_indexed, 0);
const auto& [buffer, base] = const auto& [buffer, base] =
@ -397,10 +399,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
if (!stage) { if (!stage) {
continue; continue;
} }
if (stage->uses_step_rates) { push_data.step0 = regs.vgt_instance_step_rate_0;
push_data.step0 = regs.vgt_instance_step_rate_0; push_data.step1 = regs.vgt_instance_step_rate_1;
push_data.step1 = regs.vgt_instance_step_rate_1;
}
stage->PushUd(binding, push_data); stage->PushUd(binding, push_data);
BindBuffers(*stage, binding, push_data, set_writes, buffer_barriers); BindBuffers(*stage, binding, push_data, set_writes, buffer_barriers);

View File

@ -87,12 +87,9 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso
range.extent.levels = image.last_level - image.base_level + 1; range.extent.levels = image.last_level - image.base_level + 1;
} }
range.extent.layers = image.last_array - image.base_array + 1; range.extent.layers = image.last_array - image.base_array + 1;
type = ConvertImageViewType(image.GetType()); type = ConvertImageViewType(image.GetBoundType());
// Adjust view type for partial cubemaps and arrays // Adjust view type for arrays
if (image.IsPartialCubemap()) {
type = vk::ImageViewType::e2DArray;
}
if (type == vk::ImageViewType::eCube) { if (type == vk::ImageViewType::eCube) {
if (desc.is_array) { if (desc.is_array) {
type = vk::ImageViewType::eCubeArray; type = vk::ImageViewType::eCubeArray;

View File

@ -182,12 +182,15 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
case vk::Format::eB8G8R8A8Srgb: case vk::Format::eB8G8R8A8Srgb:
case vk::Format::eB8G8R8A8Unorm: case vk::Format::eB8G8R8A8Unorm:
case vk::Format::eR8G8B8A8Unorm: case vk::Format::eR8G8B8A8Unorm:
case vk::Format::eR8G8B8A8Snorm:
case vk::Format::eR8G8B8A8Uint: case vk::Format::eR8G8B8A8Uint:
case vk::Format::eR32Sfloat: case vk::Format::eR32Sfloat:
case vk::Format::eR32Uint: case vk::Format::eR32Uint:
case vk::Format::eR16G16Sfloat: case vk::Format::eR16G16Sfloat:
case vk::Format::eR16G16Unorm: case vk::Format::eR16G16Unorm:
case vk::Format::eR16G16Snorm:
case vk::Format::eB10G11R11UfloatPack32: case vk::Format::eB10G11R11UfloatPack32:
case vk::Format::eA2B10G10R10UnormPack32:
return vk::Format::eR32Uint; return vk::Format::eR32Uint;
case vk::Format::eBc1RgbaSrgbBlock: case vk::Format::eBc1RgbaSrgbBlock:
case vk::Format::eBc1RgbaUnormBlock: case vk::Format::eBc1RgbaUnormBlock: