diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 50eda0288..bc2d41bda 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -69,7 +69,7 @@ if (NOT TARGET ZLIB::ZLIB) FetchContent_MakeAvailable(ZLIB) add_library(ZLIB::ZLIB ALIAS zlib) # libpng expects this variable to exist after its find_package(ZLIB) - get_target_property(ZLIB_INCLUDE_DIRS zlib INTERFACE_INCLUDE_DIRECTORIES) + set(ZLIB_INCLUDE_DIRS "${FETCHCONTENT_BASE_DIR}/zlib-build") endif() # SDL3 diff --git a/src/core/devtools/widget/memory_map.cpp b/src/core/devtools/widget/memory_map.cpp index afafd2853..dc8f5c2e9 100644 --- a/src/core/devtools/widget/memory_map.cpp +++ b/src/core/devtools/widget/memory_map.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include @@ -23,7 +24,7 @@ bool MemoryMapViewer::Iterator::DrawLine() { return DrawLine(); } TableNextColumn(); - Text("%zX", m.base); + Text("%" PRIXPTR, m.base); TableNextColumn(); Text("%zX", m.size); TableNextColumn(); @@ -48,9 +49,9 @@ bool MemoryMapViewer::Iterator::DrawLine() { return DrawLine(); } TableNextColumn(); - Text("%llX", m.base); + Text("%" PRIXPTR, m.base); TableNextColumn(); - Text("%llX", m.size); + Text("%zX", m.size); TableNextColumn(); auto type = static_cast<::Libraries::Kernel::MemoryTypes>(m.memory_type); Text("%s", magic_enum::enum_name(type).data()); diff --git a/src/core/libraries/ime/ime.cpp b/src/core/libraries/ime/ime.cpp index 700585ff3..dfd659db8 100644 --- a/src/core/libraries/ime/ime.cpp +++ b/src/core/libraries/ime/ime.cpp @@ -44,10 +44,14 @@ public: openEvent.param.rect.y = m_param.ime.posy; } else { openEvent.param.resource_id_array.userId = 1; - openEvent.param.resource_id_array.resource_id[0] = 1; + openEvent.param.resource_id_array.resourceId[0] = 1; } - Execute(nullptr, &openEvent, true); + // Are we supposed to call the event handler on init with + // ADD_OSK? + if (!ime_mode && False(m_param.key.option & OrbisImeKeyboardOption::AddOsk)) { + Execute(nullptr, &openEvent, true); + } if (ime_mode) { g_ime_state = ImeState(&m_param.ime); @@ -56,6 +60,11 @@ public: } s32 Update(OrbisImeEventHandler handler) { + if (!m_ime_mode) { + /* We don't handle any events for ImeKeyboard */ + return ORBIS_OK; + } + std::unique_lock lock{g_ime_state.queue_mutex}; while (!g_ime_state.event_queue.empty()) { @@ -85,6 +94,16 @@ public: } } + s32 SetText(const char16_t* text, u32 length) { + g_ime_state.SetText(text, length); + return ORBIS_OK; + } + + s32 SetCaret(const OrbisImeCaret* caret) { + g_ime_state.SetCaret(caret->index); + return ORBIS_OK; + } + bool IsIme() { return m_ime_mode; } @@ -98,6 +117,7 @@ private: }; static std::unique_ptr g_ime_handler; +static std::unique_ptr g_keyboard_handler; int PS4_SYSV_ABI FinalizeImeModule() { LOG_ERROR(Lib_Ime, "(STUBBED) called"); @@ -130,9 +150,6 @@ s32 PS4_SYSV_ABI sceImeClose() { if (!g_ime_handler) { return ORBIS_IME_ERROR_NOT_OPENED; } - if (!g_ime_handler->IsIme()) { - return ORBIS_IME_ERROR_NOT_OPENED; - } g_ime_handler.release(); g_ime_ui = ImeUi(); @@ -233,14 +250,11 @@ s32 PS4_SYSV_ABI sceImeGetPanelSize(const OrbisImeParam* param, u32* width, u32* s32 PS4_SYSV_ABI sceImeKeyboardClose(s32 userId) { LOG_INFO(Lib_Ime, "(STUBBED) called"); - if (!g_ime_handler) { - return ORBIS_IME_ERROR_NOT_OPENED; - } - if (g_ime_handler->IsIme()) { + if (!g_keyboard_handler) { return ORBIS_IME_ERROR_NOT_OPENED; } - g_ime_handler.release(); + g_keyboard_handler.release(); return ORBIS_OK; } @@ -255,18 +269,17 @@ int PS4_SYSV_ABI sceImeKeyboardGetResourceId() { } s32 PS4_SYSV_ABI sceImeKeyboardOpen(s32 userId, const OrbisImeKeyboardParam* param) { - LOG_ERROR(Lib_Ime, "(STUBBED) called"); + LOG_INFO(Lib_Ime, "called"); if (!param) { return ORBIS_IME_ERROR_INVALID_ADDRESS; } - if (g_ime_handler) { + if (g_keyboard_handler) { return ORBIS_IME_ERROR_BUSY; } - // g_ime_handler = std::make_unique(param); - // return ORBIS_OK; - return ORBIS_IME_ERROR_CONNECTION_FAILED; // Fixup + g_keyboard_handler = std::make_unique(param); + return ORBIS_OK; } int PS4_SYSV_ABI sceImeKeyboardOpenInternal() { @@ -287,16 +300,14 @@ int PS4_SYSV_ABI sceImeKeyboardUpdate() { s32 PS4_SYSV_ABI sceImeOpen(const OrbisImeParam* param, const void* extended) { LOG_INFO(Lib_Ime, "called"); - if (!g_ime_handler) { - g_ime_handler = std::make_unique(param); - } else { - if (g_ime_handler->IsIme()) { - return ORBIS_IME_ERROR_BUSY; - } - - g_ime_handler->Init((void*)param, true); + if (!param) { + return ORBIS_IME_ERROR_INVALID_ADDRESS; + } + if (g_ime_handler) { + return ORBIS_IME_ERROR_BUSY; } + g_ime_handler = std::make_unique(param); return ORBIS_OK; } @@ -322,13 +333,29 @@ int PS4_SYSV_ABI sceImeSetCandidateIndex() { } int PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret) { - LOG_ERROR(Lib_Ime, "(STUBBED) called"); - return ORBIS_OK; + LOG_TRACE(Lib_Ime, "called"); + + if (!g_ime_handler) { + return ORBIS_IME_ERROR_NOT_OPENED; + } + if (!caret) { + return ORBIS_IME_ERROR_INVALID_ADDRESS; + } + + return g_ime_handler->SetCaret(caret); } -int PS4_SYSV_ABI sceImeSetText() { - LOG_ERROR(Lib_Ime, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceImeSetText(const char16_t* text, u32 length) { + LOG_TRACE(Lib_Ime, "called"); + + if (!g_ime_handler) { + return ORBIS_IME_ERROR_NOT_OPENED; + } + if (!text) { + return ORBIS_IME_ERROR_INVALID_ADDRESS; + } + + return g_ime_handler->SetText(text, length); } int PS4_SYSV_ABI sceImeSetTextGeometry() { @@ -337,13 +364,19 @@ int PS4_SYSV_ABI sceImeSetTextGeometry() { } s32 PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler) { - LOG_TRACE(Lib_Ime, "called"); + if (g_ime_handler) { + g_ime_handler->Update(handler); + } - if (!g_ime_handler) { + if (g_keyboard_handler) { + g_keyboard_handler->Update(handler); + } + + if (!g_ime_handler || !g_keyboard_handler) { return ORBIS_IME_ERROR_NOT_OPENED; } - return g_ime_handler->Update(handler); + return ORBIS_OK; } int PS4_SYSV_ABI sceImeVshClearPreedit() { diff --git a/src/core/libraries/ime/ime.h b/src/core/libraries/ime/ime.h index 2915b70da..448ee6896 100644 --- a/src/core/libraries/ime/ime.h +++ b/src/core/libraries/ime/ime.h @@ -26,6 +26,24 @@ enum class OrbisImeKeyboardOption : u32 { }; DECLARE_ENUM_FLAG_OPERATORS(OrbisImeKeyboardOption) +enum class OrbisImeOption : u32 { + DEFAULT = 0, + MULTILINE = 1, + NO_AUTO_CAPITALIZATION = 2, + PASSWORD = 4, + LANGUAGES_FORCED = 8, + EXT_KEYBOARD = 16, + NO_LEARNING = 32, + FIXED_POSITION = 64, + DISABLE_RESUME = 256, + DISABLE_AUTO_SPACE = 512, + DISABLE_POSITION_ADJUSTMENT = 2048, + EXPANDED_PREEDIT_BUFFER = 4096, + USE_JAPANESE_EISUU_KEY_AS_CAPSLOCK = 8192, + USE_2K_COORDINATES = 16384, +}; +DECLARE_ENUM_FLAG_OPERATORS(OrbisImeOption) + struct OrbisImeKeyboardParam { OrbisImeKeyboardOption option; s8 reserved1[4]; @@ -41,9 +59,9 @@ struct OrbisImeParam { OrbisImeEnterLabel enter_label; OrbisImeInputMethod input_method; OrbisImeTextFilter filter; - u32 option; - u32 max_text_length; - char16_t* input_text_buffer; + OrbisImeOption option; + u32 maxTextLength; + char16_t* inputTextBuffer; float posx; float posy; OrbisImeHorizontalAlignment horizontal_alignment; @@ -93,7 +111,7 @@ int PS4_SYSV_ABI sceImeOpenInternal(); void PS4_SYSV_ABI sceImeParamInit(OrbisImeParam* param); int PS4_SYSV_ABI sceImeSetCandidateIndex(); s32 PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret); -int PS4_SYSV_ABI sceImeSetText(); +s32 PS4_SYSV_ABI sceImeSetText(const char16_t* text, u32 length); int PS4_SYSV_ABI sceImeSetTextGeometry(); s32 PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler); int PS4_SYSV_ABI sceImeVshClearPreedit(); diff --git a/src/core/libraries/ime/ime_common.h b/src/core/libraries/ime/ime_common.h index 77f23d91d..6d4afd81d 100644 --- a/src/core/libraries/ime/ime_common.h +++ b/src/core/libraries/ime/ime_common.h @@ -142,7 +142,7 @@ struct OrbisImeKeycode { struct OrbisImeKeyboardResourceIdArray { s32 userId; - u32 resource_id[6]; + u32 resourceId[5]; }; enum class OrbisImeCaretMovementDirection : u32 { diff --git a/src/core/libraries/ime/ime_ui.cpp b/src/core/libraries/ime/ime_ui.cpp index c5f41c5e8..8eaa48178 100644 --- a/src/core/libraries/ime/ime_ui.cpp +++ b/src/core/libraries/ime/ime_ui.cpp @@ -16,7 +16,7 @@ ImeState::ImeState(const OrbisImeParam* param) { } work_buffer = param->work; - text_buffer = param->input_text_buffer; + text_buffer = param->inputTextBuffer; std::size_t text_len = std::char_traits::length(text_buffer); if (!ConvertOrbisToUTF8(text_buffer, text_len, current_text.begin(), @@ -26,15 +26,13 @@ ImeState::ImeState(const OrbisImeParam* param) { } ImeState::ImeState(ImeState&& other) noexcept - : input_changed(other.input_changed), work_buffer(other.work_buffer), - text_buffer(other.text_buffer), current_text(std::move(other.current_text)), - event_queue(std::move(other.event_queue)) { + : work_buffer(other.work_buffer), text_buffer(other.text_buffer), + current_text(std::move(other.current_text)), event_queue(std::move(other.event_queue)) { other.text_buffer = nullptr; } ImeState& ImeState::operator=(ImeState&& other) noexcept { if (this != &other) { - input_changed = other.input_changed; work_buffer = other.work_buffer; text_buffer = other.text_buffer; current_text = std::move(other.current_text); @@ -63,6 +61,10 @@ void ImeState::SendCloseEvent() { SendEvent(&closeEvent); } +void ImeState::SetText(const char16_t* text, u32 length) {} + +void ImeState::SetCaret(u32 position) {} + bool ImeState::ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len, char* utf8_text, std::size_t utf8_text_len) { std::fill(utf8_text, utf8_text + utf8_text_len, '\0'); @@ -180,9 +182,8 @@ void ImeUi::DrawInputText() { if (first_render) { SetKeyboardFocusHere(); } - if (InputTextEx("##ImeInput", nullptr, state->current_text.begin(), ime_param->max_text_length, + if (InputTextEx("##ImeInput", nullptr, state->current_text.begin(), ime_param->maxTextLength, input_size, ImGuiInputTextFlags_CallbackAlways, InputTextCallback, this)) { - state->input_changed = true; } } @@ -190,6 +191,39 @@ int ImeUi::InputTextCallback(ImGuiInputTextCallbackData* data) { ImeUi* ui = static_cast(data->UserData); ASSERT(ui); + static std::string lastText; + std::string currentText(data->Buf, data->BufTextLen); + if (currentText != lastText) { + OrbisImeEditText eventParam{}; + eventParam.str = reinterpret_cast(ui->ime_param->work); + eventParam.caret_index = data->CursorPos; + eventParam.area_num = 1; + + eventParam.text_area[0].mode = 1; // Edit mode + eventParam.text_area[0].index = data->CursorPos; + eventParam.text_area[0].length = data->BufTextLen; + + if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen, eventParam.str, + ui->ime_param->maxTextLength)) { + LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8"); + return 0; + } + + if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen, + ui->ime_param->inputTextBuffer, + ui->ime_param->maxTextLength)) { + LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8"); + return 0; + } + + OrbisImeEvent event{}; + event.id = OrbisImeEventId::UpdateText; + event.param.text = eventParam; + + lastText = currentText; + ui->state->SendEvent(&event); + } + static int lastCaretPos = -1; if (lastCaretPos == -1) { lastCaretPos = data->CursorPos; @@ -209,39 +243,6 @@ int ImeUi::InputTextCallback(ImGuiInputTextCallbackData* data) { ui->state->SendEvent(&event); } - static std::string lastText; - std::string currentText(data->Buf, data->BufTextLen); - if (currentText != lastText) { - OrbisImeEditText eventParam{}; - eventParam.str = reinterpret_cast(ui->ime_param->work); - eventParam.caret_index = data->CursorPos; - eventParam.area_num = 1; - - eventParam.text_area[0].mode = 1; // Edit mode - eventParam.text_area[0].index = data->CursorPos; - eventParam.text_area[0].length = data->BufTextLen; - - if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen, eventParam.str, - ui->ime_param->max_text_length)) { - LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8"); - return 0; - } - - if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen, - ui->ime_param->input_text_buffer, - ui->ime_param->max_text_length)) { - LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8"); - return 0; - } - - OrbisImeEvent event{}; - event.id = OrbisImeEventId::UpdateText; - event.param.text = eventParam; - - lastText = currentText; - ui->state->SendEvent(&event); - } - return 0; } diff --git a/src/core/libraries/ime/ime_ui.h b/src/core/libraries/ime/ime_ui.h index ebd70a7c8..a2a806bb9 100644 --- a/src/core/libraries/ime/ime_ui.h +++ b/src/core/libraries/ime/ime_ui.h @@ -22,10 +22,7 @@ class ImeState { friend class ImeHandler; friend class ImeUi; - bool input_changed = false; - void* work_buffer{}; - char16_t* text_buffer{}; // A character can hold up to 4 bytes in UTF-8 @@ -43,6 +40,9 @@ public: void SendEnterEvent(); void SendCloseEvent(); + void SetText(const char16_t* text, u32 length); + void SetCaret(u32 position); + private: bool ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len, char* utf8_text, std::size_t native_text_len); diff --git a/src/core/libraries/kernel/kernel.cpp b/src/core/libraries/kernel/kernel.cpp index b310c7be9..4028116ef 100644 --- a/src/core/libraries/kernel/kernel.cpp +++ b/src/core/libraries/kernel/kernel.cpp @@ -203,7 +203,7 @@ int PS4_SYSV_ABI _sigprocmask() { } int PS4_SYSV_ABI posix_getpagesize() { - return 4096; + return 16_KB; } void RegisterKernel(Core::Loader::SymbolsResolver* sym) { diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 4629980c9..793ddd1fe 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -281,7 +281,7 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt /* Create thread */ new_thread->native_thr = Core::Thread(); - int ret = new_thread->native_thr.Create(RunThread, new_thread); + int ret = new_thread->native_thr.Create(RunThread, new_thread, &new_thread->attr); ASSERT_MSG(ret == 0, "Failed to create thread with error {}", ret); if (ret) { *thread = nullptr; diff --git a/src/core/thread.cpp b/src/core/thread.cpp index e9c46b522..a93f16c8d 100644 --- a/src/core/thread.cpp +++ b/src/core/thread.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include "libraries/kernel/threads/pthread.h" #include "thread.h" #ifdef _WIN64 @@ -15,7 +16,7 @@ Thread::Thread() : native_handle{0} {} Thread::~Thread() {} -int Thread::Create(ThreadFunc func, void* arg) { +int Thread::Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr) { #ifdef _WIN64 native_handle = CreateThread(nullptr, 0, (LPTHREAD_START_ROUTINE)func, arg, 0, nullptr); return native_handle ? 0 : -1; @@ -23,6 +24,7 @@ int Thread::Create(ThreadFunc func, void* arg) { pthread_t* pthr = reinterpret_cast(&native_handle); pthread_attr_t pattr; pthread_attr_init(&pattr); + pthread_attr_setstack(&pattr, attr->stackaddr_attr, attr->stacksize_attr); return pthread_create(pthr, &pattr, (PthreadFunc)func, arg); #endif } diff --git a/src/core/thread.h b/src/core/thread.h index 8665100af..cfb8b8309 100644 --- a/src/core/thread.h +++ b/src/core/thread.h @@ -5,6 +5,10 @@ #include "common/types.h" +namespace Libraries::Kernel { +struct PthreadAttr; +} // namespace Libraries::Kernel + namespace Core { class Thread { @@ -15,7 +19,7 @@ public: Thread(); ~Thread(); - int Create(ThreadFunc func, void* arg); + int Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr); void Exit(); uintptr_t GetHandle() { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index e84908a57..1e7032f10 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -206,7 +206,7 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) { return main; } -void SetupCapabilities(const Info& info, EmitContext& ctx) { +void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) { ctx.AddCapability(spv::Capability::Image1D); ctx.AddCapability(spv::Capability::Sampled1D); ctx.AddCapability(spv::Capability::ImageQuery); @@ -251,6 +251,10 @@ void SetupCapabilities(const Info& info, EmitContext& ctx) { if (info.stage == Stage::Geometry) { ctx.AddCapability(spv::Capability::Geometry); } + if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) { + ctx.AddExtension("SPV_KHR_fragment_shader_barycentric"); + ctx.AddCapability(spv::Capability::FragmentBarycentricKHR); + } } void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { @@ -342,7 +346,7 @@ std::vector EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in EmitContext ctx{profile, runtime_info, program.info, binding}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); - SetupCapabilities(program.info, ctx); + SetupCapabilities(program.info, profile, ctx); SetupFloatMode(ctx, profile, runtime_info, main); PatchPhiNodes(program, ctx); binding.user_data += program.info.ud_mask.NumRegs(); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 064200d99..d8c0a17bd 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -171,54 +171,38 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) { rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value)); } +Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { + if (IR::IsPosition(attr)) { + ASSERT(attr == IR::Attribute::Position0); + const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); + const auto pointer{ + ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))}; + const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); + } + + if (IR::IsParam(attr)) { + const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; + const auto param = ctx.input_params.at(param_id).id; + const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); + const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))}; + const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); + } + UNREACHABLE(); +} + Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { if (ctx.info.stage == Stage::Geometry) { - if (IR::IsPosition(attr)) { - ASSERT(attr == IR::Attribute::Position0); - const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), - ctx.ConstU32(0u))}; - const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); - } - - if (IR::IsParam(attr)) { - const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; - const auto param = ctx.input_params.at(param_id).id; - const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))}; - const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); - } - UNREACHABLE(); + return EmitGetAttributeForGeometry(ctx, attr, comp, index); } if (IR::IsParam(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; const auto& param{ctx.input_params.at(index)}; - if (param.buffer_handle < 0) { - if (!ValidId(param.id)) { - // Attribute is disabled or varying component is not written - return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f); - } - - Id result; - if (param.is_default) { - result = ctx.OpCompositeExtract(param.component_type, param.id, comp); - } else if (param.num_components > 1) { - const Id pointer{ - ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))}; - result = ctx.OpLoad(param.component_type, pointer); - } else { - result = ctx.OpLoad(param.component_type, param.id); - } - if (param.is_integer) { - result = ctx.OpBitcast(ctx.F32[1], result); - } - return result; - } else { + if (param.buffer_handle >= 0) { const auto step_rate = EmitReadStepRate(ctx, param.id.value); const auto offset = ctx.OpIAdd( ctx.U32[1], @@ -229,7 +213,26 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { ctx.ConstU32(comp)); return EmitReadConstBuffer(ctx, param.buffer_handle, offset); } + + Id result; + if (param.is_loaded) { + // Attribute is either default or manually interpolated. The id points to an already + // loaded vector. + result = ctx.OpCompositeExtract(param.component_type, param.id, comp); + } else if (param.num_components > 1) { + // Attribute is a vector and we need to access a specific component. + const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))}; + result = ctx.OpLoad(param.component_type, pointer); + } else { + // Attribute is a single float or interger, simply load it. + result = ctx.OpLoad(param.component_type, param.id); + } + if (param.is_integer) { + result = ctx.OpBitcast(ctx.F32[1], result); + } + return result; } + switch (attr) { case IR::Attribute::FragCoord: { const Id coord = ctx.OpLoad( diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 40e5ea8b9..fe2660705 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -187,7 +187,8 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool has_mips) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); - const auto type = ctx.info.images[handle & 0xFFFF].type; + const auto sharp = ctx.info.images[handle & 0xFFFF].GetSharp(ctx.info); + const auto type = sharp.GetBoundType(); const Id zero = ctx.u32_zero_value; const auto mips{[&] { return has_mips ? ctx.OpImageQueryLevels(ctx.U32[1], image) : zero; }}; const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa && !texture.is_storage}; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index e9ffdcce8..4a22ba09f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -8,6 +8,9 @@ namespace Shader::Backend::SPIRV { void EmitPrologue(EmitContext& ctx) { + if (ctx.stage == Stage::Fragment) { + ctx.DefineInterpolatedAttribs(); + } ctx.DefineBufferOffsets(); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index dc404b121..4ce9f4221 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/div_ceil.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h" +#include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/ir/passes/srt.h" #include "video_core/amdgpu/types.h" @@ -155,18 +156,12 @@ void EmitContext::DefineInterfaces() { } const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { - switch (fmt) { - case AmdGpu::NumberFormat::Float: - case AmdGpu::NumberFormat::Unorm: - case AmdGpu::NumberFormat::Snorm: - case AmdGpu::NumberFormat::SnormNz: - case AmdGpu::NumberFormat::Sscaled: - case AmdGpu::NumberFormat::Uscaled: - case AmdGpu::NumberFormat::Srgb: + switch (GetNumberClass(fmt)) { + case AmdGpu::NumberClass::Float: return ctx.F32; - case AmdGpu::NumberFormat::Sint: + case AmdGpu::NumberClass::Sint: return ctx.S32; - case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberClass::Uint: return ctx.U32; default: break; @@ -176,18 +171,12 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, u32 num_components, bool output) { - switch (fmt) { - case AmdGpu::NumberFormat::Float: - case AmdGpu::NumberFormat::Unorm: - case AmdGpu::NumberFormat::Snorm: - case AmdGpu::NumberFormat::SnormNz: - case AmdGpu::NumberFormat::Sscaled: - case AmdGpu::NumberFormat::Uscaled: - case AmdGpu::NumberFormat::Srgb: + switch (GetNumberClass(fmt)) { + case AmdGpu::NumberClass::Float: return {id, output ? output_f32 : input_f32, F32[1], num_components, false}; - case AmdGpu::NumberFormat::Uint: + case AmdGpu::NumberClass::Uint: return {id, output ? output_u32 : input_u32, U32[1], num_components, true}; - case AmdGpu::NumberFormat::Sint: + case AmdGpu::NumberClass::Sint: return {id, output ? output_s32 : input_s32, S32[1], num_components, true}; default: break; @@ -222,6 +211,36 @@ void EmitContext::DefineBufferOffsets() { } } +void EmitContext::DefineInterpolatedAttribs() { + if (!profile.needs_manual_interpolation) { + return; + } + // Iterate all input attributes, load them and manually interpolate with barycentric + // coordinates. + for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) { + const auto& input = runtime_info.fs_info.inputs[i]; + const u32 semantic = input.param_index; + auto& params = input_params[semantic]; + if (input.is_flat || params.is_loaded) { + continue; + } + const Id p_array{OpLoad(TypeArray(F32[4], ConstU32(3U)), params.id)}; + const Id p0{OpCompositeExtract(F32[4], p_array, 0U)}; + const Id p1{OpCompositeExtract(F32[4], p_array, 1U)}; + const Id p2{OpCompositeExtract(F32[4], p_array, 2U)}; + const Id p10{OpFSub(F32[4], p1, p0)}; + const Id p20{OpFSub(F32[4], p2, p0)}; + const Id bary_coord{OpLoad(F32[3], gl_bary_coord_id)}; + const Id bary_coord_y{OpCompositeExtract(F32[1], bary_coord, 1)}; + const Id bary_coord_z{OpCompositeExtract(F32[1], bary_coord, 2)}; + const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)}; + const Id p20_z{OpVectorTimesScalar(F32[4], p20, bary_coord_z)}; + params.id = OpFAdd(F32[4], p0, OpFAdd(F32[4], p10_y, p20_z)); + Name(params.id, fmt::format("fs_in_attr{}", semantic)); + params.is_loaded = true; + } +} + Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { switch (default_value) { case 0: @@ -250,33 +269,42 @@ void EmitContext::DefineInputs() { base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input); - for (const auto& input : info.vs_inputs) { - ASSERT(input.binding < IR::NumParams); - const Id type{GetAttributeType(*this, input.fmt)[4]}; - if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 || - input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) { - + const auto fetch_shader = Gcn::ParseFetchShader(info); + if (!fetch_shader) { + break; + } + for (const auto& attrib : fetch_shader->attributes) { + ASSERT(attrib.semantic < IR::NumParams); + const auto sharp = attrib.GetSharp(info); + const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]}; + if (attrib.UsesStepRates()) { const u32 rate_idx = - input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0 - : 1; + attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::OverStepRate0 ? 0 + : 1; + const u32 num_components = AmdGpu::NumComponents(sharp.GetDataFmt()); + const auto buffer = + std::ranges::find_if(info.buffers, [&attrib](const auto& buffer) { + return buffer.instance_attrib == attrib.semantic; + }); // Note that we pass index rather than Id - input_params[input.binding] = { - rate_idx, - input_u32, - U32[1], - input.num_components, - true, - false, - input.instance_data_buf, + input_params[attrib.semantic] = SpirvAttribute{ + .id = rate_idx, + .pointer_type = input_u32, + .component_type = U32[1], + .num_components = std::min(attrib.num_elements, num_components), + .is_integer = true, + .is_loaded = false, + .buffer_handle = int(buffer - info.buffers.begin()), }; } else { - Id id{DefineInput(type, input.binding)}; - if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) { - Name(id, fmt::format("vs_instance_attr{}", input.binding)); + Id id{DefineInput(type, attrib.semantic)}; + if (attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::Plain) { + Name(id, fmt::format("vs_instance_attr{}", attrib.semantic)); } else { - Name(id, fmt::format("vs_in_attr{}", input.binding)); + Name(id, fmt::format("vs_in_attr{}", attrib.semantic)); } - input_params[input.binding] = GetAttributeInfo(input.fmt, id, 4, false); + input_params[attrib.semantic] = + GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false); interfaces.push_back(id); } } @@ -286,6 +314,10 @@ void EmitContext::DefineInputs() { frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input); frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output); front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); + if (profile.needs_manual_interpolation) { + gl_bary_coord_id = + DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input); + } for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) { const auto& input = runtime_info.fs_info.inputs[i]; const u32 semantic = input.param_index; @@ -299,14 +331,21 @@ void EmitContext::DefineInputs() { const IR::Attribute param{IR::Attribute::Param0 + input.param_index}; const u32 num_components = info.loads.NumComponents(param); const Id type{F32[num_components]}; - const Id id{DefineInput(type, semantic)}; - if (input.is_flat) { - Decorate(id, spv::Decoration::Flat); + Id attr_id{}; + if (profile.needs_manual_interpolation && !input.is_flat) { + attr_id = DefineInput(TypeArray(type, ConstU32(3U)), semantic); + Decorate(attr_id, spv::Decoration::PerVertexKHR); + Name(attr_id, fmt::format("fs_in_attr{}_p", semantic)); + } else { + attr_id = DefineInput(type, semantic); + Name(attr_id, fmt::format("fs_in_attr{}", semantic)); + } + if (input.is_flat) { + Decorate(attr_id, spv::Decoration::Flat); } - Name(id, fmt::format("fs_in_attr{}", semantic)); input_params[semantic] = - GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, false); - interfaces.push_back(id); + GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false); + interfaces.push_back(attr_id); } break; case Stage::Compute: @@ -512,9 +551,10 @@ void EmitContext::DefineBuffers() { void EmitContext::DefineTextureBuffers() { for (const auto& desc : info.texture_buffers) { - const bool is_integer = - desc.nfmt == AmdGpu::NumberFormat::Uint || desc.nfmt == AmdGpu::NumberFormat::Sint; - const VectorIds& sampled_type{GetAttributeType(*this, desc.nfmt)}; + const auto sharp = desc.GetSharp(info); + const auto nfmt = sharp.GetNumberFmt(); + const bool is_integer = AmdGpu::IsInteger(nfmt); + const VectorIds& sampled_type{GetAttributeType(*this, nfmt)}; const u32 sampled = desc.is_written ? 2 : 1; const Id image_type{TypeImage(sampled_type[1], spv::Dim::Buffer, false, false, false, sampled, spv::ImageFormat::Unknown)}; @@ -609,10 +649,11 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) { } Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { - const auto image = ctx.info.ReadUdSharp(desc.sharp_idx); + const auto image = desc.GetSharp(ctx.info); const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown; + const auto type = image.GetBoundType(); const u32 sampled = desc.is_storage ? 2 : 1; - switch (desc.type) { + switch (type) { case AmdGpu::ImageType::Color1D: return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, sampled, format); case AmdGpu::ImageType::Color1DArray: @@ -631,14 +672,15 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { default: break; } - throw InvalidArgument("Invalid texture type {}", desc.type); + throw InvalidArgument("Invalid texture type {}", type); } void EmitContext::DefineImagesAndSamplers() { for (const auto& image_desc : info.images) { - const bool is_integer = image_desc.nfmt == AmdGpu::NumberFormat::Uint || - image_desc.nfmt == AmdGpu::NumberFormat::Sint; - const VectorIds& data_types = GetAttributeType(*this, image_desc.nfmt); + const auto sharp = image_desc.GetSharp(info); + const auto nfmt = sharp.GetNumberFmt(); + const bool is_integer = AmdGpu::IsInteger(nfmt); + const VectorIds& data_types = GetAttributeType(*this, nfmt); const Id sampled_type = data_types[1]; const Id image_type{ImageType(*this, image_desc, sampled_type)}; const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index fb30a5dd6..1c5da946d 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -42,7 +42,9 @@ public: ~EmitContext(); Id Def(const IR::Value& value); + void DefineBufferOffsets(); + void DefineInterpolatedAttribs(); [[nodiscard]] Id DefineInput(Id type, u32 location) { const Id input_id{DefineVar(type, spv::StorageClass::Input)}; @@ -197,6 +199,9 @@ public: Id shared_memory_u32_type{}; + Id interpolate_func{}; + Id gl_bary_coord_id{}; + struct TextureDefinition { const VectorIds* data_types; Id id; @@ -241,7 +246,7 @@ public: Id component_type; u32 num_components; bool is_integer{}; - bool is_default{}; + bool is_loaded{}; s32 buffer_handle{-1}; }; std::array input_params{}; diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp index 16938410c..8ae664d79 100644 --- a/src/shader_recompiler/frontend/fetch_shader.cpp +++ b/src/shader_recompiler/frontend/fetch_shader.cpp @@ -34,8 +34,14 @@ namespace Shader::Gcn { * We take the reverse way, extract the original input semantics from these instructions. **/ -FetchShaderData ParseFetchShader(const u32* code, u32* out_size) { - FetchShaderData data{}; +std::optional ParseFetchShader(const Shader::Info& info) { + if (!info.has_fetch_shader) { + return std::nullopt; + } + const u32* code; + std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code)); + + FetchShaderData data{.code = code}; GcnCodeSlice code_slice(code, code + std::numeric_limits::max()); GcnDecodeContext decoder; @@ -49,7 +55,7 @@ FetchShaderData ParseFetchShader(const u32* code, u32* out_size) { u32 semantic_index = 0; while (!code_slice.atEnd()) { const auto inst = decoder.decodeInstruction(code_slice); - *out_size += inst.length; + data.size += inst.length; if (inst.opcode == Opcode::S_SETPC_B64) { break; diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h index 0e5d15419..ee9f5c805 100644 --- a/src/shader_recompiler/frontend/fetch_shader.h +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -3,26 +3,80 @@ #pragma once +#include #include #include "common/types.h" +#include "shader_recompiler/info.h" namespace Shader::Gcn { struct VertexAttribute { + enum InstanceIdType : u8 { + None = 0, + OverStepRate0 = 1, + OverStepRate1 = 2, + Plain = 3, + }; + u8 semantic; ///< Semantic index of the attribute u8 dest_vgpr; ///< Destination VGPR to load first component. u8 num_elements; ///< Number of components to load u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V# u8 dword_offset; ///< The dword offset of the V# that describes this attribute. u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate + + [[nodiscard]] InstanceIdType GetStepRate() const { + return static_cast(instance_data); + } + + [[nodiscard]] bool UsesStepRates() const { + const auto step_rate = GetStepRate(); + return step_rate == OverStepRate0 || step_rate == OverStepRate1; + } + + [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept { + return info.ReadUdReg(sgpr_base, dword_offset); + } + + bool operator==(const VertexAttribute& other) const { + return semantic == other.semantic && dest_vgpr == other.dest_vgpr && + num_elements == other.num_elements && sgpr_base == other.sgpr_base && + dword_offset == other.dword_offset && instance_data == other.instance_data; + } }; struct FetchShaderData { + const u32* code; + u32 size = 0; std::vector attributes; s8 vertex_offset_sgpr = -1; ///< SGPR of vertex offset from VADDR s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR + + [[nodiscard]] bool UsesStepRates() const { + return std::ranges::find_if(attributes, [](const VertexAttribute& attribute) { + return attribute.UsesStepRates(); + }) != attributes.end(); + } + + [[nodiscard]] std::pair GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs, + const Info& info) const { + u32 vertex_offset = regs.index_offset; + u32 instance_offset = 0; + if (vertex_offset == 0 && vertex_offset_sgpr != -1) { + vertex_offset = info.user_data[vertex_offset_sgpr]; + } + if (instance_offset_sgpr != -1) { + instance_offset = info.user_data[instance_offset_sgpr]; + } + return {vertex_offset, instance_offset}; + } + + bool operator==(const FetchShaderData& other) const { + return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr && + instance_offset_sgpr == other.instance_offset_sgpr; + } }; -FetchShaderData ParseFetchShader(const u32* code, u32* out_size); +std::optional ParseFetchShader(const Shader::Info& info); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 005c4a7ff..68625a12b 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -368,13 +368,11 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra void Translator::EmitFetch(const GcnInst& inst) { // Read the pointer to the fetch shader assembly. - const u32 sgpr_base = inst.src[0].code; - const u32* code; - std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code)); + info.has_fetch_shader = true; + info.fetch_shader_sgpr_base = inst.src[0].code; - // Parse the assembly to generate a list of attributes. - u32 fetch_size{}; - const auto fetch_data = ParseFetchShader(code, &fetch_size); + const auto fetch_data = ParseFetchShader(info); + ASSERT(fetch_data.has_value()); if (Config::dumpShaders()) { using namespace Common::FS; @@ -384,13 +382,10 @@ void Translator::EmitFetch(const GcnInst& inst) { } const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash); const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; - file.WriteRaw(code, fetch_size); + file.WriteRaw(fetch_data->code, fetch_data->size); } - info.vertex_offset_sgpr = fetch_data.vertex_offset_sgpr; - info.instance_offset_sgpr = fetch_data.instance_offset_sgpr; - - for (const auto& attrib : fetch_data.attributes) { + for (const auto& attrib : fetch_data->attributes) { const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic}; IR::VectorReg dst_reg{attrib.dest_vgpr}; @@ -420,29 +415,14 @@ void Translator::EmitFetch(const GcnInst& inst) { // In case of programmable step rates we need to fallback to instance data pulling in // shader, so VBs should be bound as regular data buffers - s32 instance_buf_handle = -1; - const auto step_rate = static_cast(attrib.instance_data); - if (step_rate == Info::VsInput::OverStepRate0 || - step_rate == Info::VsInput::OverStepRate1) { + if (attrib.UsesStepRates()) { info.buffers.push_back({ .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4), .used_types = IR::Type::F32, .is_instance_data = true, + .instance_attrib = attrib.semantic, }); - instance_buf_handle = s32(info.buffers.size() - 1); - info.uses_step_rates = true; } - - const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt()); - info.vs_inputs.push_back({ - .fmt = buffer.GetNumberFmt(), - .binding = attrib.semantic, - .num_components = std::min(attrib.num_elements, num_components), - .sgpr_base = attrib.sgpr_base, - .dword_offset = attrib.dword_offset, - .instance_step_rate = step_rate, - .instance_data_buf = instance_buf_handle, - }); } } diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index c7ae2a1e5..d382d0e7c 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -45,6 +45,7 @@ struct BufferResource { AmdGpu::Buffer inline_cbuf; bool is_gds_buffer{}; bool is_instance_data{}; + u8 instance_attrib{}; bool is_written{}; bool IsStorage(AmdGpu::Buffer buffer) const noexcept { @@ -57,7 +58,6 @@ using BufferResourceList = boost::container::small_vector; struct TextureBufferResource { u32 sharp_idx; - AmdGpu::NumberFormat nfmt; bool is_written{}; constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; @@ -66,8 +66,6 @@ using TextureBufferResourceList = boost::container::small_vector vs_inputs{}; - struct AttributeFlags { bool Get(IR::Attribute attrib, u32 comp = 0) const { return flags[Index(attrib)] & (1 << comp); @@ -179,9 +159,6 @@ struct Info { CopyShaderData gs_copy_data; - s8 vertex_offset_sgpr = -1; - s8 instance_offset_sgpr = -1; - BufferResourceList buffers; TextureBufferResourceList texture_buffers; ImageResourceList images; @@ -208,10 +185,11 @@ struct Info { bool uses_shared{}; bool uses_fp16{}; bool uses_fp64{}; - bool uses_step_rates{}; bool translation_failed{}; // indicates that shader has unsupported instructions bool has_readconst{}; u8 mrt_mask{0u}; + bool has_fetch_shader{false}; + u32 fetch_shader_sgpr_base{0u}; explicit Info(Stage stage_, ShaderParams params) : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, @@ -252,18 +230,6 @@ struct Info { bnd.user_data += ud_mask.NumRegs(); } - [[nodiscard]] std::pair GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs) const { - u32 vertex_offset = regs.index_offset; - u32 instance_offset = 0; - if (vertex_offset == 0 && vertex_offset_sgpr != -1) { - vertex_offset = user_data[vertex_offset_sgpr]; - } - if (instance_offset_sgpr != -1) { - instance_offset = user_data[instance_offset_sgpr]; - } - return {vertex_offset, instance_offset}; - } - void RefreshFlatBuf() { flattened_ud_buf.resize(srt_info.flattened_bufsize_dw); ASSERT(user_data.size() <= NumUserDataRegs); @@ -284,7 +250,12 @@ constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const } constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept { - return info.ReadUdSharp(sharp_idx); + const auto image = info.ReadUdSharp(sharp_idx); + if (!image.Valid()) { + // Fall back to null image if unbound. + return AmdGpu::Image::Null(); + } + return image; } constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept { diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 7d29c845d..c1ff3d2f2 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -381,7 +381,6 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, const auto buffer = info.ReadUdSharp(sharp); const s32 binding = descriptors.Add(TextureBufferResource{ .sharp_idx = sharp, - .nfmt = buffer.GetNumberFmt(), .is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32, }); @@ -660,11 +659,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip } } - const auto type = image.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : image.GetType(); u32 image_binding = descriptors.Add(ImageResource{ .sharp_idx = tsharp, - .type = type, - .nfmt = image.GetNumberFmt(), .is_storage = is_storage, .is_depth = bool(inst_info.is_depth), .is_atomic = IsImageAtomicInstruction(inst), diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index bbda731e0..96c458d44 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -22,8 +22,10 @@ struct Profile { bool support_fp32_denorm_preserve{}; bool support_fp32_denorm_flush{}; bool support_explicit_workgroup_layout{}; + bool support_legacy_vertex_attributes{}; bool has_broken_spirv_clamp{}; bool lower_left_origin_mode{}; + bool needs_manual_interpolation{}; u64 min_ssbo_alignment{}; }; diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 225b164b5..740b89dda 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -6,12 +6,19 @@ #include #include "common/types.h" +#include "frontend/fetch_shader.h" #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/info.h" #include "shader_recompiler/ir/passes/srt.h" namespace Shader { +struct VsAttribSpecialization { + AmdGpu::NumberClass num_class{}; + + auto operator<=>(const VsAttribSpecialization&) const = default; +}; + struct BufferSpecialization { u16 stride : 14; u16 is_storage : 1; @@ -50,6 +57,8 @@ struct StageSpecialization { const Shader::Info* info; RuntimeInfo runtime_info; + Gcn::FetchShaderData fetch_shader_data{}; + boost::container::small_vector vs_attribs; std::bitset bitset{}; boost::container::small_vector buffers; boost::container::small_vector tex_buffers; @@ -57,9 +66,19 @@ struct StageSpecialization { boost::container::small_vector fmasks; Backend::Bindings start{}; - explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_, - Backend::Bindings start_) + explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, + const Profile& profile_, Backend::Bindings start_) : info{&info_}, runtime_info{runtime_info_}, start{start_} { + if (const auto fetch_shader = Gcn::ParseFetchShader(info_)) { + fetch_shader_data = *fetch_shader; + if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) { + // Specialize shader on VS input number types to follow spec. + ForEachSharp(vs_attribs, fetch_shader_data.attributes, + [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { + spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt()); + }); + } + } u32 binding{}; if (info->has_readconst) { binding++; @@ -75,8 +94,7 @@ struct StageSpecialization { }); ForEachSharp(binding, images, info->images, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { - spec.type = sharp.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray - : sharp.GetType(); + spec.type = sharp.GetBoundType(); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); }); ForEachSharp(binding, fmasks, info->fmasks, @@ -86,6 +104,17 @@ struct StageSpecialization { }); } + void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) { + for (const auto& desc : desc_list) { + auto& spec = spec_list.emplace_back(); + const auto sharp = desc.GetSharp(*info); + if (!sharp) { + continue; + } + func(spec, desc, sharp); + } + } + void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) { for (const auto& desc : desc_list) { auto& spec = spec_list.emplace_back(); @@ -106,6 +135,14 @@ struct StageSpecialization { if (runtime_info != other.runtime_info) { return false; } + if (fetch_shader_data != other.fetch_shader_data) { + return false; + } + for (u32 i = 0; i < vs_attribs.size(); i++) { + if (vs_attribs[i] != other.vs_attribs[i]) { + return false; + } + } u32 binding{}; if (info->has_readconst != other.info->has_readconst) { return false; diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index e83313ea4..38c81ba5f 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -10,7 +10,24 @@ namespace AmdGpu { -[[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) { +enum NumberClass { + Float, + Sint, + Uint, +}; + +[[nodiscard]] constexpr NumberClass GetNumberClass(const NumberFormat nfmt) { + switch (nfmt) { + case NumberFormat::Sint: + return Sint; + case NumberFormat::Uint: + return Uint; + default: + return Float; + } +} + +[[nodiscard]] constexpr bool IsInteger(const NumberFormat nfmt) { return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint; } diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index f43fc9800..a78a68391 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -304,6 +304,10 @@ struct Image { const auto viewed_slice = last_array - base_array + 1; return GetType() == ImageType::Cube && viewed_slice < 6; } + + ImageType GetBoundType() const noexcept { + return IsPartialCubemap() ? ImageType::Color2DArray : GetType(); + } }; static_assert(sizeof(Image) == 32); // 256bits diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 77b353c2f..1abdb230b 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -5,6 +5,7 @@ #include "common/alignment.h" #include "common/scope_exit.h" #include "common/types.h" +#include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/info.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/buffer_cache/buffer_cache.h" @@ -107,7 +108,8 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si } } -bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { +bool BufferCache::BindVertexBuffers( + const Shader::Info& vs_info, const std::optional& fetch_shader) { boost::container::small_vector attributes; boost::container::small_vector bindings; SCOPE_EXIT { @@ -126,7 +128,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { } }; - if (vs_info.vs_inputs.empty()) { + if (!fetch_shader || fetch_shader->attributes.empty()) { return false; } @@ -150,30 +152,29 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { // Calculate buffers memory overlaps bool has_step_rate = false; boost::container::static_vector ranges{}; - for (const auto& input : vs_info.vs_inputs) { - if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || - input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { + for (const auto& attrib : fetch_shader->attributes) { + if (attrib.UsesStepRates()) { has_step_rate = true; continue; } - const auto& buffer = vs_info.ReadUdReg(input.sgpr_base, input.dword_offset); + const auto& buffer = attrib.GetSharp(vs_info); if (buffer.GetSize() == 0) { continue; } guest_buffers.emplace_back(buffer); ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize()); attributes.push_back({ - .location = input.binding, - .binding = input.binding, + .location = attrib.semantic, + .binding = attrib.semantic, .format = Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), .offset = 0, }); bindings.push_back({ - .binding = input.binding, + .binding = attrib.semantic, .stride = buffer.GetStride(), - .inputRate = input.instance_step_rate == Shader::Info::VsInput::None + .inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None ? vk::VertexInputRate::eVertex : vk::VertexInputRate::eInstance, .divisor = 1, @@ -236,7 +237,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) { u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { // Emulate QuadList primitive type with CPU made index buffer. const auto& regs = liverpool->regs; - if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList) { + if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList && !is_indexed) { is_indexed = true; // Emit indices. @@ -262,6 +263,32 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) { VAddr index_address = regs.index_base_address.Address(); index_address += index_offset * index_size; + if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList) { + // Convert indices. + const u32 new_index_size = regs.num_indices * index_size * 6 / 4; + const auto [data, offset] = stream_buffer.Map(new_index_size); + const auto index_ptr = reinterpret_cast(index_address); + switch (index_type) { + case vk::IndexType::eUint16: + Vulkan::LiverpoolToVK::ConvertQuadToTriangleListIndices(data, index_ptr, + regs.num_indices); + break; + case vk::IndexType::eUint32: + Vulkan::LiverpoolToVK::ConvertQuadToTriangleListIndices(data, index_ptr, + regs.num_indices); + break; + default: + UNREACHABLE_MSG("Unsupported QuadList index type {}", vk::to_string(index_type)); + break; + } + stream_buffer.Commit(); + + // Bind index buffer. + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindIndexBuffer(stream_buffer.Handle(), offset, index_type); + return new_index_size / index_size; + } + // Bind index buffer. const u32 index_buffer_size = regs.num_indices * index_size; const auto [vk_buffer, offset] = ObtainBuffer(index_address, index_buffer_size, false); diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index e2519e942..b1bf77f8a 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -20,8 +20,11 @@ struct Liverpool; } namespace Shader { -struct Info; +namespace Gcn { +struct FetchShaderData; } +struct Info; +} // namespace Shader namespace VideoCore { @@ -76,7 +79,8 @@ public: void InvalidateMemory(VAddr device_addr, u64 size); /// Binds host vertex buffers for the current draw. - bool BindVertexBuffers(const Shader::Info& vs_info); + bool BindVertexBuffers(const Shader::Info& vs_info, + const std::optional& fetch_shader); /// Bind host index buffer for the current draw. u32 BindIndexBuffer(bool& is_indexed, u32 index_offset); diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 258e7f391..2262a429a 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -726,19 +726,6 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat return format->vk_format; } -void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) { - static constexpr u16 NumVerticesPerQuad = 4; - u16* out_data = reinterpret_cast(out_ptr); - for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) { - *out_data++ = i; - *out_data++ = i + 1; - *out_data++ = i + 2; - *out_data++ = i; - *out_data++ = i + 2; - *out_data++ = i + 3; - } -} - vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { const auto comp_swap = color_buffer.info.comp_swap.Value(); const auto format = color_buffer.info.format.Value(); diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 70e707fad..287ba691e 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -68,7 +68,33 @@ vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags supported_flags); -void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices); +static constexpr u16 NumVerticesPerQuad = 4; + +inline void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) { + u16* out_data = reinterpret_cast(out_ptr); + for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) { + *out_data++ = i; + *out_data++ = i + 1; + *out_data++ = i + 2; + *out_data++ = i; + *out_data++ = i + 2; + *out_data++ = i + 3; + } +} + +template +void ConvertQuadToTriangleListIndices(u8* out_ptr, const u8* in_ptr, u32 num_vertices) { + T* out_data = reinterpret_cast(out_ptr); + const T* in_data = reinterpret_cast(in_ptr); + for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) { + *out_data++ = in_data[i]; + *out_data++ = in_data[i + 1]; + *out_data++ = in_data[i + 2]; + *out_data++ = in_data[i]; + *out_data++ = in_data[i + 2]; + *out_data++ = in_data[i + 3]; + } +} static inline vk::Format PromoteFormatToDepth(vk::Format fmt) { if (fmt == vk::Format::eR32Sfloat) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d0d16ac75..d53204c77 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include @@ -10,6 +11,8 @@ #include "video_core/amdgpu/resource.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" + +#include "shader_recompiler/frontend/fetch_shader.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/texture_cache.h" @@ -20,8 +23,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_, vk::PipelineCache pipeline_cache, std::span infos, + std::optional fetch_shader_, std::span modules) - : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_} { + : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_}, + fetch_shader{std::move(fetch_shader_)} { const vk::Device device = instance.GetDevice(); std::ranges::copy(infos, stages.begin()); BuildDescSetLayout(); @@ -46,32 +51,31 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector vertex_bindings; boost::container::static_vector vertex_attributes; - if (!instance.IsVertexInputDynamicState()) { - const auto& vs_info = stages[u32(Shader::Stage::Vertex)]; - for (const auto& input : vs_info->vs_inputs) { - if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || - input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { + if (fetch_shader && !instance.IsVertexInputDynamicState()) { + const auto& vs_info = GetStage(Shader::Stage::Vertex); + for (const auto& attrib : fetch_shader->attributes) { + if (attrib.UsesStepRates()) { // Skip attribute binding as the data will be pulled by shader continue; } - const auto buffer = - vs_info->ReadUdReg(input.sgpr_base, input.dword_offset); + const auto buffer = attrib.GetSharp(vs_info); if (buffer.GetSize() == 0) { continue; } vertex_attributes.push_back({ - .location = input.binding, - .binding = input.binding, + .location = attrib.semantic, + .binding = attrib.semantic, .format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()), .offset = 0, }); vertex_bindings.push_back({ - .binding = input.binding, + .binding = attrib.semantic, .stride = buffer.GetStride(), - .inputRate = input.instance_step_rate == Shader::Info::VsInput::None - ? vk::VertexInputRate::eVertex - : vk::VertexInputRate::eInstance, + .inputRate = + attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None + ? vk::VertexInputRate::eVertex + : vk::VertexInputRate::eInstance, }); } } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4f4abfd16..91ffe4ea4 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -4,6 +4,7 @@ #include #include "common/types.h" +#include "shader_recompiler/frontend/fetch_shader.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_pipeline_common.h" @@ -59,9 +60,14 @@ public: GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache, std::span stages, + std::optional fetch_shader, std::span modules); ~GraphicsPipeline(); + const std::optional& GetFetchShader() const noexcept { + return fetch_shader; + } + bool IsEmbeddedVs() const noexcept { static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash; @@ -94,6 +100,7 @@ private: private: GraphicsPipelineKey key; + std::optional fetch_shader{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 580458e7e..49e4987db 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -256,6 +256,7 @@ bool Instance::CreateDevice() { workgroup_memory_explicit_layout = add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME); vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); + fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME); // The next two extensions are required to be available together in order to support write masks color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME); @@ -264,6 +265,7 @@ bool Instance::CreateDevice() { const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME); maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME); + legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME); // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2 // with extensions. @@ -399,6 +401,12 @@ bool Instance::CreateDevice() { vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT{ .primitiveTopologyListRestart = true, }, + vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR{ + .fragmentShaderBarycentric = true, + }, + vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{ + .legacyVertexAttributes = true, + }, #ifdef __APPLE__ feature_chain.get(), #endif @@ -438,6 +446,12 @@ bool Instance::CreateDevice() { if (!vertex_input_dynamic_state) { device_chain.unlink(); } + if (!fragment_shader_barycentric) { + device_chain.unlink(); + } + if (!legacy_vertex_attributes) { + device_chain.unlink(); + } auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get()); if (device_result != vk::Result::eSuccess) { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 51c2c57c5..81303c9cc 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -143,10 +143,21 @@ public: return maintenance5; } + /// Returns true when VK_KHR_fragment_shader_barycentric is supported. + bool IsFragmentShaderBarycentricSupported() const { + return fragment_shader_barycentric; + } + + /// Returns true when VK_EXT_primitive_topology_list_restart is supported. bool IsListRestartSupported() const { return list_restart; } + /// Returns true when VK_EXT_legacy_vertex_attributes is supported. + bool IsLegacyVertexAttributesSupported() const { + return legacy_vertex_attributes; + } + /// Returns true when geometry shaders are supported by the device bool IsGeometryStageSupported() const { return features.geometryShader; @@ -315,6 +326,7 @@ private: bool null_descriptor{}; bool maintenance5{}; bool list_restart{}; + bool legacy_vertex_attributes{}; u64 min_imported_host_pointer_alignment{}; u32 subgroup_size{}; bool tooling_info{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 612e950bb..47713f0ff 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -169,6 +169,9 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32), .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32), .support_explicit_workgroup_layout = true, + .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(), + .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && + instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, }; auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({}); ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}", @@ -185,7 +188,7 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() { const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); if (is_new) { it.value() = graphics_pipeline_pool.Create(instance, scheduler, desc_heap, graphics_key, - *pipeline_cache, infos, modules); + *pipeline_cache, infos, fetch_shader, modules); } return it->second; } @@ -302,8 +305,12 @@ bool PipelineCache::RefreshGraphicsKey() { } auto params = Liverpool::GetParams(*pgm); - std::tie(infos[stage_out_idx], modules[stage_out_idx], key.stage_hashes[stage_out_idx]) = - GetProgram(stage_in, params, binding); + std::optional fetch_shader_; + std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_, + key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding); + if (fetch_shader_) { + fetch_shader = fetch_shader_; + } return true; }; @@ -339,16 +346,14 @@ bool PipelineCache::RefreshGraphicsKey() { } } - const auto* vs_info = infos[static_cast(Shader::Stage::Vertex)]; - if (vs_info && !instance.IsVertexInputDynamicState()) { + const auto vs_info = infos[static_cast(Shader::Stage::Vertex)]; + if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) { u32 vertex_binding = 0; - for (const auto& input : vs_info->vs_inputs) { - if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || - input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { + for (const auto& attrib : fetch_shader->attributes) { + if (attrib.UsesStepRates()) { continue; } - const auto& buffer = - vs_info->ReadUdReg(input.sgpr_base, input.dword_offset); + const auto& buffer = attrib.GetSharp(*vs_info); if (buffer.GetSize() == 0) { continue; } @@ -392,7 +397,7 @@ bool PipelineCache::RefreshComputeKey() { Shader::Backend::Bindings binding{}; const auto* cs_pgm = &liverpool->regs.cs_program; const auto cs_params = Liverpool::GetParams(*cs_pgm); - std::tie(infos[0], modules[0], compute_key) = + std::tie(infos[0], modules[0], fetch_shader, compute_key) = GetProgram(Shader::Stage::Compute, cs_params, binding); return true; } @@ -423,24 +428,26 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, return module; } -std::tuple PipelineCache::GetProgram( - Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding) { +std::tuple, u64> +PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params, + Shader::Backend::Bindings& binding) { const auto runtime_info = BuildRuntimeInfo(stage); auto [it_pgm, new_program] = program_cache.try_emplace(params.hash); if (new_program) { Program* program = program_pool.Create(stage, params); auto start = binding; const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding); - const auto spec = Shader::StageSpecialization(program->info, runtime_info, start); + const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start); program->AddPermut(module, std::move(spec)); it_pgm.value() = program; - return std::make_tuple(&program->info, module, HashCombine(params.hash, 0)); + return std::make_tuple(&program->info, module, spec.fetch_shader_data, + HashCombine(params.hash, 0)); } Program* program = it_pgm->second; auto& info = program->info; info.RefreshFlatBuf(); - const auto spec = Shader::StageSpecialization(info, runtime_info, binding); + const auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding); size_t perm_idx = program->modules.size(); vk::ShaderModule module{}; @@ -454,7 +461,8 @@ std::tuple PipelineCache::GetProgram module = it->module; perm_idx = std::distance(program->modules.begin(), it); } - return std::make_tuple(&info, module, HashCombine(params.hash, perm_idx)); + return std::make_tuple(&info, module, spec.fetch_shader_data, + HashCombine(params.hash, perm_idx)); } void PipelineCache::DumpShader(std::span code, u64 hash, Shader::Stage stage, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 662bcbd80..e4a8abd4f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -47,8 +47,10 @@ public: const ComputePipeline* GetComputePipeline(); - std::tuple GetProgram( - Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding); + std::tuple, + u64> + GetProgram(Shader::Stage stage, Shader::ShaderParams params, + Shader::Backend::Bindings& binding); private: bool RefreshGraphicsKey(); @@ -80,6 +82,7 @@ private: tsl::robin_map graphics_pipelines; std::array infos{}; std::array modules{}; + std::optional fetch_shader{}; GraphicsPipelineKey graphics_key{}; u64 compute_key{}; }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ff5e88141..084b7c345 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -187,13 +187,14 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { } const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); - buffer_cache.BindVertexBuffers(vs_info); + const auto& fetch_shader = pipeline->GetFetchShader(); + buffer_cache.BindVertexBuffers(vs_info, fetch_shader); const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset); BeginRendering(*pipeline, state); UpdateDynamicState(*pipeline); - const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(regs); + const auto [vertex_offset, instance_offset] = fetch_shader->GetDrawOffsets(regs, vs_info); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); @@ -243,7 +244,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 } const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex); - buffer_cache.BindVertexBuffers(vs_info); + const auto& fetch_shader = pipeline->GetFetchShader(); + buffer_cache.BindVertexBuffers(vs_info, fetch_shader); buffer_cache.BindIndexBuffer(is_indexed, 0); const auto& [buffer, base] = @@ -397,10 +399,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { if (!stage) { continue; } - if (stage->uses_step_rates) { - push_data.step0 = regs.vgt_instance_step_rate_0; - push_data.step1 = regs.vgt_instance_step_rate_1; - } + push_data.step0 = regs.vgt_instance_step_rate_0; + push_data.step1 = regs.vgt_instance_step_rate_1; stage->PushUd(binding, push_data); BindBuffers(*stage, binding, push_data, set_writes, buffer_barriers); diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 488d44a7f..61cabdf11 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -87,12 +87,9 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso range.extent.levels = image.last_level - image.base_level + 1; } range.extent.layers = image.last_array - image.base_array + 1; - type = ConvertImageViewType(image.GetType()); + type = ConvertImageViewType(image.GetBoundType()); - // Adjust view type for partial cubemaps and arrays - if (image.IsPartialCubemap()) { - type = vk::ImageViewType::e2DArray; - } + // Adjust view type for arrays if (type == vk::ImageViewType::eCube) { if (desc.is_array) { type = vk::ImageViewType::eCubeArray; diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index c4f24420d..7430168d0 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -182,12 +182,15 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eB8G8R8A8Srgb: case vk::Format::eB8G8R8A8Unorm: case vk::Format::eR8G8B8A8Unorm: + case vk::Format::eR8G8B8A8Snorm: case vk::Format::eR8G8B8A8Uint: case vk::Format::eR32Sfloat: case vk::Format::eR32Uint: case vk::Format::eR16G16Sfloat: case vk::Format::eR16G16Unorm: + case vk::Format::eR16G16Snorm: case vk::Format::eB10G11R11UfloatPack32: + case vk::Format::eA2B10G10R10UnormPack32: return vk::Format::eR32Uint; case vk::Format::eBc1RgbaSrgbBlock: case vk::Format::eBc1RgbaUnormBlock: