Merge remote-tracking branch 'upstream/main'

2025-12-17 17:19:02 +00:00 · 2024-12-05 13:05:25 +08:00
parent 0f9c31a78f c019b54fec
commit 28ada0425e
39 changed files with 615 additions and 339 deletions
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -69,7 +69,7 @@ if (NOT TARGET ZLIB::ZLIB)
    FetchContent_MakeAvailable(ZLIB)
    add_library(ZLIB::ZLIB ALIAS zlib)
    # libpng expects this variable to exist after its find_package(ZLIB)
-    get_target_property(ZLIB_INCLUDE_DIRS zlib INTERFACE_INCLUDE_DIRECTORIES)
+    set(ZLIB_INCLUDE_DIRS "${FETCHCONTENT_BASE_DIR}/zlib-build")
 endif()

 # SDL3
--- a/src/core/devtools/widget/memory_map.cpp
+++ b/src/core/devtools/widget/memory_map.cpp
@@ -1,6 +1,7 @@
 //  SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 //  SPDX-License-Identifier: GPL-2.0-or-later

+#include <cinttypes>
 #include <imgui.h>
 #include <magic_enum.hpp>

@@ -23,7 +24,7 @@ bool MemoryMapViewer::Iterator::DrawLine() {
            return DrawLine();
        }
        TableNextColumn();
-        Text("%zX", m.base);
+        Text("%" PRIXPTR, m.base);
        TableNextColumn();
        Text("%zX", m.size);
        TableNextColumn();
@@ -48,9 +49,9 @@ bool MemoryMapViewer::Iterator::DrawLine() {
        return DrawLine();
    }
    TableNextColumn();
-    Text("%llX", m.base);
+    Text("%" PRIXPTR, m.base);
    TableNextColumn();
-    Text("%llX", m.size);
+    Text("%zX", m.size);
    TableNextColumn();
    auto type = static_cast<::Libraries::Kernel::MemoryTypes>(m.memory_type);
    Text("%s", magic_enum::enum_name(type).data());
--- a/src/core/libraries/ime/ime.cpp
+++ b/src/core/libraries/ime/ime.cpp
@@ -44,10 +44,14 @@ public:
            openEvent.param.rect.y = m_param.ime.posy;
        } else {
            openEvent.param.resource_id_array.userId = 1;
-            openEvent.param.resource_id_array.resource_id[0] = 1;
+            openEvent.param.resource_id_array.resourceId[0] = 1;
        }

-        Execute(nullptr, &openEvent, true);
+        // Are we supposed to call the event handler on init with
+        // ADD_OSK?
+        if (!ime_mode && False(m_param.key.option & OrbisImeKeyboardOption::AddOsk)) {
+            Execute(nullptr, &openEvent, true);
+        }

        if (ime_mode) {
            g_ime_state = ImeState(&m_param.ime);
@@ -56,6 +60,11 @@ public:
    }

    s32 Update(OrbisImeEventHandler handler) {
+        if (!m_ime_mode) {
+            /* We don't handle any events for ImeKeyboard */
+            return ORBIS_OK;
+        }
+
        std::unique_lock lock{g_ime_state.queue_mutex};

        while (!g_ime_state.event_queue.empty()) {
@@ -85,6 +94,16 @@ public:
        }
    }

+    s32 SetText(const char16_t* text, u32 length) {
+        g_ime_state.SetText(text, length);
+        return ORBIS_OK;
+    }
+
+    s32 SetCaret(const OrbisImeCaret* caret) {
+        g_ime_state.SetCaret(caret->index);
+        return ORBIS_OK;
+    }
+
    bool IsIme() {
        return m_ime_mode;
    }
@@ -98,6 +117,7 @@ private:
 };

 static std::unique_ptr<ImeHandler> g_ime_handler;
+static std::unique_ptr<ImeHandler> g_keyboard_handler;

 int PS4_SYSV_ABI FinalizeImeModule() {
    LOG_ERROR(Lib_Ime, "(STUBBED) called");
@@ -130,9 +150,6 @@ s32 PS4_SYSV_ABI sceImeClose() {
    if (!g_ime_handler) {
        return ORBIS_IME_ERROR_NOT_OPENED;
    }
-    if (!g_ime_handler->IsIme()) {
-        return ORBIS_IME_ERROR_NOT_OPENED;
-    }

    g_ime_handler.release();
    g_ime_ui = ImeUi();
@@ -233,14 +250,11 @@ s32 PS4_SYSV_ABI sceImeGetPanelSize(const OrbisImeParam* param, u32* width, u32*
 s32 PS4_SYSV_ABI sceImeKeyboardClose(s32 userId) {
    LOG_INFO(Lib_Ime, "(STUBBED) called");

-    if (!g_ime_handler) {
-        return ORBIS_IME_ERROR_NOT_OPENED;
-    }
-    if (g_ime_handler->IsIme()) {
+    if (!g_keyboard_handler) {
        return ORBIS_IME_ERROR_NOT_OPENED;
    }

-    g_ime_handler.release();
+    g_keyboard_handler.release();
    return ORBIS_OK;
 }

@@ -255,18 +269,17 @@ int PS4_SYSV_ABI sceImeKeyboardGetResourceId() {
 }

 s32 PS4_SYSV_ABI sceImeKeyboardOpen(s32 userId, const OrbisImeKeyboardParam* param) {
-    LOG_ERROR(Lib_Ime, "(STUBBED) called");
+    LOG_INFO(Lib_Ime, "called");

    if (!param) {
        return ORBIS_IME_ERROR_INVALID_ADDRESS;
    }
-    if (g_ime_handler) {
+    if (g_keyboard_handler) {
        return ORBIS_IME_ERROR_BUSY;
    }

-    // g_ime_handler = std::make_unique<ImeHandler>(param);
-    // return ORBIS_OK;
-    return ORBIS_IME_ERROR_CONNECTION_FAILED; // Fixup
+    g_keyboard_handler = std::make_unique<ImeHandler>(param);
+    return ORBIS_OK;
 }

 int PS4_SYSV_ABI sceImeKeyboardOpenInternal() {
@@ -287,16 +300,14 @@ int PS4_SYSV_ABI sceImeKeyboardUpdate() {
 s32 PS4_SYSV_ABI sceImeOpen(const OrbisImeParam* param, const void* extended) {
    LOG_INFO(Lib_Ime, "called");

-    if (!g_ime_handler) {
-        g_ime_handler = std::make_unique<ImeHandler>(param);
-    } else {
-        if (g_ime_handler->IsIme()) {
-            return ORBIS_IME_ERROR_BUSY;
-        }
-
-        g_ime_handler->Init((void*)param, true);
+    if (!param) {
+        return ORBIS_IME_ERROR_INVALID_ADDRESS;
+    }
+    if (g_ime_handler) {
+        return ORBIS_IME_ERROR_BUSY;
    }

+    g_ime_handler = std::make_unique<ImeHandler>(param);
    return ORBIS_OK;
 }

@@ -322,13 +333,29 @@ int PS4_SYSV_ABI sceImeSetCandidateIndex() {
 }

 int PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret) {
-    LOG_ERROR(Lib_Ime, "(STUBBED) called");
-    return ORBIS_OK;
+    LOG_TRACE(Lib_Ime, "called");
+
+    if (!g_ime_handler) {
+        return ORBIS_IME_ERROR_NOT_OPENED;
+    }
+    if (!caret) {
+        return ORBIS_IME_ERROR_INVALID_ADDRESS;
+    }
+
+    return g_ime_handler->SetCaret(caret);
 }

-int PS4_SYSV_ABI sceImeSetText() {
-    LOG_ERROR(Lib_Ime, "(STUBBED) called");
-    return ORBIS_OK;
+s32 PS4_SYSV_ABI sceImeSetText(const char16_t* text, u32 length) {
+    LOG_TRACE(Lib_Ime, "called");
+
+    if (!g_ime_handler) {
+        return ORBIS_IME_ERROR_NOT_OPENED;
+    }
+    if (!text) {
+        return ORBIS_IME_ERROR_INVALID_ADDRESS;
+    }
+
+    return g_ime_handler->SetText(text, length);
 }

 int PS4_SYSV_ABI sceImeSetTextGeometry() {
@@ -337,13 +364,19 @@ int PS4_SYSV_ABI sceImeSetTextGeometry() {
 }

 s32 PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler) {
-    LOG_TRACE(Lib_Ime, "called");
+    if (g_ime_handler) {
+        g_ime_handler->Update(handler);
+    }

-    if (!g_ime_handler) {
+    if (g_keyboard_handler) {
+        g_keyboard_handler->Update(handler);
+    }
+
+    if (!g_ime_handler || !g_keyboard_handler) {
        return ORBIS_IME_ERROR_NOT_OPENED;
    }

-    return g_ime_handler->Update(handler);
+    return ORBIS_OK;
 }

 int PS4_SYSV_ABI sceImeVshClearPreedit() {
--- a/src/core/libraries/ime/ime.h
+++ b/src/core/libraries/ime/ime.h
@@ -26,6 +26,24 @@ enum class OrbisImeKeyboardOption : u32 {
 };
 DECLARE_ENUM_FLAG_OPERATORS(OrbisImeKeyboardOption)

+enum class OrbisImeOption : u32 {
+    DEFAULT = 0,
+    MULTILINE = 1,
+    NO_AUTO_CAPITALIZATION = 2,
+    PASSWORD = 4,
+    LANGUAGES_FORCED = 8,
+    EXT_KEYBOARD = 16,
+    NO_LEARNING = 32,
+    FIXED_POSITION = 64,
+    DISABLE_RESUME = 256,
+    DISABLE_AUTO_SPACE = 512,
+    DISABLE_POSITION_ADJUSTMENT = 2048,
+    EXPANDED_PREEDIT_BUFFER = 4096,
+    USE_JAPANESE_EISUU_KEY_AS_CAPSLOCK = 8192,
+    USE_2K_COORDINATES = 16384,
+};
+DECLARE_ENUM_FLAG_OPERATORS(OrbisImeOption)
+
 struct OrbisImeKeyboardParam {
    OrbisImeKeyboardOption option;
    s8 reserved1[4];
@@ -41,9 +59,9 @@ struct OrbisImeParam {
    OrbisImeEnterLabel enter_label;
    OrbisImeInputMethod input_method;
    OrbisImeTextFilter filter;
-    u32 option;
-    u32 max_text_length;
-    char16_t* input_text_buffer;
+    OrbisImeOption option;
+    u32 maxTextLength;
+    char16_t* inputTextBuffer;
    float posx;
    float posy;
    OrbisImeHorizontalAlignment horizontal_alignment;
@@ -93,7 +111,7 @@ int PS4_SYSV_ABI sceImeOpenInternal();
 void PS4_SYSV_ABI sceImeParamInit(OrbisImeParam* param);
 int PS4_SYSV_ABI sceImeSetCandidateIndex();
 s32 PS4_SYSV_ABI sceImeSetCaret(const OrbisImeCaret* caret);
-int PS4_SYSV_ABI sceImeSetText();
+s32 PS4_SYSV_ABI sceImeSetText(const char16_t* text, u32 length);
 int PS4_SYSV_ABI sceImeSetTextGeometry();
 s32 PS4_SYSV_ABI sceImeUpdate(OrbisImeEventHandler handler);
 int PS4_SYSV_ABI sceImeVshClearPreedit();
--- a/src/core/libraries/ime/ime_common.h
+++ b/src/core/libraries/ime/ime_common.h
@@ -142,7 +142,7 @@ struct OrbisImeKeycode {

 struct OrbisImeKeyboardResourceIdArray {
    s32 userId;
-    u32 resource_id[6];
+    u32 resourceId[5];
 };

 enum class OrbisImeCaretMovementDirection : u32 {
--- a/src/core/libraries/ime/ime_ui.cpp
+++ b/src/core/libraries/ime/ime_ui.cpp
@@ -16,7 +16,7 @@ ImeState::ImeState(const OrbisImeParam* param) {
    }

    work_buffer = param->work;
-    text_buffer = param->input_text_buffer;
+    text_buffer = param->inputTextBuffer;

    std::size_t text_len = std::char_traits<char16_t>::length(text_buffer);
    if (!ConvertOrbisToUTF8(text_buffer, text_len, current_text.begin(),
@@ -26,15 +26,13 @@ ImeState::ImeState(const OrbisImeParam* param) {
 }

 ImeState::ImeState(ImeState&& other) noexcept
-    : input_changed(other.input_changed), work_buffer(other.work_buffer),
-      text_buffer(other.text_buffer), current_text(std::move(other.current_text)),
-      event_queue(std::move(other.event_queue)) {
+    : work_buffer(other.work_buffer), text_buffer(other.text_buffer),
+      current_text(std::move(other.current_text)), event_queue(std::move(other.event_queue)) {
    other.text_buffer = nullptr;
 }

 ImeState& ImeState::operator=(ImeState&& other) noexcept {
    if (this != &other) {
-        input_changed = other.input_changed;
        work_buffer = other.work_buffer;
        text_buffer = other.text_buffer;
        current_text = std::move(other.current_text);
@@ -63,6 +61,10 @@ void ImeState::SendCloseEvent() {
    SendEvent(&closeEvent);
 }

+void ImeState::SetText(const char16_t* text, u32 length) {}
+
+void ImeState::SetCaret(u32 position) {}
+
 bool ImeState::ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len,
                                  char* utf8_text, std::size_t utf8_text_len) {
    std::fill(utf8_text, utf8_text + utf8_text_len, '\0');
@@ -180,9 +182,8 @@ void ImeUi::DrawInputText() {
    if (first_render) {
        SetKeyboardFocusHere();
    }
-    if (InputTextEx("##ImeInput", nullptr, state->current_text.begin(), ime_param->max_text_length,
+    if (InputTextEx("##ImeInput", nullptr, state->current_text.begin(), ime_param->maxTextLength,
                    input_size, ImGuiInputTextFlags_CallbackAlways, InputTextCallback, this)) {
-        state->input_changed = true;
    }
 }

@@ -190,6 +191,39 @@ int ImeUi::InputTextCallback(ImGuiInputTextCallbackData* data) {
    ImeUi* ui = static_cast<ImeUi*>(data->UserData);
    ASSERT(ui);

+    static std::string lastText;
+    std::string currentText(data->Buf, data->BufTextLen);
+    if (currentText != lastText) {
+        OrbisImeEditText eventParam{};
+        eventParam.str = reinterpret_cast<char16_t*>(ui->ime_param->work);
+        eventParam.caret_index = data->CursorPos;
+        eventParam.area_num = 1;
+
+        eventParam.text_area[0].mode = 1; // Edit mode
+        eventParam.text_area[0].index = data->CursorPos;
+        eventParam.text_area[0].length = data->BufTextLen;
+
+        if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen, eventParam.str,
+                                           ui->ime_param->maxTextLength)) {
+            LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8");
+            return 0;
+        }
+
+        if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen,
+                                           ui->ime_param->inputTextBuffer,
+                                           ui->ime_param->maxTextLength)) {
+            LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8");
+            return 0;
+        }
+
+        OrbisImeEvent event{};
+        event.id = OrbisImeEventId::UpdateText;
+        event.param.text = eventParam;
+
+        lastText = currentText;
+        ui->state->SendEvent(&event);
+    }
+
    static int lastCaretPos = -1;
    if (lastCaretPos == -1) {
        lastCaretPos = data->CursorPos;
@@ -209,39 +243,6 @@ int ImeUi::InputTextCallback(ImGuiInputTextCallbackData* data) {
        ui->state->SendEvent(&event);
    }

-    static std::string lastText;
-    std::string currentText(data->Buf, data->BufTextLen);
-    if (currentText != lastText) {
-        OrbisImeEditText eventParam{};
-        eventParam.str = reinterpret_cast<char16_t*>(ui->ime_param->work);
-        eventParam.caret_index = data->CursorPos;
-        eventParam.area_num = 1;
-
-        eventParam.text_area[0].mode = 1; // Edit mode
-        eventParam.text_area[0].index = data->CursorPos;
-        eventParam.text_area[0].length = data->BufTextLen;
-
-        if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen, eventParam.str,
-                                           ui->ime_param->max_text_length)) {
-            LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8");
-            return 0;
-        }
-
-        if (!ui->state->ConvertUTF8ToOrbis(data->Buf, data->BufTextLen,
-                                           ui->ime_param->input_text_buffer,
-                                           ui->ime_param->max_text_length)) {
-            LOG_ERROR(Lib_ImeDialog, "Failed to convert Orbis char to UTF-8");
-            return 0;
-        }
-
-        OrbisImeEvent event{};
-        event.id = OrbisImeEventId::UpdateText;
-        event.param.text = eventParam;
-
-        lastText = currentText;
-        ui->state->SendEvent(&event);
-    }
-
    return 0;
 }

--- a/src/core/libraries/ime/ime_ui.h
+++ b/src/core/libraries/ime/ime_ui.h
@@ -22,10 +22,7 @@ class ImeState {
    friend class ImeHandler;
    friend class ImeUi;

-    bool input_changed = false;
-
    void* work_buffer{};
-
    char16_t* text_buffer{};

    // A character can hold up to 4 bytes in UTF-8
@@ -43,6 +40,9 @@ public:
    void SendEnterEvent();
    void SendCloseEvent();

+    void SetText(const char16_t* text, u32 length);
+    void SetCaret(u32 position);
+
 private:
    bool ConvertOrbisToUTF8(const char16_t* orbis_text, std::size_t orbis_text_len, char* utf8_text,
                            std::size_t native_text_len);
--- a/src/core/libraries/kernel/kernel.cpp
+++ b/src/core/libraries/kernel/kernel.cpp
@@ -203,7 +203,7 @@ int PS4_SYSV_ABI _sigprocmask() {
 }

 int PS4_SYSV_ABI posix_getpagesize() {
-    return 4096;
+    return 16_KB;
 }

 void RegisterKernel(Core::Loader::SymbolsResolver* sym) {
--- a/src/core/libraries/kernel/threads/pthread.cpp
+++ b/src/core/libraries/kernel/threads/pthread.cpp
@@ -281,7 +281,7 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt

    /* Create thread */
    new_thread->native_thr = Core::Thread();
-    int ret = new_thread->native_thr.Create(RunThread, new_thread);
+    int ret = new_thread->native_thr.Create(RunThread, new_thread, &new_thread->attr);
    ASSERT_MSG(ret == 0, "Failed to create thread with error {}", ret);
    if (ret) {
        *thread = nullptr;
--- a/src/core/thread.cpp
+++ b/src/core/thread.cpp
@@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

+#include "libraries/kernel/threads/pthread.h"
 #include "thread.h"

 #ifdef _WIN64
@@ -15,7 +16,7 @@ Thread::Thread() : native_handle{0} {}

 Thread::~Thread() {}

-int Thread::Create(ThreadFunc func, void* arg) {
+int Thread::Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr) {
 #ifdef _WIN64
    native_handle = CreateThread(nullptr, 0, (LPTHREAD_START_ROUTINE)func, arg, 0, nullptr);
    return native_handle ? 0 : -1;
@@ -23,6 +24,7 @@ int Thread::Create(ThreadFunc func, void* arg) {
    pthread_t* pthr = reinterpret_cast<pthread_t*>(&native_handle);
    pthread_attr_t pattr;
    pthread_attr_init(&pattr);
+    pthread_attr_setstack(&pattr, attr->stackaddr_attr, attr->stacksize_attr);
    return pthread_create(pthr, &pattr, (PthreadFunc)func, arg);
 #endif
 }
--- a/src/core/thread.h
+++ b/src/core/thread.h
@@ -5,6 +5,10 @@

 #include "common/types.h"

+namespace Libraries::Kernel {
+struct PthreadAttr;
+} // namespace Libraries::Kernel
+
 namespace Core {

 class Thread {
@@ -15,7 +19,7 @@ public:
    Thread();
    ~Thread();

-    int Create(ThreadFunc func, void* arg);
+    int Create(ThreadFunc func, void* arg, const ::Libraries::Kernel::PthreadAttr* attr);
    void Exit();

    uintptr_t GetHandle() {
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -206,7 +206,7 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) {
    return main;
 }

-void SetupCapabilities(const Info& info, EmitContext& ctx) {
+void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) {
    ctx.AddCapability(spv::Capability::Image1D);
    ctx.AddCapability(spv::Capability::Sampled1D);
    ctx.AddCapability(spv::Capability::ImageQuery);
@@ -251,6 +251,10 @@ void SetupCapabilities(const Info& info, EmitContext& ctx) {
    if (info.stage == Stage::Geometry) {
        ctx.AddCapability(spv::Capability::Geometry);
    }
+    if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) {
+        ctx.AddExtension("SPV_KHR_fragment_shader_barycentric");
+        ctx.AddCapability(spv::Capability::FragmentBarycentricKHR);
+    }
 }

 void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
@@ -342,7 +346,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
    EmitContext ctx{profile, runtime_info, program.info, binding};
    const Id main{DefineMain(ctx, program)};
    DefineEntryPoint(program, ctx, main);
-    SetupCapabilities(program.info, ctx);
+    SetupCapabilities(program.info, profile, ctx);
    SetupFloatMode(ctx, profile, runtime_info, main);
    PatchPhiNodes(program, ctx);
    binding.user_data += program.info.ud_mask.NumRegs();
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -171,54 +171,38 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
                                      rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
 }

+Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
+    if (IR::IsPosition(attr)) {
+        ASSERT(attr == IR::Attribute::Position0);
+        const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
+        const auto pointer{
+            ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))};
+        const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
+        return ctx.OpLoad(ctx.F32[1],
+                          ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
+    }
+
+    if (IR::IsParam(attr)) {
+        const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
+        const auto param = ctx.input_params.at(param_id).id;
+        const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
+        const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
+        const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
+        return ctx.OpLoad(ctx.F32[1],
+                          ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
+    }
+    UNREACHABLE();
+}
+
 Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
    if (ctx.info.stage == Stage::Geometry) {
-        if (IR::IsPosition(attr)) {
-            ASSERT(attr == IR::Attribute::Position0);
-            const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
-            const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index),
-                                                 ctx.ConstU32(0u))};
-            const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
-            return ctx.OpLoad(ctx.F32[1],
-                              ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
-        }
-
-        if (IR::IsParam(attr)) {
-            const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
-            const auto param = ctx.input_params.at(param_id).id;
-            const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
-            const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
-            const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
-            return ctx.OpLoad(ctx.F32[1],
-                              ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
-        }
-        UNREACHABLE();
+        return EmitGetAttributeForGeometry(ctx, attr, comp, index);
    }

    if (IR::IsParam(attr)) {
        const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
        const auto& param{ctx.input_params.at(index)};
-        if (param.buffer_handle < 0) {
-            if (!ValidId(param.id)) {
-                // Attribute is disabled or varying component is not written
-                return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
-            }
-
-            Id result;
-            if (param.is_default) {
-                result = ctx.OpCompositeExtract(param.component_type, param.id, comp);
-            } else if (param.num_components > 1) {
-                const Id pointer{
-                    ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
-                result = ctx.OpLoad(param.component_type, pointer);
-            } else {
-                result = ctx.OpLoad(param.component_type, param.id);
-            }
-            if (param.is_integer) {
-                result = ctx.OpBitcast(ctx.F32[1], result);
-            }
-            return result;
-        } else {
+        if (param.buffer_handle >= 0) {
            const auto step_rate = EmitReadStepRate(ctx, param.id.value);
            const auto offset = ctx.OpIAdd(
                ctx.U32[1],
@@ -229,7 +213,26 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
                ctx.ConstU32(comp));
            return EmitReadConstBuffer(ctx, param.buffer_handle, offset);
        }
+
+        Id result;
+        if (param.is_loaded) {
+            // Attribute is either default or manually interpolated. The id points to an already
+            // loaded vector.
+            result = ctx.OpCompositeExtract(param.component_type, param.id, comp);
+        } else if (param.num_components > 1) {
+            // Attribute is a vector and we need to access a specific component.
+            const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
+            result = ctx.OpLoad(param.component_type, pointer);
+        } else {
+            // Attribute is a single float or integer, simply load it.
+            result = ctx.OpLoad(param.component_type, param.id);
+        }
+        if (param.is_integer) {
+            result = ctx.OpBitcast(ctx.F32[1], result);
+        }
+        return result;
    }
+
    switch (attr) {
    case IR::Attribute::FragCoord: {
        const Id coord = ctx.OpLoad(
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -187,7 +187,8 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const
 Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool has_mips) {
    const auto& texture = ctx.images[handle & 0xFFFF];
    const Id image = ctx.OpLoad(texture.image_type, texture.id);
-    const auto type = ctx.info.images[handle & 0xFFFF].type;
+    const auto sharp = ctx.info.images[handle & 0xFFFF].GetSharp(ctx.info);
+    const auto type = sharp.GetBoundType();
    const Id zero = ctx.u32_zero_value;
    const auto mips{[&] { return has_mips ? ctx.OpImageQueryLevels(ctx.U32[1], image) : zero; }};
    const bool uses_lod{type != AmdGpu::ImageType::Color2DMsaa && !texture.is_storage};
--- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
@@ -8,6 +8,9 @@
 namespace Shader::Backend::SPIRV {

 void EmitPrologue(EmitContext& ctx) {
+    if (ctx.stage == Stage::Fragment) {
+        ctx.DefineInterpolatedAttribs();
+    }
    ctx.DefineBufferOffsets();
 }

--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -4,6 +4,7 @@
 #include "common/assert.h"
 #include "common/div_ceil.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/ir/passes/srt.h"
 #include "video_core/amdgpu/types.h"

@@ -155,18 +156,12 @@ void EmitContext::DefineInterfaces() {
 }

 const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
-    switch (fmt) {
-    case AmdGpu::NumberFormat::Float:
-    case AmdGpu::NumberFormat::Unorm:
-    case AmdGpu::NumberFormat::Snorm:
-    case AmdGpu::NumberFormat::SnormNz:
-    case AmdGpu::NumberFormat::Sscaled:
-    case AmdGpu::NumberFormat::Uscaled:
-    case AmdGpu::NumberFormat::Srgb:
+    switch (GetNumberClass(fmt)) {
+    case AmdGpu::NumberClass::Float:
        return ctx.F32;
-    case AmdGpu::NumberFormat::Sint:
+    case AmdGpu::NumberClass::Sint:
        return ctx.S32;
-    case AmdGpu::NumberFormat::Uint:
+    case AmdGpu::NumberClass::Uint:
        return ctx.U32;
    default:
        break;
@@ -176,18 +171,12 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {

 EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id,
                                                          u32 num_components, bool output) {
-    switch (fmt) {
-    case AmdGpu::NumberFormat::Float:
-    case AmdGpu::NumberFormat::Unorm:
-    case AmdGpu::NumberFormat::Snorm:
-    case AmdGpu::NumberFormat::SnormNz:
-    case AmdGpu::NumberFormat::Sscaled:
-    case AmdGpu::NumberFormat::Uscaled:
-    case AmdGpu::NumberFormat::Srgb:
+    switch (GetNumberClass(fmt)) {
+    case AmdGpu::NumberClass::Float:
        return {id, output ? output_f32 : input_f32, F32[1], num_components, false};
-    case AmdGpu::NumberFormat::Uint:
+    case AmdGpu::NumberClass::Uint:
        return {id, output ? output_u32 : input_u32, U32[1], num_components, true};
-    case AmdGpu::NumberFormat::Sint:
+    case AmdGpu::NumberClass::Sint:
        return {id, output ? output_s32 : input_s32, S32[1], num_components, true};
    default:
        break;
@@ -222,6 +211,36 @@ void EmitContext::DefineBufferOffsets() {
    }
 }

+void EmitContext::DefineInterpolatedAttribs() {
+    if (!profile.needs_manual_interpolation) {
+        return;
+    }
+    // Iterate all input attributes, load them and manually interpolate with barycentric
+    // coordinates.
+    for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
+        const auto& input = runtime_info.fs_info.inputs[i];
+        const u32 semantic = input.param_index;
+        auto& params = input_params[semantic];
+        if (input.is_flat || params.is_loaded) {
+            continue;
+        }
+        const Id p_array{OpLoad(TypeArray(F32[4], ConstU32(3U)), params.id)};
+        const Id p0{OpCompositeExtract(F32[4], p_array, 0U)};
+        const Id p1{OpCompositeExtract(F32[4], p_array, 1U)};
+        const Id p2{OpCompositeExtract(F32[4], p_array, 2U)};
+        const Id p10{OpFSub(F32[4], p1, p0)};
+        const Id p20{OpFSub(F32[4], p2, p0)};
+        const Id bary_coord{OpLoad(F32[3], gl_bary_coord_id)};
+        const Id bary_coord_y{OpCompositeExtract(F32[1], bary_coord, 1)};
+        const Id bary_coord_z{OpCompositeExtract(F32[1], bary_coord, 2)};
+        const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)};
+        const Id p20_z{OpVectorTimesScalar(F32[4], p20, bary_coord_z)};
+        params.id = OpFAdd(F32[4], p0, OpFAdd(F32[4], p10_y, p20_z));
+        Name(params.id, fmt::format("fs_in_attr{}", semantic));
+        params.is_loaded = true;
+    }
+}
+
 Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
    switch (default_value) {
    case 0:
@@ -250,33 +269,42 @@ void EmitContext::DefineInputs() {
        base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
        instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);

-        for (const auto& input : info.vs_inputs) {
-            ASSERT(input.binding < IR::NumParams);
-            const Id type{GetAttributeType(*this, input.fmt)[4]};
-            if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ||
-                input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) {
-
+        const auto fetch_shader = Gcn::ParseFetchShader(info);
+        if (!fetch_shader) {
+            break;
+        }
+        for (const auto& attrib : fetch_shader->attributes) {
+            ASSERT(attrib.semantic < IR::NumParams);
+            const auto sharp = attrib.GetSharp(info);
+            const Id type{GetAttributeType(*this, sharp.GetNumberFmt())[4]};
+            if (attrib.UsesStepRates()) {
                const u32 rate_idx =
-                    input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0
-                                                                                             : 1;
+                    attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::OverStepRate0 ? 0
+                                                                                                : 1;
+                const u32 num_components = AmdGpu::NumComponents(sharp.GetDataFmt());
+                const auto buffer =
+                    std::ranges::find_if(info.buffers, [&attrib](const auto& buffer) {
+                        return buffer.instance_attrib == attrib.semantic;
+                    });
                // Note that we pass index rather than Id
-                input_params[input.binding] = {
-                    rate_idx,
-                    input_u32,
-                    U32[1],
-                    input.num_components,
-                    true,
-                    false,
-                    input.instance_data_buf,
+                input_params[attrib.semantic] = SpirvAttribute{
+                    .id = rate_idx,
+                    .pointer_type = input_u32,
+                    .component_type = U32[1],
+                    .num_components = std::min<u16>(attrib.num_elements, num_components),
+                    .is_integer = true,
+                    .is_loaded = false,
+                    .buffer_handle = int(buffer - info.buffers.begin()),
                };
            } else {
-                Id id{DefineInput(type, input.binding)};
-                if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) {
-                    Name(id, fmt::format("vs_instance_attr{}", input.binding));
+                Id id{DefineInput(type, attrib.semantic)};
+                if (attrib.GetStepRate() == Gcn::VertexAttribute::InstanceIdType::Plain) {
+                    Name(id, fmt::format("vs_instance_attr{}", attrib.semantic));
                } else {
-                    Name(id, fmt::format("vs_in_attr{}", input.binding));
+                    Name(id, fmt::format("vs_in_attr{}", attrib.semantic));
                }
-                input_params[input.binding] = GetAttributeInfo(input.fmt, id, 4, false);
+                input_params[attrib.semantic] =
+                    GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
                interfaces.push_back(id);
            }
        }
@@ -286,6 +314,10 @@ void EmitContext::DefineInputs() {
        frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
        frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
        front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
+        if (profile.needs_manual_interpolation) {
+            gl_bary_coord_id =
+                DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
+        }
        for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
            const auto& input = runtime_info.fs_info.inputs[i];
            const u32 semantic = input.param_index;
@@ -299,14 +331,21 @@ void EmitContext::DefineInputs() {
            const IR::Attribute param{IR::Attribute::Param0 + input.param_index};
            const u32 num_components = info.loads.NumComponents(param);
            const Id type{F32[num_components]};
-            const Id id{DefineInput(type, semantic)};
-            if (input.is_flat) {
-                Decorate(id, spv::Decoration::Flat);
+            Id attr_id{};
+            if (profile.needs_manual_interpolation && !input.is_flat) {
+                attr_id = DefineInput(TypeArray(type, ConstU32(3U)), semantic);
+                Decorate(attr_id, spv::Decoration::PerVertexKHR);
+                Name(attr_id, fmt::format("fs_in_attr{}_p", semantic));
+            } else {
+                attr_id = DefineInput(type, semantic);
+                Name(attr_id, fmt::format("fs_in_attr{}", semantic));
+            }
+            if (input.is_flat) {
+                Decorate(attr_id, spv::Decoration::Flat);
            }
-            Name(id, fmt::format("fs_in_attr{}", semantic));
            input_params[semantic] =
-                GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, false);
-            interfaces.push_back(id);
+                GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
+            interfaces.push_back(attr_id);
        }
        break;
    case Stage::Compute:
@@ -512,9 +551,10 @@ void EmitContext::DefineBuffers() {

 void EmitContext::DefineTextureBuffers() {
    for (const auto& desc : info.texture_buffers) {
-        const bool is_integer =
-            desc.nfmt == AmdGpu::NumberFormat::Uint || desc.nfmt == AmdGpu::NumberFormat::Sint;
-        const VectorIds& sampled_type{GetAttributeType(*this, desc.nfmt)};
+        const auto sharp = desc.GetSharp(info);
+        const auto nfmt = sharp.GetNumberFmt();
+        const bool is_integer = AmdGpu::IsInteger(nfmt);
+        const VectorIds& sampled_type{GetAttributeType(*this, nfmt)};
        const u32 sampled = desc.is_written ? 2 : 1;
        const Id image_type{TypeImage(sampled_type[1], spv::Dim::Buffer, false, false, false,
                                      sampled, spv::ImageFormat::Unknown)};
@@ -609,10 +649,11 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
 }

 Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
-    const auto image = ctx.info.ReadUdSharp<AmdGpu::Image>(desc.sharp_idx);
+    const auto image = desc.GetSharp(ctx.info);
    const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown;
+    const auto type = image.GetBoundType();
    const u32 sampled = desc.is_storage ? 2 : 1;
-    switch (desc.type) {
+    switch (type) {
    case AmdGpu::ImageType::Color1D:
        return ctx.TypeImage(sampled_type, spv::Dim::Dim1D, false, false, false, sampled, format);
    case AmdGpu::ImageType::Color1DArray:
@@ -631,14 +672,15 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
    default:
        break;
    }
-    throw InvalidArgument("Invalid texture type {}", desc.type);
+    throw InvalidArgument("Invalid texture type {}", type);
 }

 void EmitContext::DefineImagesAndSamplers() {
    for (const auto& image_desc : info.images) {
-        const bool is_integer = image_desc.nfmt == AmdGpu::NumberFormat::Uint ||
-                                image_desc.nfmt == AmdGpu::NumberFormat::Sint;
-        const VectorIds& data_types = GetAttributeType(*this, image_desc.nfmt);
+        const auto sharp = image_desc.GetSharp(info);
+        const auto nfmt = sharp.GetNumberFmt();
+        const bool is_integer = AmdGpu::IsInteger(nfmt);
+        const VectorIds& data_types = GetAttributeType(*this, nfmt);
        const Id sampled_type = data_types[1];
        const Id image_type{ImageType(*this, image_desc, sampled_type)};
        const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -42,7 +42,9 @@ public:
    ~EmitContext();

    Id Def(const IR::Value& value);
+
    void DefineBufferOffsets();
+    void DefineInterpolatedAttribs();

    [[nodiscard]] Id DefineInput(Id type, u32 location) {
        const Id input_id{DefineVar(type, spv::StorageClass::Input)};
@@ -197,6 +199,9 @@ public:

    Id shared_memory_u32_type{};

+    Id interpolate_func{};
+    Id gl_bary_coord_id{};
+
    struct TextureDefinition {
        const VectorIds* data_types;
        Id id;
@@ -241,7 +246,7 @@ public:
        Id component_type;
        u32 num_components;
        bool is_integer{};
-        bool is_default{};
+        bool is_loaded{};
        s32 buffer_handle{-1};
    };
    std::array<SpirvAttribute, IR::NumParams> input_params{};
--- a/src/shader_recompiler/frontend/fetch_shader.cpp
+++ b/src/shader_recompiler/frontend/fetch_shader.cpp
@@ -34,8 +34,14 @@ namespace Shader::Gcn {
 * We take the reverse way, extract the original input semantics from these instructions.
 **/

-FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
-    FetchShaderData data{};
+std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info) {
+    if (!info.has_fetch_shader) {
+        return std::nullopt;
+    }
+    const u32* code;
+    std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code));
+
+    FetchShaderData data{.code = code};
    GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
    GcnDecodeContext decoder;

@@ -49,7 +55,7 @@ FetchShaderData ParseFetchShader(const u32* code, u32* out_size) {
    u32 semantic_index = 0;
    while (!code_slice.atEnd()) {
        const auto inst = decoder.decodeInstruction(code_slice);
-        *out_size += inst.length;
+        data.size += inst.length;

        if (inst.opcode == Opcode::S_SETPC_B64) {
            break;
--- a/src/shader_recompiler/frontend/fetch_shader.h
+++ b/src/shader_recompiler/frontend/fetch_shader.h
@@ -3,26 +3,80 @@

 #pragma once

+#include <ranges>
 #include <vector>
 #include "common/types.h"
+#include "shader_recompiler/info.h"

 namespace Shader::Gcn {

 struct VertexAttribute {
+    /// Step-rate mode of an attribute, as decoded from instance_data by GetStepRate().
+    enum InstanceIdType : u8 {
+        None = 0,
+        OverStepRate0 = 1,
+        OverStepRate1 = 2,
+        Plain = 3,
+    };
+
     u8 semantic;      ///< Semantic index of the attribute
     u8 dest_vgpr;     ///< Destination VGPR to load first component.
     u8 num_elements;  ///< Number of components to load
     u8 sgpr_base;     ///< SGPR that contains the pointer to the list of vertex V#
     u8 dword_offset;  ///< The dword offset of the V# that describes this attribute.
     u8 instance_data; ///< Indicates that the buffer will be accessed in instance rate
+
+    /// Reinterprets the raw instance_data byte as an InstanceIdType.
+    [[nodiscard]] InstanceIdType GetStepRate() const {
+        return static_cast<InstanceIdType>(instance_data);
+    }
+
+    /// True only for the OverStepRate0 and OverStepRate1 modes.
+    [[nodiscard]] bool UsesStepRates() const {
+        const InstanceIdType rate = GetStepRate();
+        return rate == OverStepRate0 || rate == OverStepRate1;
+    }
+
+    /// Reads the AmdGpu::Buffer descriptor through info.ReadUdReg(sgpr_base, dword_offset).
+    [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Shader::Info& info) const noexcept {
+        return info.ReadUdReg<AmdGpu::Buffer>(sgpr_base, dword_offset);
+    }
+
+    bool operator==(const VertexAttribute& other) const = default;
 };

 struct FetchShaderData {
+    const u32* code = nullptr; ///< Start of the parsed fetch shader code
+    u32 size = 0;              ///< Sum of the decoded instruction lengths
     std::vector<VertexAttribute> attributes;
     s8 vertex_offset_sgpr = -1;   ///< SGPR of vertex offset from VADDR
     s8 instance_offset_sgpr = -1; ///< SGPR of instance offset from VADDR
+
+    /// True when at least one attribute is fetched with a programmable step rate.
+    [[nodiscard]] bool UsesStepRates() const {
+        return std::ranges::any_of(attributes, &VertexAttribute::UsesStepRates);
+    }
+
+    /// Returns {vertex, instance} offsets; the vertex SGPR applies only when index_offset is 0.
+    [[nodiscard]] std::pair<u32, u32> GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs,
+                                                     const Info& info) const {
+        u32 vertex_offset = regs.index_offset;
+        u32 instance_offset = 0;
+        if (vertex_offset == 0 && vertex_offset_sgpr != -1) {
+            vertex_offset = info.user_data[vertex_offset_sgpr];
+        }
+        if (instance_offset_sgpr != -1) {
+            instance_offset = info.user_data[instance_offset_sgpr];
+        }
+        return {vertex_offset, instance_offset};
+    }
+
+    bool operator==(const FetchShaderData& other) const { // code and size are not compared
+        return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr &&
+               instance_offset_sgpr == other.instance_offset_sgpr;
+    }
 };

-FetchShaderData ParseFetchShader(const u32* code, u32* out_size);
+std::optional<FetchShaderData> ParseFetchShader(const Shader::Info& info);

 } // namespace Shader::Gcn
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -368,13 +368,11 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra

 void Translator::EmitFetch(const GcnInst& inst) {
    // Read the pointer to the fetch shader assembly.
-    const u32 sgpr_base = inst.src[0].code;
-    const u32* code;
-    std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
+    info.has_fetch_shader = true;
+    info.fetch_shader_sgpr_base = inst.src[0].code;

-    // Parse the assembly to generate a list of attributes.
-    u32 fetch_size{};
-    const auto fetch_data = ParseFetchShader(code, &fetch_size);
+    const auto fetch_data = ParseFetchShader(info);
+    ASSERT(fetch_data.has_value());

    if (Config::dumpShaders()) {
        using namespace Common::FS;
@@ -384,13 +382,10 @@ void Translator::EmitFetch(const GcnInst& inst) {
        }
        const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash);
        const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
-        file.WriteRaw<u8>(code, fetch_size);
+        file.WriteRaw<u8>(fetch_data->code, fetch_data->size);
    }

-    info.vertex_offset_sgpr = fetch_data.vertex_offset_sgpr;
-    info.instance_offset_sgpr = fetch_data.instance_offset_sgpr;
-
-    for (const auto& attrib : fetch_data.attributes) {
+    for (const auto& attrib : fetch_data->attributes) {
        const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
        IR::VectorReg dst_reg{attrib.dest_vgpr};

@@ -420,29 +415,14 @@ void Translator::EmitFetch(const GcnInst& inst) {

        // In case of programmable step rates we need to fallback to instance data pulling in
        // shader, so VBs should be bound as regular data buffers
-        s32 instance_buf_handle = -1;
-        const auto step_rate = static_cast<Info::VsInput::InstanceIdType>(attrib.instance_data);
-        if (step_rate == Info::VsInput::OverStepRate0 ||
-            step_rate == Info::VsInput::OverStepRate1) {
+        if (attrib.UsesStepRates()) {
            info.buffers.push_back({
                .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
                .used_types = IR::Type::F32,
                .is_instance_data = true,
+                .instance_attrib = attrib.semantic,
            });
-            instance_buf_handle = s32(info.buffers.size() - 1);
-            info.uses_step_rates = true;
        }
-
-        const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
-        info.vs_inputs.push_back({
-            .fmt = buffer.GetNumberFmt(),
-            .binding = attrib.semantic,
-            .num_components = std::min<u16>(attrib.num_elements, num_components),
-            .sgpr_base = attrib.sgpr_base,
-            .dword_offset = attrib.dword_offset,
-            .instance_step_rate = step_rate,
-            .instance_data_buf = instance_buf_handle,
-        });
    }
 }

--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -45,6 +45,7 @@ struct BufferResource {
    AmdGpu::Buffer inline_cbuf;
    bool is_gds_buffer{};
    bool is_instance_data{};
+    u8 instance_attrib{};
    bool is_written{};

    bool IsStorage(AmdGpu::Buffer buffer) const noexcept {
@@ -57,7 +58,6 @@ using BufferResourceList = boost::container::small_vector<BufferResource, 16>;

 struct TextureBufferResource {
    u32 sharp_idx;
-    AmdGpu::NumberFormat nfmt;
    bool is_written{};

    constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
@@ -66,8 +66,6 @@ using TextureBufferResourceList = boost::container::small_vector<TextureBufferRe

 struct ImageResource {
    u32 sharp_idx;
-    AmdGpu::ImageType type;
-    AmdGpu::NumberFormat nfmt;
    bool is_storage{};
    bool is_depth{};
    bool is_atomic{};
@@ -115,24 +113,6 @@ static_assert(sizeof(PushData) <= 128,
 * Contains general information generated by the shader recompiler for an input program.
 */
 struct Info {
-    struct VsInput {
-        enum InstanceIdType : u8 {
-            None = 0,
-            OverStepRate0 = 1,
-            OverStepRate1 = 2,
-            Plain = 3,
-        };
-
-        AmdGpu::NumberFormat fmt;
-        u16 binding;
-        u16 num_components;
-        u8 sgpr_base;
-        u8 dword_offset;
-        InstanceIdType instance_step_rate;
-        s32 instance_data_buf;
-    };
-    boost::container::static_vector<VsInput, 32> vs_inputs{};
-
    struct AttributeFlags {
        bool Get(IR::Attribute attrib, u32 comp = 0) const {
            return flags[Index(attrib)] & (1 << comp);
@@ -179,9 +159,6 @@ struct Info {

    CopyShaderData gs_copy_data;

-    s8 vertex_offset_sgpr = -1;
-    s8 instance_offset_sgpr = -1;
-
    BufferResourceList buffers;
    TextureBufferResourceList texture_buffers;
    ImageResourceList images;
@@ -208,10 +185,11 @@ struct Info {
    bool uses_shared{};
    bool uses_fp16{};
    bool uses_fp64{};
-    bool uses_step_rates{};
    bool translation_failed{}; // indicates that shader has unsupported instructions
    bool has_readconst{};
    u8 mrt_mask{0u};
+    bool has_fetch_shader{false};
+    u32 fetch_shader_sgpr_base{0u};

    explicit Info(Stage stage_, ShaderParams params)
        : stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
@@ -252,18 +230,6 @@ struct Info {
        bnd.user_data += ud_mask.NumRegs();
    }

-    [[nodiscard]] std::pair<u32, u32> GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs) const {
-        u32 vertex_offset = regs.index_offset;
-        u32 instance_offset = 0;
-        if (vertex_offset == 0 && vertex_offset_sgpr != -1) {
-            vertex_offset = user_data[vertex_offset_sgpr];
-        }
-        if (instance_offset_sgpr != -1) {
-            instance_offset = user_data[instance_offset_sgpr];
-        }
-        return {vertex_offset, instance_offset};
-    }
-
    void RefreshFlatBuf() {
        flattened_ud_buf.resize(srt_info.flattened_bufsize_dw);
        ASSERT(user_data.size() <= NumUserDataRegs);
@@ -284,7 +250,12 @@ constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const
 }

 constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
-    return info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
+    // Hand back the descriptor read from user data when it is valid; an unbound (invalid)
+    // descriptor falls back to the null image instead.
+    if (const auto image = info.ReadUdSharp<AmdGpu::Image>(sharp_idx); image.Valid()) {
+        return image;
+    }
+    return AmdGpu::Image::Null();
 }

 constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -381,7 +381,6 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
    const auto buffer = info.ReadUdSharp<AmdGpu::Buffer>(sharp);
    const s32 binding = descriptors.Add(TextureBufferResource{
        .sharp_idx = sharp,
-        .nfmt = buffer.GetNumberFmt(),
        .is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
    });

@@ -660,11 +659,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
        }
    }

-    const auto type = image.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : image.GetType();
    u32 image_binding = descriptors.Add(ImageResource{
        .sharp_idx = tsharp,
-        .type = type,
-        .nfmt = image.GetNumberFmt(),
        .is_storage = is_storage,
        .is_depth = bool(inst_info.is_depth),
        .is_atomic = IsImageAtomicInstruction(inst),
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -22,8 +22,10 @@ struct Profile {
    bool support_fp32_denorm_preserve{};
    bool support_fp32_denorm_flush{};
    bool support_explicit_workgroup_layout{};
+    bool support_legacy_vertex_attributes{};
    bool has_broken_spirv_clamp{};
    bool lower_left_origin_mode{};
+    bool needs_manual_interpolation{};
    u64 min_ssbo_alignment{};
 };

--- a/src/shader_recompiler/specialization.h
+++ b/src/shader_recompiler/specialization.h
@@ -6,12 +6,19 @@
 #include <bitset>

 #include "common/types.h"
+#include "frontend/fetch_shader.h"
 #include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/info.h"
 #include "shader_recompiler/ir/passes/srt.h"

 namespace Shader {

+struct VsAttribSpecialization {
+    AmdGpu::NumberClass num_class{}; ///< GetNumberClass() of the attribute sharp's number format
+
+    auto operator<=>(const VsAttribSpecialization&) const = default;
+};
+
 struct BufferSpecialization {
    u16 stride : 14;
    u16 is_storage : 1;
@@ -50,6 +57,8 @@ struct StageSpecialization {

    const Shader::Info* info;
    RuntimeInfo runtime_info;
+    Gcn::FetchShaderData fetch_shader_data{};
+    boost::container::small_vector<VsAttribSpecialization, 32> vs_attribs;
    std::bitset<MaxStageResources> bitset{};
    boost::container::small_vector<BufferSpecialization, 16> buffers;
    boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
@@ -57,9 +66,19 @@ struct StageSpecialization {
    boost::container::small_vector<FMaskSpecialization, 8> fmasks;
    Backend::Bindings start{};

-    explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
-                                 Backend::Bindings start_)
+    explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
+                                 const Profile& profile_, Backend::Bindings start_)
        : info{&info_}, runtime_info{runtime_info_}, start{start_} {
+        if (const auto fetch_shader = Gcn::ParseFetchShader(info_)) {
+            fetch_shader_data = *fetch_shader;
+            if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) {
+                // Specialize shader on VS input number types to follow spec.
+                ForEachSharp(vs_attribs, fetch_shader_data.attributes,
+                             [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
+                                 spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt());
+                             });
+            }
+        }
        u32 binding{};
        if (info->has_readconst) {
            binding++;
@@ -75,8 +94,7 @@ struct StageSpecialization {
                     });
        ForEachSharp(binding, images, info->images,
                     [](auto& spec, const auto& desc, AmdGpu::Image sharp) {
-                         spec.type = sharp.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray
-                                                              : sharp.GetType();
+                         spec.type = sharp.GetBoundType();
                         spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
                     });
        ForEachSharp(binding, fmasks, info->fmasks,
@@ -86,6 +104,17 @@ struct StageSpecialization {
                     });
    }

+    /// Appends one spec per descriptor and calls func for those whose sharp tests true.
+    void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
+        for (const auto& desc : desc_list) {
+            // A spec is appended for every descriptor, even when func ends up being skipped.
+            auto& spec = spec_list.emplace_back();
+            if (const auto sharp = desc.GetSharp(*info); sharp) {
+                func(spec, desc, sharp);
+            }
+        }
+    }
+
    void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
        for (const auto& desc : desc_list) {
            auto& spec = spec_list.emplace_back();
@@ -106,6 +135,14 @@ struct StageSpecialization {
        if (runtime_info != other.runtime_info) {
            return false;
        }
+        if (fetch_shader_data != other.fetch_shader_data) {
+            return false;
+        }
+        for (u32 i = 0; i < vs_attribs.size(); i++) {
+            if (vs_attribs[i] != other.vs_attribs[i]) {
+                return false;
+            }
+        }
        u32 binding{};
        if (info->has_readconst != other.info->has_readconst) {
            return false;
--- a/src/video_core/amdgpu/pixel_format.h
+++ b/src/video_core/amdgpu/pixel_format.h
@@ -10,7 +10,24 @@

 namespace AmdGpu {

-[[nodiscard]] constexpr bool IsInteger(NumberFormat nfmt) {
+enum NumberClass {
+    Float, ///< Result of GetNumberClass for every format other than Sint and Uint
+    Sint,  ///< Result of GetNumberClass for NumberFormat::Sint
+    Uint,  ///< Result of GetNumberClass for NumberFormat::Uint
+};
+
+/// Maps a number format to its class: Sint and Uint map to themselves, anything else is Float.
+[[nodiscard]] constexpr NumberClass GetNumberClass(const NumberFormat nfmt) {
+    if (nfmt == NumberFormat::Sint) {
+        return Sint;
+    }
+    if (nfmt == NumberFormat::Uint) {
+        return Uint;
+    }
+    return Float;
+}
+
+[[nodiscard]] constexpr bool IsInteger(const NumberFormat nfmt) {
    return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
 }

--- a/src/video_core/amdgpu/resource.h
+++ b/src/video_core/amdgpu/resource.h
@@ -304,6 +304,10 @@ struct Image {
        const auto viewed_slice = last_array - base_array + 1;
        return GetType() == ImageType::Cube && viewed_slice < 6;
    }
+
+    ImageType GetBoundType() const noexcept { // Partial cubemaps are reported as Color2DArray
+        return IsPartialCubemap() ? ImageType::Color2DArray : GetType();
+    }
 };
 static_assert(sizeof(Image) == 32); // 256bits

--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -5,6 +5,7 @@
 #include "common/alignment.h"
 #include "common/scope_exit.h"
 #include "common/types.h"
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "shader_recompiler/info.h"
 #include "video_core/amdgpu/liverpool.h"
 #include "video_core/buffer_cache/buffer_cache.h"
@@ -107,7 +108,8 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
    }
 }

-bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
+bool BufferCache::BindVertexBuffers(
+    const Shader::Info& vs_info, const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader) {
    boost::container::small_vector<vk::VertexInputAttributeDescription2EXT, 16> attributes;
    boost::container::small_vector<vk::VertexInputBindingDescription2EXT, 16> bindings;
    SCOPE_EXIT {
@@ -126,7 +128,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
        }
    };

-    if (vs_info.vs_inputs.empty()) {
+    if (!fetch_shader || fetch_shader->attributes.empty()) {
        return false;
    }

@@ -150,30 +152,29 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
    // Calculate buffers memory overlaps
    bool has_step_rate = false;
    boost::container::static_vector<BufferRange, NumVertexBuffers> ranges{};
-    for (const auto& input : vs_info.vs_inputs) {
-        if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
-            input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
+    for (const auto& attrib : fetch_shader->attributes) {
+        if (attrib.UsesStepRates()) {
            has_step_rate = true;
            continue;
        }

-        const auto& buffer = vs_info.ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
+        const auto& buffer = attrib.GetSharp(vs_info);
        if (buffer.GetSize() == 0) {
            continue;
        }
        guest_buffers.emplace_back(buffer);
        ranges.emplace_back(buffer.base_address, buffer.base_address + buffer.GetSize());
        attributes.push_back({
-            .location = input.binding,
-            .binding = input.binding,
+            .location = attrib.semantic,
+            .binding = attrib.semantic,
            .format =
                Vulkan::LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
            .offset = 0,
        });
        bindings.push_back({
-            .binding = input.binding,
+            .binding = attrib.semantic,
            .stride = buffer.GetStride(),
-            .inputRate = input.instance_step_rate == Shader::Info::VsInput::None
+            .inputRate = attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
                             ? vk::VertexInputRate::eVertex
                             : vk::VertexInputRate::eInstance,
            .divisor = 1,
@@ -236,7 +237,7 @@ bool BufferCache::BindVertexBuffers(const Shader::Info& vs_info) {
 u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) {
    // Emulate QuadList primitive type with CPU made index buffer.
    const auto& regs = liverpool->regs;
-    if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList) {
+    if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList && !is_indexed) {
        is_indexed = true;

        // Emit indices.
@@ -262,6 +263,32 @@ u32 BufferCache::BindIndexBuffer(bool& is_indexed, u32 index_offset) {
    VAddr index_address = regs.index_base_address.Address<VAddr>();
    index_address += index_offset * index_size;

+    if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList) {
+        // Convert indices.
+        const u32 new_index_size = regs.num_indices * index_size * 6 / 4;
+        const auto [data, offset] = stream_buffer.Map(new_index_size);
+        const auto index_ptr = reinterpret_cast<u8*>(index_address);
+        switch (index_type) {
+        case vk::IndexType::eUint16:
+            Vulkan::LiverpoolToVK::ConvertQuadToTriangleListIndices<u16>(data, index_ptr,
+                                                                         regs.num_indices);
+            break;
+        case vk::IndexType::eUint32:
+            Vulkan::LiverpoolToVK::ConvertQuadToTriangleListIndices<u32>(data, index_ptr,
+                                                                         regs.num_indices);
+            break;
+        default:
+            UNREACHABLE_MSG("Unsupported QuadList index type {}", vk::to_string(index_type));
+            break;
+        }
+        stream_buffer.Commit();
+
+        // Bind index buffer.
+        const auto cmdbuf = scheduler.CommandBuffer();
+        cmdbuf.bindIndexBuffer(stream_buffer.Handle(), offset, index_type);
+        return new_index_size / index_size;
+    }
+
    // Bind index buffer.
    const u32 index_buffer_size = regs.num_indices * index_size;
    const auto [vk_buffer, offset] = ObtainBuffer(index_address, index_buffer_size, false);
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -20,8 +20,11 @@ struct Liverpool;
 }

 namespace Shader {
-struct Info;
+namespace Gcn {
+struct FetchShaderData;
 }
+struct Info;
+} // namespace Shader

 namespace VideoCore {

@@ -76,7 +79,8 @@ public:
    void InvalidateMemory(VAddr device_addr, u64 size);

    /// Binds host vertex buffers for the current draw.
-    bool BindVertexBuffers(const Shader::Info& vs_info);
+    bool BindVertexBuffers(const Shader::Info& vs_info,
+                           const std::optional<Shader::Gcn::FetchShaderData>& fetch_shader);

    /// Bind host index buffer for the current draw.
    u32 BindIndexBuffer(bool& is_indexed, u32 index_offset);
--- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
@@ -726,19 +726,6 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat
    return format->vk_format;
 }

-void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
-    static constexpr u16 NumVerticesPerQuad = 4;
-    u16* out_data = reinterpret_cast<u16*>(out_ptr);
-    for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) {
-        *out_data++ = i;
-        *out_data++ = i + 1;
-        *out_data++ = i + 2;
-        *out_data++ = i;
-        *out_data++ = i + 2;
-        *out_data++ = i + 3;
-    }
-}
-
 vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color_buffer) {
    const auto comp_swap = color_buffer.info.comp_swap.Value();
    const auto format = color_buffer.info.format.Value();
--- a/src/video_core/renderer_vulkan/liverpool_to_vk.h
+++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h
@@ -68,7 +68,33 @@ vk::ClearValue ColorBufferClearValue(const AmdGpu::Liverpool::ColorBuffer& color

 vk::SampleCountFlagBits NumSamples(u32 num_samples, vk::SampleCountFlags supported_flags);

-void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices);
+static constexpr u16 NumVerticesPerQuad = 4;
+
+/// Writes u16 triangle-list indices (0,1,2) and (0,2,3) for each quad of sequential vertices.
+inline void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) {
+    u16* out_data = reinterpret_cast<u16*>(out_ptr);
+    static constexpr u32 corners[] = {0, 1, 2, 0, 2, 3};
+    // u32 counter: a u16 one wraps to 0 past 65532 and never ends. Output still truncates to u16.
+    for (u32 i = 0; i < num_vertices; i += NumVerticesPerQuad) {
+        for (const u32 corner : corners) {
+            *out_data++ = static_cast<u16>(i + corner);
+        }
+    }
+}
+
+/// Expands each complete quad of indices from in_ptr into two triangles (0,1,2) and (0,2,3).
+template <typename T>
+void ConvertQuadToTriangleListIndices(u8* out_ptr, const u8* in_ptr, u32 num_vertices) {
+    T* out_data = reinterpret_cast<T*>(out_ptr);
+    const T* in_data = reinterpret_cast<const T*>(in_ptr);
+    static constexpr u32 corners[] = {0, 1, 2, 0, 2, 3};
+    // u32 counter: u16 wraps to 0 past 65532 and never ends; a trailing partial quad is skipped.
+    for (u32 i = 0; i + NumVerticesPerQuad <= num_vertices; i += NumVerticesPerQuad) {
+        for (const u32 corner : corners) {
+            *out_data++ = in_data[i + corner];
+        }
+    }
+}

 static inline vk::Format PromoteFormatToDepth(vk::Format fmt) {
    if (fmt == vk::Format::eR32Sfloat) {
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later

 #include <algorithm>
+#include <utility>
 #include <boost/container/small_vector.hpp>
 #include <boost/container/static_vector.hpp>

@@ -10,6 +11,8 @@
 #include "video_core/amdgpu/resource.h"
 #include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/texture_cache/texture_cache.h"
@@ -20,8 +23,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
                                   DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_,
                                   vk::PipelineCache pipeline_cache,
                                   std::span<const Shader::Info*, MaxShaderStages> infos,
+                                   std::optional<const Shader::Gcn::FetchShaderData> fetch_shader_,
                                   std::span<const vk::ShaderModule> modules)
-    : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_} {
+    : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, key{key_},
+      fetch_shader{std::move(fetch_shader_)} {
    const vk::Device device = instance.GetDevice();
    std::ranges::copy(infos, stages.begin());
    BuildDescSetLayout();
@@ -46,32 +51,31 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul

    boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
    boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
-    if (!instance.IsVertexInputDynamicState()) {
-        const auto& vs_info = stages[u32(Shader::Stage::Vertex)];
-        for (const auto& input : vs_info->vs_inputs) {
-            if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
-                input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
+    if (fetch_shader && !instance.IsVertexInputDynamicState()) {
+        const auto& vs_info = GetStage(Shader::Stage::Vertex);
+        for (const auto& attrib : fetch_shader->attributes) {
+            if (attrib.UsesStepRates()) {
                // Skip attribute binding as the data will be pulled by shader
                continue;
            }

-            const auto buffer =
-                vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
+            const auto buffer = attrib.GetSharp(vs_info);
            if (buffer.GetSize() == 0) {
                continue;
            }
            vertex_attributes.push_back({
-                .location = input.binding,
-                .binding = input.binding,
+                .location = attrib.semantic,
+                .binding = attrib.semantic,
                .format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt()),
                .offset = 0,
            });
            vertex_bindings.push_back({
-                .binding = input.binding,
+                .binding = attrib.semantic,
                .stride = buffer.GetStride(),
-                .inputRate = input.instance_step_rate == Shader::Info::VsInput::None
-                                 ? vk::VertexInputRate::eVertex
-                                 : vk::VertexInputRate::eInstance,
+                .inputRate =
+                    attrib.GetStepRate() == Shader::Gcn::VertexAttribute::InstanceIdType::None
+                        ? vk::VertexInputRate::eVertex
+                        : vk::VertexInputRate::eInstance,
            });
        }
    }
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -4,6 +4,7 @@
 #include <xxhash.h>

 #include "common/types.h"
+#include "shader_recompiler/frontend/fetch_shader.h"
 #include "video_core/renderer_vulkan/liverpool_to_vk.h"
 #include "video_core/renderer_vulkan/vk_common.h"
 #include "video_core/renderer_vulkan/vk_pipeline_common.h"
@@ -59,9 +60,14 @@ public:
    GraphicsPipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
                     const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache,
                     std::span<const Shader::Info*, MaxShaderStages> stages,
+                     std::optional<const Shader::Gcn::FetchShaderData> fetch_shader,
                     std::span<const vk::ShaderModule> modules);
    ~GraphicsPipeline();

+    const std::optional<const Shader::Gcn::FetchShaderData>& GetFetchShader() const noexcept {
+        return fetch_shader; // Data handed to the constructor; empty when none was supplied.
+    }
+
    bool IsEmbeddedVs() const noexcept {
        static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
        return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash;
@@ -94,6 +100,7 @@ private:

 private:
    GraphicsPipelineKey key;
+    std::optional<const Shader::Gcn::FetchShaderData> fetch_shader{};
 };

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -256,6 +256,7 @@ bool Instance::CreateDevice() {
    workgroup_memory_explicit_layout =
        add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
    vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
+    fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME);

    // The next two extensions are required to be available together in order to support write masks
    color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
@@ -264,6 +265,7 @@ bool Instance::CreateDevice() {
    const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
    list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
    maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
+    legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);

    // These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
    // with extensions.
@@ -399,6 +401,12 @@ bool Instance::CreateDevice() {
        vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT{
            .primitiveTopologyListRestart = true,
        },
+        vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR{
+            .fragmentShaderBarycentric = true,
+        },
+        vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT{
+            .legacyVertexAttributes = true,
+        },
 #ifdef __APPLE__
        feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(),
 #endif
@@ -438,6 +446,12 @@ bool Instance::CreateDevice() {
    if (!vertex_input_dynamic_state) {
        device_chain.unlink<vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT>();
    }
+    if (!fragment_shader_barycentric) {
+        device_chain.unlink<vk::PhysicalDeviceFragmentShaderBarycentricFeaturesKHR>();
+    }
+    if (!legacy_vertex_attributes) {
+        device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
+    }

    auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
    if (device_result != vk::Result::eSuccess) {
--- a/src/video_core/renderer_vulkan/vk_instance.h
+++ b/src/video_core/renderer_vulkan/vk_instance.h
@@ -143,10 +143,21 @@ public:
        return maintenance5;
    }

+    /// Returns true when CreateDevice() enabled VK_KHR_fragment_shader_barycentric.
+    bool IsFragmentShaderBarycentricSupported() const {
+        return fragment_shader_barycentric;
+    }
+
+    /// Returns true when CreateDevice() enabled VK_EXT_primitive_topology_list_restart.
     bool IsListRestartSupported() const {
         return list_restart;
     }

+    /// Returns true when CreateDevice() enabled VK_EXT_legacy_vertex_attributes.
+    bool IsLegacyVertexAttributesSupported() const {
+        return legacy_vertex_attributes;
+    }
+
    /// Returns true when geometry shaders are supported by the device
    bool IsGeometryStageSupported() const {
        return features.geometryShader;
@@ -315,6 +326,7 @@ private:
    bool null_descriptor{};
    bool maintenance5{};
    bool list_restart{};
+    bool legacy_vertex_attributes{};
    u64 min_imported_host_pointer_alignment{};
    u32 subgroup_size{};
    bool tooling_info{};
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -169,6 +169,9 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
        .support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
        .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
        .support_explicit_workgroup_layout = true,
+        .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
+        .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
+                                      instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
    };
    auto [cache_result, cache] = instance.GetDevice().createPipelineCacheUnique({});
    ASSERT_MSG(cache_result == vk::Result::eSuccess, "Failed to create pipeline cache: {}",
@@ -185,7 +188,7 @@ const GraphicsPipeline* PipelineCache::GetGraphicsPipeline() {
    const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key);
    if (is_new) {
        it.value() = graphics_pipeline_pool.Create(instance, scheduler, desc_heap, graphics_key,
-                                                   *pipeline_cache, infos, modules);
+                                                   *pipeline_cache, infos, fetch_shader, modules);
    }
    return it->second;
 }
@@ -302,8 +305,12 @@ bool PipelineCache::RefreshGraphicsKey() {
        }

        auto params = Liverpool::GetParams(*pgm);
-        std::tie(infos[stage_out_idx], modules[stage_out_idx], key.stage_hashes[stage_out_idx]) =
-            GetProgram(stage_in, params, binding);
+        std::optional<Shader::Gcn::FetchShaderData> fetch_shader_;
+        std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_,
+                 key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding);
+        if (fetch_shader_) {
+            fetch_shader = fetch_shader_;
+        }
        return true;
    };

@@ -339,16 +346,14 @@ bool PipelineCache::RefreshGraphicsKey() {
    }
    }

-    const auto* vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)];
-    if (vs_info && !instance.IsVertexInputDynamicState()) {
+    const auto vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)];
+    if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) {
        u32 vertex_binding = 0;
-        for (const auto& input : vs_info->vs_inputs) {
-            if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
-                input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
+        for (const auto& attrib : fetch_shader->attributes) {
+            if (attrib.UsesStepRates()) {
                continue;
            }
-            const auto& buffer =
-                vs_info->ReadUdReg<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
+            const auto& buffer = attrib.GetSharp(*vs_info);
            if (buffer.GetSize() == 0) {
                continue;
            }
@@ -392,7 +397,7 @@ bool PipelineCache::RefreshComputeKey() {
    Shader::Backend::Bindings binding{};
    const auto* cs_pgm = &liverpool->regs.cs_program;
    const auto cs_params = Liverpool::GetParams(*cs_pgm);
-    std::tie(infos[0], modules[0], compute_key) =
+    std::tie(infos[0], modules[0], fetch_shader, compute_key) =
        GetProgram(Shader::Stage::Compute, cs_params, binding);
    return true;
 }
@@ -423,24 +428,26 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
    return module;
 }

-std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram(
-    Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding) {
+std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>, u64>
+PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
+                          Shader::Backend::Bindings& binding) {
    const auto runtime_info = BuildRuntimeInfo(stage);
    auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
    if (new_program) {
        Program* program = program_pool.Create(stage, params);
        auto start = binding;
        const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
-        const auto spec = Shader::StageSpecialization(program->info, runtime_info, start);
+        const auto spec = Shader::StageSpecialization(program->info, runtime_info, profile, start);
        program->AddPermut(module, std::move(spec));
        it_pgm.value() = program;
-        return std::make_tuple(&program->info, module, HashCombine(params.hash, 0));
+        return std::make_tuple(&program->info, module, spec.fetch_shader_data,
+                               HashCombine(params.hash, 0));
    }

    Program* program = it_pgm->second;
    auto& info = program->info;
    info.RefreshFlatBuf();
-    const auto spec = Shader::StageSpecialization(info, runtime_info, binding);
+    const auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding);
    size_t perm_idx = program->modules.size();
    vk::ShaderModule module{};

@@ -454,7 +461,8 @@ std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram
        module = it->module;
        perm_idx = std::distance(program->modules.begin(), it);
    }
-    return std::make_tuple(&info, module, HashCombine(params.hash, perm_idx));
+    return std::make_tuple(&info, module, spec.fetch_shader_data,
+                           HashCombine(params.hash, perm_idx));
 }

 void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -47,8 +47,10 @@ public:

    const ComputePipeline* GetComputePipeline();

-    std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram(
-        Shader::Stage stage, Shader::ShaderParams params, Shader::Backend::Bindings& binding);
+    std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>,
+               u64>
+    GetProgram(Shader::Stage stage, Shader::ShaderParams params,
+               Shader::Backend::Bindings& binding);

 private:
    bool RefreshGraphicsKey();
@@ -80,6 +82,7 @@ private:
    tsl::robin_map<GraphicsPipelineKey, GraphicsPipeline*> graphics_pipelines;
    std::array<const Shader::Info*, MaxShaderStages> infos{};
    std::array<vk::ShaderModule, MaxShaderStages> modules{};
+    std::optional<Shader::Gcn::FetchShaderData> fetch_shader{};
    GraphicsPipelineKey graphics_key{};
    u64 compute_key{};
 };
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -187,13 +187,14 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
    }

    const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
-    buffer_cache.BindVertexBuffers(vs_info);
+    const auto& fetch_shader = pipeline->GetFetchShader();
+    buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
    const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset);

    BeginRendering(*pipeline, state);
    UpdateDynamicState(*pipeline);

-    const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(regs);
+    const auto [vertex_offset, instance_offset] = fetch_shader->GetDrawOffsets(regs, vs_info);

    const auto cmdbuf = scheduler.CommandBuffer();
    cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
@@ -243,7 +244,8 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
    }

    const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
-    buffer_cache.BindVertexBuffers(vs_info);
+    const auto& fetch_shader = pipeline->GetFetchShader();
+    buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
    buffer_cache.BindIndexBuffer(is_indexed, 0);

    const auto& [buffer, base] =
@@ -397,10 +399,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
        if (!stage) {
            continue;
        }
-        if (stage->uses_step_rates) {
-            push_data.step0 = regs.vgt_instance_step_rate_0;
-            push_data.step1 = regs.vgt_instance_step_rate_1;
-        }
+        push_data.step0 = regs.vgt_instance_step_rate_0;
+        push_data.step1 = regs.vgt_instance_step_rate_1;
        stage->PushUd(binding, push_data);

        BindBuffers(*stage, binding, push_data, set_writes, buffer_barriers);
--- a/src/video_core/texture_cache/image_view.cpp
+++ b/src/video_core/texture_cache/image_view.cpp
@@ -87,12 +87,9 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso
        range.extent.levels = image.last_level - image.base_level + 1;
    }
    range.extent.layers = image.last_array - image.base_array + 1;
-    type = ConvertImageViewType(image.GetType());
+    type = ConvertImageViewType(image.GetBoundType());

-    // Adjust view type for partial cubemaps and arrays
-    if (image.IsPartialCubemap()) {
-        type = vk::ImageViewType::e2DArray;
-    }
+    // Adjust view type for arrays
    if (type == vk::ImageViewType::eCube) {
        if (desc.is_array) {
            type = vk::ImageViewType::eCubeArray;
--- a/src/video_core/texture_cache/tile_manager.cpp
+++ b/src/video_core/texture_cache/tile_manager.cpp
@@ -182,12 +182,15 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) {
    case vk::Format::eB8G8R8A8Srgb:
    case vk::Format::eB8G8R8A8Unorm:
    case vk::Format::eR8G8B8A8Unorm:
+    case vk::Format::eR8G8B8A8Snorm:
    case vk::Format::eR8G8B8A8Uint:
    case vk::Format::eR32Sfloat:
    case vk::Format::eR32Uint:
    case vk::Format::eR16G16Sfloat:
    case vk::Format::eR16G16Unorm:
+    case vk::Format::eR16G16Snorm:
    case vk::Format::eB10G11R11UfloatPack32:
+    case vk::Format::eA2B10G10R10UnormPack32:
        return vk::Format::eR32Uint;
    case vk::Format::eBc1RgbaSrgbBlock:
    case vk::Format::eBc1RgbaUnormBlock: