diff --git a/REUSE.toml b/REUSE.toml index 7b2862e53..2d94c9292 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -37,6 +37,7 @@ path = [ "src/images/refresh_icon.png", "src/images/settings_icon.png", "src/images/stop_icon.png", + "src/images/utils_icon.png", "src/images/shadPS4.icns", "src/images/shadps4.ico", "src/images/net.shadps4.shadPS4.svg", diff --git a/src/core/devtools/widget/cmd_list.cpp b/src/core/devtools/widget/cmd_list.cpp index 9a42f8238..219d25d6a 100644 --- a/src/core/devtools/widget/cmd_list.cpp +++ b/src/core/devtools/widget/cmd_list.cpp @@ -3,6 +3,7 @@ // Credits to https://github.com/psucien/tlg-emu-tools/ +#include #include #include #include @@ -1224,12 +1225,12 @@ void CmdListViewer::Draw(bool only_batches_view) { } Text("queue : %s", queue_name); - Text("base addr: %08llX", cmdb_addr); + Text("base addr: %08" PRIXPTR, cmdb_addr); SameLine(); if (SmallButton("Memory >")) { cmdb_view.Open ^= true; } - Text("size : %04llX", cmdb_size); + Text("size : %04zX", cmdb_size); Separator(); { @@ -1292,12 +1293,12 @@ void CmdListViewer::Draw(bool only_batches_view) { if (batch.type == static_cast(0xFF)) { ignore_header = true; } else if (!batch.marker.empty()) { - snprintf(batch_hdr, sizeof(batch_hdr), "%08llX: batch-%03d %s | %s", + snprintf(batch_hdr, sizeof(batch_hdr), "%08" PRIXPTR ": batch-%03d %s | %s", cmdb_addr + batch.start_addr, batch.id, Gcn::GetOpCodeName(static_cast(batch.type)), batch.marker.c_str()); } else { - snprintf(batch_hdr, sizeof(batch_hdr), "%08llX: batch-%03d %s", + snprintf(batch_hdr, sizeof(batch_hdr), "%08" PRIXPTR ": batch-%03d %s", cmdb_addr + batch.start_addr, batch.id, Gcn::GetOpCodeName(static_cast(batch.type))); } @@ -1348,7 +1349,7 @@ void CmdListViewer::Draw(bool only_batches_view) { } if (show_batch_content) { - auto processed_size = 0ull; + size_t processed_size = 0; auto bb = ctx.LastItemData.Rect; if (group_batches && !ignore_header) { Indent(); @@ -1364,9 +1365,9 @@ void CmdListViewer::Draw(bool only_batches_view) { op = pm4_t3->opcode; char header_name[128]; - sprintf(header_name, "%08llX: %s", - cmdb_addr + batch.start_addr + processed_size, - Gcn::GetOpCodeName((u32)op)); + snprintf(header_name, sizeof(header_name), "%08" PRIXPTR ": %s", + cmdb_addr + batch.start_addr + processed_size, + Gcn::GetOpCodeName(static_cast(op))); bool open_pm4 = TreeNode(header_name); if (!group_batches) { diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 9aede3304..70cf09a97 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -703,7 +703,7 @@ s32 PS4_SYSV_ABI sceGnmDrawIndexIndirectCountMulti(u32* cmdbuf, u32 size, u32 da cmdbuf[3] = (count_addr != 0 ? 
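
An aside on the format-specifier hunks above (the gnmdriver statement resumes below): `%08llX` hard-codes `unsigned long long`, which happens to match 64-bit pointers on LP64 but not the declared types, while `PRIXPTR` and the `z` modifier track the types themselves. A minimal sketch, assuming `cmdb_addr` is a `uintptr_t` and `cmdb_size` a `size_t`, as the new specifiers imply; the bare `#include` added at the top of cmd_list.cpp is presumably `<cinttypes>`, which defines `PRIXPTR`:

    #include <cinttypes> // PRIXPTR: conversion specifier sized for uintptr_t
    #include <cstdio>

    void PrintCmdBufferInfo(uintptr_t cmdb_addr, size_t cmdb_size) {
        char line[64];
        // The macro pastes the correct length modifier for this platform's uintptr_t.
        std::snprintf(line, sizeof(line), "base addr: %08" PRIXPTR, cmdb_addr);
        // The 'z' modifier matches size_t directly, with no cast needed.
        std::snprintf(line, sizeof(line), "size     : %04zX", cmdb_size);
    }

The same reasoning applies to the lone `sprintf` in this file being replaced with a bounded `snprintf`.
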
1u : 0u) << 0x1e; cmdbuf[4] = max_count; *(u64*)(&cmdbuf[5]) = count_addr; - cmdbuf[7] = AmdGpu::Liverpool::DrawIndexedIndirectArgsSize; + cmdbuf[7] = sizeof(DrawIndexedIndirectArgs); cmdbuf[8] = 0; cmdbuf += 9; diff --git a/src/core/libraries/kernel/file_system.cpp b/src/core/libraries/kernel/file_system.cpp index 56286eb98..c4f1e4799 100644 --- a/src/core/libraries/kernel/file_system.cpp +++ b/src/core/libraries/kernel/file_system.cpp @@ -416,7 +416,7 @@ int PS4_SYSV_ABI sceKernelCheckReachability(const char* path) { return ORBIS_OK; } -s64 PS4_SYSV_ABI sceKernelPread(int d, void* buf, size_t nbytes, s64 offset) { +s64 PS4_SYSV_ABI sceKernelPreadv(int d, SceKernelIovec* iov, int iovcnt, s64 offset) { if (d < 3) { return ORBIS_KERNEL_ERROR_EPERM; } @@ -436,10 +436,19 @@ s64 PS4_SYSV_ABI sceKernelPread(int d, void* buf, size_t nbytes, s64 offset) { file->f.Seek(pos); }; if (!file->f.Seek(offset)) { - LOG_CRITICAL(Kernel_Fs, "sceKernelPread: failed to seek"); + LOG_CRITICAL(Kernel_Fs, "failed to seek"); return ORBIS_KERNEL_ERROR_EINVAL; } - return file->f.ReadRaw(buf, nbytes); + size_t total_read = 0; + for (int i = 0; i < iovcnt; i++) { + total_read += file->f.ReadRaw(iov[i].iov_base, iov[i].iov_len); + } + return total_read; +} + +s64 PS4_SYSV_ABI sceKernelPread(int d, void* buf, size_t nbytes, s64 offset) { + SceKernelIovec iovec{buf, nbytes}; + return sceKernelPreadv(d, &iovec, 1, offset); } int PS4_SYSV_ABI sceKernelFStat(int fd, OrbisKernelStat* sb) { @@ -537,14 +546,13 @@ static int GetDents(int fd, char* buf, int nbytes, s64* basep) { } const auto& entry = file->dirents.at(file->dirents_index++); auto str = entry.name; - auto str_size = str.size() - 1; static int fileno = 1000; // random OrbisKernelDirent* sce_ent = (OrbisKernelDirent*)buf; sce_ent->d_fileno = fileno++; // TODO this should be unique but atm it changes maybe switch to a // hash or something? sce_ent->d_reclen = sizeof(OrbisKernelDirent); sce_ent->d_type = (entry.isFile ? 
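
On the sceKernelPreadv/sceKernelPread split above: a positioned scatter read seeks once, then fills each iovec segment in order, and pread becomes a one-element special case. A hedged sketch of that shape using plain stdio stand-ins (Iovec and PositionedReadV are illustrative, not the emulator's types); note that a robust variant stops at the first short read, which the accumulation loop in the hunk does not yet do:

    #include <cstddef>
    #include <cstdio>

    struct Iovec {
        void* iov_base;
        size_t iov_len;
    };

    // Read up to iovcnt segments from 'f' starting at 'offset'; returns bytes read or -1.
    long PositionedReadV(std::FILE* f, const Iovec* iov, int iovcnt, long offset) {
        if (std::fseek(f, offset, SEEK_SET) != 0) {
            return -1; // maps to ORBIS_KERNEL_ERROR_EINVAL in the hunk above
        }
        size_t total = 0;
        for (int i = 0; i < iovcnt; ++i) {
            const size_t n = std::fread(iov[i].iov_base, 1, iov[i].iov_len, f);
            total += n;
            if (n < iov[i].iov_len) {
                break; // EOF or error: later segments must stay untouched
            }
        }
        return static_cast<long>(total);
    }

    // pread degenerates to a single-element vector, mirroring the forwarding wrapper:
    long PositionedRead(std::FILE* f, void* buf, size_t nbytes, long offset) {
        Iovec iovec{buf, nbytes};
        return PositionedReadV(f, &iovec, 1, offset);
    }
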
8 : 4); - sce_ent->d_namlen = str_size; + sce_ent->d_namlen = str.size(); strncpy(sce_ent->d_name, str.c_str(), ORBIS_MAX_PATH); sce_ent->d_name[ORBIS_MAX_PATH] = '\0'; @@ -650,6 +658,7 @@ void RegisterFileSystem(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("E6ao34wPw+U", "libScePosix", 1, "libkernel", 1, 1, posix_stat); LIB_FUNCTION("E6ao34wPw+U", "libkernel", 1, "libkernel", 1, 1, posix_stat); LIB_FUNCTION("+r3rMFwItV4", "libkernel", 1, "libkernel", 1, 1, sceKernelPread); + LIB_FUNCTION("yTj62I7kw4s", "libkernel", 1, "libkernel", 1, 1, sceKernelPreadv); LIB_FUNCTION("uWyW3v98sU4", "libkernel", 1, "libkernel", 1, 1, sceKernelCheckReachability); LIB_FUNCTION("fTx66l5iWIA", "libkernel", 1, "libkernel", 1, 1, sceKernelFsync); LIB_FUNCTION("juWbTNM+8hw", "libkernel", 1, "libkernel", 1, 1, posix_fsync); diff --git a/src/core/libraries/kernel/threads/semaphore.cpp b/src/core/libraries/kernel/threads/semaphore.cpp index 9c9c11178..9fcbd4356 100644 --- a/src/core/libraries/kernel/threads/semaphore.cpp +++ b/src/core/libraries/kernel/threads/semaphore.cpp @@ -165,8 +165,7 @@ public: while (it != wait_list.end() && (*it)->priority > waiter->priority) { ++it; } - wait_list.insert(it, waiter); - return it; + return wait_list.insert(it, waiter); } WaitList wait_list; diff --git a/src/core/libraries/libc_internal/libc_internal.cpp b/src/core/libraries/libc_internal/libc_internal.cpp index 8eea41eb3..eb6046c7a 100644 --- a/src/core/libraries/libc_internal/libc_internal.cpp +++ b/src/core/libraries/libc_internal/libc_internal.cpp @@ -3,8 +3,8 @@ #include +#include "common/assert.h" #include "common/logging/log.h" -#include "core/libraries/error_codes.h" #include "core/libraries/libs.h" #include "libc_internal.h" @@ -36,28 +36,184 @@ int PS4_SYSV_ABI internal_strcpy_s(char* dest, size_t dest_size, const char* src #endif } +int PS4_SYSV_ABI internal_strcat_s(char* dest, size_t dest_size, const char* src) { +#ifdef _WIN64 + return strcat_s(dest, dest_size, src); +#else + std::strcat(dest, src); + return 0; // ALL OK +#endif +} + int PS4_SYSV_ABI internal_memcmp(const void* s1, const void* s2, size_t n) { return std::memcmp(s1, s2, n); } +int PS4_SYSV_ABI internal_strcmp(const char* str1, const char* str2) { + return std::strcmp(str1, str2); +} + int PS4_SYSV_ABI internal_strncmp(const char* str1, const char* str2, size_t num) { return std::strncmp(str1, str2, num); } -int PS4_SYSV_ABI internal_strlen(const char* str) { +size_t PS4_SYSV_ABI internal_strlen(const char* str) { return std::strlen(str); } +char* PS4_SYSV_ABI internal_strncpy(char* dest, const char* src, std::size_t count) { + return std::strncpy(dest, src, count); +} + +char* PS4_SYSV_ABI internal_strcat(char* dest, const char* src) { + return std::strcat(dest, src); +} + +const char* PS4_SYSV_ABI internal_strchr(const char* str, int c) { + return std::strchr(str, c); +} + +double PS4_SYSV_ABI internal_sin(double x) { + return std::sin(x); +} + +float PS4_SYSV_ABI internal_sinf(float x) { + return std::sinf(x); +} + +double PS4_SYSV_ABI internal_cos(double x) { + return std::cos(x); +} + +float PS4_SYSV_ABI internal_cosf(float x) { + return std::cosf(x); +} + +void PS4_SYSV_ABI internal_sincos(double x, double* sinp, double* cosp) { + *sinp = std::sin(x); + *cosp = std::cos(x); +} + +void PS4_SYSV_ABI internal_sincosf(float x, float* sinp, float* cosp) { + *sinp = std::sinf(x); + *cosp = std::cosf(x); +} + +double PS4_SYSV_ABI internal_tan(double x) { + return std::tan(x); +} + +float PS4_SYSV_ABI internal_tanf(float x) { + return 
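
The semaphore.cpp change above fixes a subtle iterator bug: after `wait_list.insert(it, waiter)`, the local `it` still points at the element *after* the insertion point, whereas `insert` returns an iterator to the newly linked node, which is what callers need for later O(1) removal. The corrected pattern in isolation, assuming a std::list-like container (names are illustrative):

    #include <list>

    struct Waiter {
        int priority;
    };

    using WaitList = std::list<Waiter*>;

    // Insert behind all strictly higher-priority waiters; return the new node's iterator.
    WaitList::iterator AddWaiter(WaitList& wait_list, Waiter* waiter) {
        auto it = wait_list.begin();
        while (it != wait_list.end() && (*it)->priority > waiter->priority) {
            ++it;
        }
        return wait_list.insert(it, waiter); // insert() yields the inserted element
    }
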
std::tanf(x); +} + +double PS4_SYSV_ABI internal_asin(double x) { + return std::asin(x); +} + +float PS4_SYSV_ABI internal_asinf(float x) { + return std::asinf(x); +} + +double PS4_SYSV_ABI internal_acos(double x) { + return std::acos(x); +} + +float PS4_SYSV_ABI internal_acosf(float x) { + return std::acosf(x); +} + +double PS4_SYSV_ABI internal_atan(double x) { + return std::atan(x); +} + +float PS4_SYSV_ABI internal_atanf(float x) { + return std::atanf(x); +} + +double PS4_SYSV_ABI internal_atan2(double y, double x) { + return std::atan2(y, x); +} + +float PS4_SYSV_ABI internal_atan2f(float y, float x) { + return std::atan2f(y, x); +} + +double PS4_SYSV_ABI internal_exp(double x) { + return std::exp(x); +} + float PS4_SYSV_ABI internal_expf(float x) { - return expf(x); + return std::expf(x); +} + +double PS4_SYSV_ABI internal_exp2(double x) { + return std::exp2(x); +} + +float PS4_SYSV_ABI internal_exp2f(float x) { + return std::exp2f(x); +} + +double PS4_SYSV_ABI internal_pow(double x, double y) { + return std::pow(x, y); +} + +float PS4_SYSV_ABI internal_powf(float x, float y) { + return std::powf(x, y); +} + +double PS4_SYSV_ABI internal_log(double x) { + return std::log(x); +} + +float PS4_SYSV_ABI internal_logf(float x) { + return std::logf(x); +} + +double PS4_SYSV_ABI internal_log10(double x) { + return std::log10(x); +} + +float PS4_SYSV_ABI internal_log10f(float x) { + return std::log10f(x); } void* PS4_SYSV_ABI internal_malloc(size_t size) { return std::malloc(size); } -char* PS4_SYSV_ABI internal_strncpy(char* dest, const char* src, std::size_t count) { - return std::strncpy(dest, src, count); +void PS4_SYSV_ABI internal_free(void* ptr) { + std::free(ptr); +} + +void* PS4_SYSV_ABI internal_operator_new(size_t size) { + if (size == 0) { + // Size of 1 is used if 0 is provided. 
+ size = 1; + } + void* ptr = std::malloc(size); + ASSERT_MSG(ptr, "Failed to allocate new object with size {}", size); + return ptr; +} + +void PS4_SYSV_ABI internal_operator_delete(void* ptr) { + if (ptr) { + std::free(ptr); + } +} + +int PS4_SYSV_ABI internal_posix_memalign(void** ptr, size_t alignment, size_t size) { +#ifdef _WIN64 + void* allocated = _aligned_malloc(size, alignment); + if (!allocated) { + return errno; + } + *ptr = allocated; + return 0; +#else + return posix_memalign(ptr, alignment, size); +#endif } void RegisterlibSceLibcInternal(Core::Loader::SymbolsResolver* sym) { @@ -69,17 +225,71 @@ void RegisterlibSceLibcInternal(Core::Loader::SymbolsResolver* sym) { internal_memset); LIB_FUNCTION("5Xa2ACNECdo", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_strcpy_s); + LIB_FUNCTION("K+gcnFFJKVc", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_strcat_s); LIB_FUNCTION("DfivPArhucg", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_memcmp); - LIB_FUNCTION("8zsu04XNsZ4", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_expf); LIB_FUNCTION("aesyjrHVWy4", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_strcmp); + LIB_FUNCTION("Ovb2dSJOAuE", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_strncmp); LIB_FUNCTION("j4ViWNHEgww", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_strlen); LIB_FUNCTION("6sJWiWSRuqk", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_strncpy); + LIB_FUNCTION("Ls4tzzhimqQ", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_strcat); + LIB_FUNCTION("ob5xAW4ln-0", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_strchr); + LIB_FUNCTION("H8ya2H00jbI", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_sin); + LIB_FUNCTION("Q4rRL34CEeE", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_sinf); + LIB_FUNCTION("2WE3BTYVwKM", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_cos); + LIB_FUNCTION("-P6FNMzk2Kc", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_cosf); + LIB_FUNCTION("jMB7EFyu30Y", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_sincos); + LIB_FUNCTION("pztV4AF18iI", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_sincosf); + LIB_FUNCTION("T7uyNqP7vQA", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_tan); + LIB_FUNCTION("ZE6RNL+eLbk", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_tanf); + LIB_FUNCTION("7Ly52zaL44Q", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_asin); + LIB_FUNCTION("GZWjF-YIFFk", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_asinf); + LIB_FUNCTION("JBcgYuW8lPU", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_acos); + LIB_FUNCTION("QI-x0SL8jhw", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_acosf); + LIB_FUNCTION("OXmauLdQ8kY", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_atan); + LIB_FUNCTION("weDug8QD-lE", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_atanf); + LIB_FUNCTION("HUbZmOnT-Dg", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_atan2); + LIB_FUNCTION("EH-x713A99c", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_atan2f); + LIB_FUNCTION("NVadfnzQhHQ", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_exp); + LIB_FUNCTION("8zsu04XNsZ4", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_expf); + LIB_FUNCTION("dnaeGXbjP6E", 
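
For the internal_posix_memalign shim above: on Windows the counterpart is `_aligned_malloc`, whose memory must be released with `_aligned_free` rather than `free`, so a complete shim pairs the two paths. A sketch under that assumption (AlignedAlloc/AlignedFree are hypothetical helpers; posix_memalign also requires the alignment to be a power-of-two multiple of sizeof(void*)):

    #include <cerrno>
    #include <cstdlib>
    #ifdef _WIN64
    #include <malloc.h> // _aligned_malloc / _aligned_free
    #endif

    int AlignedAlloc(void** out, size_t alignment, size_t size) {
    #ifdef _WIN64
        void* p = _aligned_malloc(size, alignment);
        if (!p) {
            return ENOMEM; // _aligned_malloc reports failure via nullptr/errno
        }
        *out = p;
        return 0;
    #else
        return posix_memalign(out, alignment, size);
    #endif
    }

    void AlignedFree(void* p) {
    #ifdef _WIN64
        _aligned_free(p); // free() is not valid for _aligned_malloc memory
    #else
        free(p);
    #endif
    }
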
"libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_exp2); + LIB_FUNCTION("wuAQt-j+p4o", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_exp2f); + LIB_FUNCTION("9LCjpWyQ5Zc", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_pow); + LIB_FUNCTION("1D0H2KNjshE", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_powf); + LIB_FUNCTION("rtV7-jWC6Yg", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_log); + LIB_FUNCTION("RQXLbdT2lc4", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_logf); + LIB_FUNCTION("WuMbPBKN1TU", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_log10); + LIB_FUNCTION("lhpd6Wk6ccs", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_log10f); LIB_FUNCTION("gQX+4GDQjpM", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_malloc); + LIB_FUNCTION("tIhsqj0qsFE", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_free); + LIB_FUNCTION("fJnpuVVBbKk", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_operator_new); + LIB_FUNCTION("hdm0YfMa7TQ", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_operator_new); + LIB_FUNCTION("MLWl90SFWNE", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_operator_delete); + LIB_FUNCTION("z+P+xCnWLBk", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_operator_delete); + LIB_FUNCTION("cVSk9y8URbc", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, + internal_posix_memalign); }; } // namespace Libraries::LibcInternal diff --git a/src/images/utils_icon.png b/src/images/utils_icon.png new file mode 100644 index 000000000..7dfa3aa00 Binary files /dev/null and b/src/images/utils_icon.png differ diff --git a/src/main.cpp b/src/main.cpp index b12965677..17b5c11fe 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -32,7 +32,7 @@ int main(int argc, char* argv[]) { " -g, --game Specify game path to launch\n" " -p, --patch Apply specified patch file\n" " -f, --fullscreen Specify window initial fullscreen " - "state. Does not overwrite the config file." + "state. Does not overwrite the config file.\n" " -h, --help Display this help message\n"; exit(0); }}, diff --git a/src/qt_gui/gui_context_menus.h b/src/qt_gui/gui_context_menus.h index 823ad921c..7da7341da 100644 --- a/src/qt_gui/gui_context_menus.h +++ b/src/qt_gui/gui_context_menus.h @@ -451,7 +451,7 @@ private: pShellLink->SetWorkingDirectory((LPCWSTR)QFileInfo(exePath).absolutePath().utf16()); // Set arguments, eboot.bin file location - QString arguments = QString("\"%1\"").arg(targetPath); + QString arguments = QString("-g \"%1\"").arg(targetPath); pShellLink->SetArguments((LPCWSTR)arguments.utf16()); // Set the icon for the shortcut diff --git a/src/qt_gui/main.cpp b/src/qt_gui/main.cpp index 7f9b29200..318245053 100644 --- a/src/qt_gui/main.cpp +++ b/src/qt_gui/main.cpp @@ -45,7 +45,7 @@ int main(int argc, char* argv[]) { " -p, --patch Apply specified patch file\n" " -s, --show-gui Show the GUI\n" " -f, --fullscreen Specify window initial fullscreen " - "state. Does not overwrite the config file." + "state. 
Does not overwrite the config file.\n" " -h, --help Display this help message\n"; exit(0); }}, diff --git a/src/qt_gui/main_window.cpp b/src/qt_gui/main_window.cpp index 8f4fdfadb..6e5242012 100644 --- a/src/qt_gui/main_window.cpp +++ b/src/qt_gui/main_window.cpp @@ -106,6 +106,7 @@ void MainWindow::CreateActions() { m_list_mode_act_group = new QActionGroup(this); m_list_mode_act_group->addAction(ui->setlistModeListAct); m_list_mode_act_group->addAction(ui->setlistModeGridAct); + m_list_mode_act_group->addAction(ui->setlistElfAct); // create action group for themes m_theme_act_group = new QActionGroup(this); @@ -372,7 +373,7 @@ void MainWindow::CreateConnects() { ui->sizeSlider->setEnabled(true); ui->sizeSlider->setSliderPosition(slider_pos_grid); }); - // Elf + // Elf Viewer connect(ui->setlistElfAct, &QAction::triggered, m_dock_widget.data(), [this]() { BackgroundMusicPlayer::getInstance().stopMusic(); m_dock_widget->setWidget(m_elf_viewer.data()); @@ -632,10 +633,12 @@ void MainWindow::ConfigureGuiFromSettings() { Config::getMainWindowGeometryW(), Config::getMainWindowGeometryH()); ui->showGameListAct->setChecked(true); - if (isTableList) { + if (Config::getTableMode() == 0) { ui->setlistModeListAct->setChecked(true); - } else { + } else if (Config::getTableMode() == 1) { ui->setlistModeGridAct->setChecked(true); + } else if (Config::getTableMode() == 2) { + ui->setlistElfAct->setChecked(true); } BackgroundMusicPlayer::getInstance().setVolume(Config::getBGMvolume()); } @@ -967,6 +970,7 @@ void MainWindow::SetUiIcons(bool isWhite) { ui->gameInstallPathAct->setIcon(RecolorIcon(ui->gameInstallPathAct->icon(), isWhite)); ui->menuThemes->setIcon(RecolorIcon(ui->menuThemes->icon(), isWhite)); ui->menuGame_List_Icons->setIcon(RecolorIcon(ui->menuGame_List_Icons->icon(), isWhite)); + ui->menuUtils->setIcon(RecolorIcon(ui->menuUtils->icon(), isWhite)); ui->playButton->setIcon(RecolorIcon(ui->playButton->icon(), isWhite)); ui->pauseButton->setIcon(RecolorIcon(ui->pauseButton->icon(), isWhite)); ui->stopButton->setIcon(RecolorIcon(ui->stopButton->icon(), isWhite)); diff --git a/src/qt_gui/main_window_ui.h b/src/qt_gui/main_window_ui.h index a51e37d1e..5ff572f86 100644 --- a/src/qt_gui/main_window_ui.h +++ b/src/qt_gui/main_window_ui.h @@ -110,15 +110,14 @@ public: setIconSizeLargeAct->setCheckable(true); setlistModeListAct = new QAction(MainWindow); setlistModeListAct->setObjectName("setlistModeListAct"); - setlistModeListAct->setCheckable(true); - setlistModeListAct->setChecked(true); setlistModeListAct->setIcon(QIcon(":images/list_icon.png")); + setlistModeListAct->setCheckable(true); setlistModeGridAct = new QAction(MainWindow); setlistModeGridAct->setObjectName("setlistModeGridAct"); - setlistModeGridAct->setCheckable(true); setlistModeGridAct->setIcon(QIcon(":images/grid_icon.png")); + setlistModeGridAct->setCheckable(true); setlistElfAct = new QAction(MainWindow); - setlistElfAct->setObjectName("setlistModeGridAct"); + setlistElfAct->setObjectName("setlistElfAct"); setlistElfAct->setCheckable(true); gameInstallPathAct = new QAction(MainWindow); gameInstallPathAct->setObjectName("gameInstallPathAct"); @@ -250,6 +249,7 @@ public: menuSettings->setObjectName("menuSettings"); menuUtils = new QMenu(menuSettings); menuUtils->setObjectName("menuUtils"); + menuUtils->setIcon(QIcon(":images/utils_icon.png")); menuThemes = new QMenu(menuView); menuThemes->setObjectName("menuThemes"); menuThemes->setIcon(QIcon(":images/themes_icon.png")); diff --git a/src/qt_gui/translations/sq.ts 
b/src/qt_gui/translations/sq.ts index 5715371bf..f7144a001 100644 --- a/src/qt_gui/translations/sq.ts +++ b/src/qt_gui/translations/sq.ts @@ -118,22 +118,22 @@ Open Folder... - Hapni Dosjen... + Hap Dosjen... Open Game Folder - Hapni Dosjen e Lojës + Hap Dosjen e Lojës Open Save Data Folder - Hapni Dosjen e të Dhënave të Ruajtura + Hap Dosjen e të Dhënave të Ruajtura Open Log Folder - Hapni Dosjen e Regjistrimeve + Hap Dosjen e Ditarit diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index f9cbacaf2..c7ae2a1e5 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -17,6 +17,7 @@ #include "shader_recompiler/ir/type.h" #include "shader_recompiler/params.h" #include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/liverpool.h" #include "video_core/amdgpu/resource.h" namespace Shader { @@ -251,10 +252,10 @@ struct Info { bnd.user_data += ud_mask.NumRegs(); } - [[nodiscard]] std::pair GetDrawOffsets() const { - u32 vertex_offset = 0; + [[nodiscard]] std::pair GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs) const { + u32 vertex_offset = regs.index_offset; u32 instance_offset = 0; - if (vertex_offset_sgpr != -1) { + if (vertex_offset == 0 && vertex_offset_sgpr != -1) { vertex_offset = user_data[vertex_offset_sgpr]; } if (instance_offset_sgpr != -1) { diff --git a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp index 6292edfd8..ef9319891 100644 --- a/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp +++ b/src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp @@ -148,17 +148,21 @@ static void GenerateSrtProgram(Info& info, PassInfo& pass_info) { // Special case for V# step rate buffers in fetch shader for (const auto [sgpr_base, dword_offset, num_dwords] : info.srt_info.srt_reservations) { // get pointer to V# - c.mov(r10d, ptr[rdi + (sgpr_base << 2)]); - + if (sgpr_base != IR::NumScalarRegs) { + PushPtr(c, sgpr_base); + } u32 src_off = dword_offset << 2; for (auto j = 0; j < num_dwords; j++) { - c.mov(r11d, ptr[r10d + src_off]); + c.mov(r11d, ptr[rdi + src_off]); c.mov(ptr[rsi + (pass_info.dst_off_dw << 2)], r11d); src_off += 4; ++pass_info.dst_off_dw; } + if (sgpr_base != IR::NumScalarRegs) { + PopPtr(c); + } } ASSERT(pass_info.dst_off_dw == info.srt_info.flattened_bufsize_dw); diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 03936f3a8..4662def93 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -20,7 +20,7 @@ enum class Stage : u32 { Local, Compute, }; -constexpr u32 MaxStageTypes = 6; +constexpr u32 MaxStageTypes = 7; [[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { return static_cast(index); diff --git a/src/shadps4.qrc b/src/shadps4.qrc index a59cb0621..e328f2c42 100644 --- a/src/shadps4.qrc +++ b/src/shadps4.qrc @@ -6,6 +6,7 @@ images/play_icon.png images/pause_icon.png images/stop_icon.png + images/utils_icon.png images/file_icon.png images/folder_icon.png images/themes_icon.png diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 12b5de436..f7b710edd 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -410,7 +410,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); const auto offset = draw_indirect->data_offset; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; - 
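
On the GetDrawOffsets change above: the vertex offset now comes from the new index_offset register first, and the SGPR captured by fetch-shader parsing is only consulted when that register reads zero. A condensed sketch of the resolution order (field names follow the hunk; the -1 sentinel means no offset SGPR was assigned):

    #include <array>
    #include <cstdint>
    #include <utility>

    using u32 = std::uint32_t;

    struct VsInfoSketch {
        std::array<u32, 16> user_data{};
        int vertex_offset_sgpr = -1;   // -1: fetch shader exposed no vertex-offset SGPR
        int instance_offset_sgpr = -1; // -1: likewise for the instance offset
    };

    std::pair<u32, u32> GetDrawOffsets(const VsInfoSketch& info, u32 reg_index_offset) {
        u32 vertex_offset = reg_index_offset; // a non-zero register value wins
        u32 instance_offset = 0;
        if (vertex_offset == 0 && info.vertex_offset_sgpr != -1) {
            vertex_offset = info.user_data[info.vertex_offset_sgpr];
        }
        if (info.instance_offset_sgpr != -1) {
            instance_offset = info.user_data[info.instance_offset_sgpr];
        }
        return {vertex_offset, instance_offset};
    }
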
const auto size = sizeof(PM4CmdDrawIndirect::DrawInstancedArgs); + const auto size = sizeof(DrawIndirectArgs); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } @@ -427,7 +427,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); const auto offset = draw_index_indirect->data_offset; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; - const auto size = sizeof(PM4CmdDrawIndexIndirect::DrawIndexInstancedArgs); + const auto size = sizeof(DrawIndexedIndirectArgs); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } @@ -442,10 +442,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); + reinterpret_cast(header); const auto offset = draw_index_indirect->data_offset; const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr; - const auto size = sizeof(PM4CmdDrawIndexIndirect::DrawIndexInstancedArgs); if (DebugState.DumpingCurrentReg()) { DebugState.PushRegsDump(base_addr, reinterpret_cast(header), regs); } @@ -453,7 +452,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); rasterizer->ScopeMarkerBegin( fmt::format("dcb:{}:DrawIndexIndirectCountMulti", cmd_address)); - rasterizer->DrawIndirect(true, ib_address, offset, size, + rasterizer->DrawIndirect(true, ib_address, offset, draw_index_indirect->stride, draw_index_indirect->count, draw_index_indirect->countAddr); rasterizer->ScopeMarkerEnd(); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 0595a242c..2b2f2c00a 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -57,8 +57,6 @@ struct Liverpool { static constexpr u32 ConfigRegWordOffset = 0x2000; static constexpr u32 ShRegWordOffset = 0x2C00; static constexpr u32 NumRegs = 0xD000; - static constexpr u32 DrawIndirectArgsSize = 0x10u; - static constexpr u32 DrawIndexedIndirectArgsSize = 0x14u; using UserData = std::array; @@ -1113,7 +1111,8 @@ struct Liverpool { INSERT_PADDING_WORDS(2); std::array viewport_scissors; std::array viewport_depths; - INSERT_PADDING_WORDS(0xA103 - 0xA0D4); + INSERT_PADDING_WORDS(0xA102 - 0xA0D4); + u32 index_offset; u32 primitive_restart_index; INSERT_PADDING_WORDS(1); BlendConstants blend_constants; @@ -1382,6 +1381,7 @@ static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E); static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F); static_assert(GFX6_3D_REG_INDEX(generic_scissor) == 0xA090); static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094); +static_assert(GFX6_3D_REG_INDEX(index_offset) == 0xA102); static_assert(GFX6_3D_REG_INDEX(primitive_restart_index) == 0xA103); static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B); static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F); diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index d6cab23d2..be6751285 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -778,14 +778,15 @@ struct PM4CmdDispatchIndirect { u32 dispatch_initiator; ///< Dispatch Initiator Register }; -struct PM4CmdDrawIndirect { - struct DrawInstancedArgs { - u32 vertex_count_per_instance; - u32 instance_count; - u32 start_vertex_location; - u32 start_instance_location; - }; +struct DrawIndirectArgs { + u32 vertex_count_per_instance; + u32 instance_count; + u32 start_vertex_location; + u32 start_instance_location; +}; +static_assert(sizeof(DrawIndirectArgs) == 
0x10u); +struct PM4CmdDrawIndirect { PM4Type3Header header; ///< header u32 data_offset; ///< Byte aligned offset where the required data structure starts union { @@ -801,15 +802,16 @@ struct PM4CmdDrawIndirect { u32 draw_initiator; ///< Draw Initiator Register }; -struct PM4CmdDrawIndexIndirect { - struct DrawIndexInstancedArgs { - u32 index_count_per_instance; - u32 instance_count; - u32 start_index_location; - u32 base_vertex_location; - u32 start_instance_location; - }; +struct DrawIndexedIndirectArgs { + u32 index_count_per_instance; + u32 instance_count; + u32 start_index_location; + u32 base_vertex_location; + u32 start_instance_location; +}; +static_assert(sizeof(DrawIndexedIndirectArgs) == 0x14u); +struct PM4CmdDrawIndexIndirect { PM4Type3Header header; ///< header u32 data_offset; ///< Byte aligned offset where the required data structure starts union { @@ -822,16 +824,29 @@ struct PM4CmdDrawIndexIndirect { BitField<0, 16, u32> start_inst_loc; ///< Offset where the CP will write the ///< StartInstanceLocation it fetched from memory }; + u32 draw_initiator; ///< Draw Initiator Register +}; +struct PM4CmdDrawIndexIndirectMulti { + PM4Type3Header header; ///< header + u32 data_offset; ///< Byte aligned offset where the required data structure starts + union { + u32 dw2; + BitField<0, 16, u32> base_vtx_loc; ///< Offset where the CP will write the + ///< BaseVertexLocation it fetched from memory + }; + union { + u32 dw3; + BitField<0, 16, u32> start_inst_loc; ///< Offset where the CP will write the + ///< StartInstanceLocation it fetched from memory + }; union { u32 dw4; - struct { - BitField<0, 16, u32> drawIndexLoc; ///< register offset to write the Draw Index count - BitField<30, 1, u32> - countIndirectEnable; ///< Indicates the data structure count is in memory - BitField<31, 1, u32> - drawIndexEnable; ///< Enables writing of Draw Index count to DRAW_INDEX_LOC - }; + BitField<0, 16, u32> drawIndexLoc; ///< register offset to write the Draw Index count + BitField<30, 1, u32> + countIndirectEnable; ///< Indicates the data structure count is in memory + BitField<31, 1, u32> + drawIndexEnable; ///< Enables writing of Draw Index count to DRAW_INDEX_LOC }; u32 count; ///< Count of data structures to loop through before going to next packet u64 countAddr; ///< DWord aligned Address[31:2]; Valid if countIndirectEnable is set diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 42d3deba7..77b353c2f 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -620,10 +620,10 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) { static constexpr FindFlags find_flags = FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize; - ImageInfo info{}; - info.guest_address = device_addr; - info.guest_size_bytes = size; - const ImageId image_id = texture_cache.FindImage(info, find_flags); + TextureCache::BaseDesc desc{}; + desc.info.guest_address = device_addr; + desc.info.guest_size_bytes = size; + const ImageId image_id = texture_cache.FindImage(desc, find_flags); if (!image_id) { return false; } @@ -635,7 +635,7 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, "Texel buffer aliases image subresources {:x} : {:x}", device_addr, image.info.guest_address); boost::container::small_vector copies; - u32 offset = 
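
The pm4_cmds.h refactor above hoists the indirect-argument layouts out of their packet structs and pins each with a static_assert, which is what lets `sizeof(DrawIndirectArgs)` and `sizeof(DrawIndexedIndirectArgs)` replace the hand-kept 0x10/0x14 constants deleted from liverpool.h. The pattern in isolation (sketch types; the real structs live in the AmdGpu namespace):

    #include <cstdint>

    using u32 = std::uint32_t;

    // GPU-visible record: field order and packing must match what the command
    // processor fetches, so the byte size is locked down at compile time.
    struct DrawIndirectArgsSketch {
        u32 vertex_count_per_instance;
        u32 instance_count;
        u32 start_vertex_location;
        u32 start_instance_location;
    };
    static_assert(sizeof(DrawIndirectArgsSketch) == 0x10);

    struct DrawIndexedIndirectArgsSketch {
        u32 index_count_per_instance;
        u32 instance_count;
        u32 start_index_location;
        u32 base_vertex_location;
        u32 start_instance_location;
    };
    static_assert(sizeof(DrawIndexedIndirectArgsSketch) == 0x14);
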
buffer.Offset(image.cpu_addr); + u32 offset = buffer.Offset(image.info.guest_address); const u32 num_layers = image.info.resources.layers; const u32 max_offset = offset + size; for (u32 m = 0; m < image.info.resources.levels; m++) { diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index 8c20ee6ed..d26a7067a 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -114,8 +114,8 @@ struct PageManager::Impl { // Notify rasterizer about the fault. const VAddr addr = msg.arg.pagefault.address; - const VAddr addr_page = Common::AlignDown(addr, PAGESIZE); - rasterizer->InvalidateMemory(addr_page, PAGESIZE); + const VAddr addr_page = GetPageAddr(addr); + rasterizer->InvalidateMemory(addr, addr_page, PAGESIZE); } } @@ -157,8 +157,8 @@ struct PageManager::Impl { const auto addr = reinterpret_cast(fault_address); const bool is_write = Common::IsWriteError(context); if (is_write && owned_ranges.find(addr) != owned_ranges.end()) { - const VAddr addr_aligned = Common::AlignDown(addr, PAGESIZE); - rasterizer->InvalidateMemory(addr_aligned, PAGESIZE); + const VAddr addr_aligned = GetPageAddr(addr); + rasterizer->InvalidateMemory(addr, addr_aligned, PAGESIZE); return true; } return false; @@ -174,6 +174,14 @@ PageManager::PageManager(Vulkan::Rasterizer* rasterizer_) PageManager::~PageManager() = default; +VAddr PageManager::GetPageAddr(VAddr addr) { + return Common::AlignDown(addr, PAGESIZE); +} + +VAddr PageManager::GetNextPageAddr(VAddr addr) { + return Common::AlignUp(addr + 1, PAGESIZE); +} + void PageManager::OnGpuMap(VAddr address, size_t size) { impl->OnMap(address, size); } diff --git a/src/video_core/page_manager.h b/src/video_core/page_manager.h index 0dc022aa5..29a946a8f 100644 --- a/src/video_core/page_manager.h +++ b/src/video_core/page_manager.h @@ -28,6 +28,9 @@ public: /// Increase/decrease the number of surface in pages touching the specified region void UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta); + static VAddr GetPageAddr(VAddr addr); + static VAddr GetNextPageAddr(VAddr addr); + private: struct Impl; std::unique_ptr impl; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 4ab290780..09d4e4195 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -15,8 +15,10 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler DescriptorHeap& desc_heap_, vk::PipelineCache pipeline_cache, u64 compute_key_, const Shader::Info& info_, vk::ShaderModule module) - : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, compute_key{compute_key_}, - info{&info_} { + : Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} { + auto& info = stages[int(Shader::Stage::Compute)]; + info = &info_; + const vk::PipelineShaderStageCreateInfo shader_ci = { .stage = vk::ShaderStageFlagBits::eCompute, .module = module, @@ -118,90 +120,4 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler ComputePipeline::~ComputePipeline() = default; -bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, - VideoCore::TextureCache& texture_cache) const { - // Bind resource buffers and textures. 
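
The PageManager helpers introduced above are the standard power-of-two alignment idioms; GetNextPageAddr aligns addr + 1 upward so that an address already on a boundary still advances a full page, keeping [GetPageAddr(a), GetNextPageAddr(a)) non-empty. A sketch assuming PAGESIZE is a power of two (4 KiB here for illustration):

    #include <cstdint>

    using VAddr = std::uint64_t;
    constexpr VAddr PAGESIZE = 0x1000; // illustrative value; must be a power of two

    constexpr VAddr GetPageAddr(VAddr addr) {
        return addr & ~(PAGESIZE - 1); // AlignDown: clear the low page bits
    }

    constexpr VAddr GetNextPageAddr(VAddr addr) {
        return (addr + PAGESIZE) & ~(PAGESIZE - 1); // AlignUp(addr + 1, PAGESIZE)
    }
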
- boost::container::small_vector set_writes; - BufferBarriers buffer_barriers; - Shader::PushData push_data{}; - Shader::Backend::Bindings binding{}; - - info->PushUd(binding, push_data); - - buffer_infos.clear(); - buffer_views.clear(); - image_infos.clear(); - - // Most of the time when a metadata is updated with a shader it gets cleared. It means - // we can skip the whole dispatch and update the tracked state instead. Also, it is not - // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we - // will need its full emulation anyways. For cases of metadata read a warning will be logged. - const auto IsMetaUpdate = [&](const auto& desc) { - const VAddr address = desc.GetSharp(*info).base_address; - if (desc.is_written) { - if (texture_cache.TouchMeta(address, true)) { - LOG_TRACE(Render_Vulkan, "Metadata update skipped"); - return true; - } - } else { - if (texture_cache.IsMeta(address)) { - LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)"); - } - } - return false; - }; - - for (const auto& desc : info->buffers) { - if (desc.is_gds_buffer) { - continue; - } - if (IsMetaUpdate(desc)) { - return false; - } - } - for (const auto& desc : info->texture_buffers) { - if (IsMetaUpdate(desc)) { - return false; - } - } - - BindBuffers(buffer_cache, texture_cache, *info, binding, push_data, set_writes, - buffer_barriers); - - BindTextures(texture_cache, *info, binding, set_writes); - - if (set_writes.empty()) { - return false; - } - - const auto cmdbuf = scheduler.CommandBuffer(); - if (!buffer_barriers.empty()) { - const auto dependencies = vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = u32(buffer_barriers.size()), - .pBufferMemoryBarriers = buffer_barriers.data(), - }; - scheduler.EndRendering(); - cmdbuf.pipelineBarrier2(dependencies); - } - - cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(push_data), - &push_data); - - // Bind descriptor set. 
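
The IsMetaUpdate lambda in the block above (removed here, though the heuristic survives the refactor) encodes a useful trick: when a compute shader writes to an address the texture cache tracks as metadata (CMASK/HTile), it is almost always a clear, so the whole dispatch is skipped and only the tracked "cleared" state is updated; metadata reads merely warn. A sketch of that filter with a toy registry standing in for the real TouchMeta/IsMeta:

    #include <cstdint>
    #include <unordered_map>

    using VAddr = std::uint64_t;

    struct MetaRegistrySketch {
        std::unordered_map<VAddr, bool> cleared; // metadata address -> cleared flag
        bool IsMeta(VAddr a) const {
            return cleared.contains(a);
        }
        bool TouchMeta(VAddr a, bool clear) {
            auto it = cleared.find(a);
            if (it == cleared.end()) {
                return false; // not tracked metadata
            }
            it->second = clear;
            return true;
        }
    };

    // True when a buffer access makes the whole dispatch skippable.
    bool IsMetaUpdateSketch(MetaRegistrySketch& meta, VAddr address, bool is_written) {
        if (is_written) {
            return meta.TouchMeta(address, true); // treat the write as a clear
        }
        if (meta.IsMeta(address)) {
            // Rare introspection cases (HTile, CRAA) would need full emulation; warn only.
        }
        return false;
    }
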
- if (uses_push_descriptors) { - cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, - set_writes); - return true; - } - const auto desc_set = desc_heap.Commit(*desc_layout); - for (auto& set_write : set_writes) { - set_write.dstSet = desc_set; - } - instance.GetDevice().updateDescriptorSets(set_writes, {}); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, desc_set, {}); - - return true; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index f1bc7285a..ca429b58d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -24,13 +24,8 @@ public: vk::ShaderModule module); ~ComputePipeline(); - bool BindResources(VideoCore::BufferCache& buffer_cache, - VideoCore::TextureCache& texture_cache) const; - private: u64 compute_key; - const Shader::Info* info; - bool uses_push_descriptors{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 32e3bf8f8..d0d16ac75 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -16,10 +16,6 @@ namespace Vulkan { -static constexpr auto gp_stage_flags = vk::ShaderStageFlagBits::eVertex | - vk::ShaderStageFlagBits::eGeometry | - vk::ShaderStageFlagBits::eFragment; - GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_, vk::PipelineCache pipeline_cache, @@ -389,67 +385,4 @@ void GraphicsPipeline::BuildDescSetLayout() { desc_layout = std::move(layout); } -void GraphicsPipeline::BindResources(const Liverpool::Regs& regs, - VideoCore::BufferCache& buffer_cache, - VideoCore::TextureCache& texture_cache) const { - // Bind resource buffers and textures. - boost::container::small_vector set_writes; - BufferBarriers buffer_barriers; - Shader::PushData push_data{}; - Shader::Backend::Bindings binding{}; - - buffer_infos.clear(); - buffer_views.clear(); - image_infos.clear(); - - for (const auto* stage : stages) { - if (!stage) { - continue; - } - if (stage->uses_step_rates) { - push_data.step0 = regs.vgt_instance_step_rate_0; - push_data.step1 = regs.vgt_instance_step_rate_1; - } - stage->PushUd(binding, push_data); - - BindBuffers(buffer_cache, texture_cache, *stage, binding, push_data, set_writes, - buffer_barriers); - - BindTextures(texture_cache, *stage, binding, set_writes); - } - - const auto cmdbuf = scheduler.CommandBuffer(); - SCOPE_EXIT { - cmdbuf.pushConstants(*pipeline_layout, gp_stage_flags, 0U, sizeof(push_data), &push_data); - cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, Handle()); - }; - - if (set_writes.empty()) { - return; - } - - if (!buffer_barriers.empty()) { - const auto dependencies = vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = u32(buffer_barriers.size()), - .pBufferMemoryBarriers = buffer_barriers.data(), - }; - scheduler.EndRendering(); - cmdbuf.pipelineBarrier2(dependencies); - } - - // Bind descriptor set. 
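
The epilogue deleted here is the part that becomes common to both pipeline kinds in Pipeline::BindResources (vk_pipeline_common.cpp below): either the writes are pushed straight into the command buffer via VK_KHR_push_descriptor, or a set is committed from the descriptor heap, every write is retargeted at it, and the set is bound. A sketch of that shared tail, with the committed set passed in so no heap API has to be assumed:

    #include <vector>
    #include <vulkan/vulkan.hpp>

    void BindSetWrites(vk::CommandBuffer cmdbuf, vk::Device device,
                       vk::PipelineBindPoint bind_point, vk::PipelineLayout layout,
                       vk::DescriptorSet committed_set, bool uses_push_descriptors,
                       std::vector<vk::WriteDescriptorSet>& set_writes) {
        if (set_writes.empty()) {
            return; // nothing referenced by the shaders
        }
        if (uses_push_descriptors) {
            // VK_KHR_push_descriptor: no set object to allocate or track.
            cmdbuf.pushDescriptorSetKHR(bind_point, layout, 0, set_writes);
            return;
        }
        // Fallback path: aim every queued write at the freshly committed set.
        for (auto& write : set_writes) {
            write.dstSet = committed_set;
        }
        device.updateDescriptorSets(set_writes, {});
        cmdbuf.bindDescriptorSets(bind_point, layout, 0, committed_set, {});
    }
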
- if (uses_push_descriptors) { - cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, - set_writes); - return; - } - const auto desc_set = desc_heap.Commit(*desc_layout); - for (auto& set_write : set_writes) { - set_write.dstSet = desc_set; - } - instance.GetDevice().updateDescriptorSets(set_writes, {}); - cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0, desc_set, {}); -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index f7762eb12..4f4abfd16 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include + #include "common/types.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_common.h" @@ -14,8 +15,8 @@ class TextureCache; namespace Vulkan { -static constexpr u32 MaxVertexBufferCount = 32; static constexpr u32 MaxShaderStages = 5; +static constexpr u32 MaxVertexBufferCount = 32; class Instance; class Scheduler; @@ -61,13 +62,6 @@ public: std::span modules); ~GraphicsPipeline(); - void BindResources(const Liverpool::Regs& regs, VideoCore::BufferCache& buffer_cache, - VideoCore::TextureCache& texture_cache) const; - - const Shader::Info& GetStage(Shader::Stage stage) const noexcept { - return *stages[u32(stage)]; - } - bool IsEmbeddedVs() const noexcept { static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash; @@ -99,9 +93,7 @@ private: void BuildDescSetLayout(); private: - std::array stages{}; GraphicsPipelineKey key; - bool uses_push_descriptors{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 7e44bbf09..43facbae4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -38,8 +38,6 @@ struct Program { }; class PipelineCache { - static constexpr size_t MaxShaderStages = 5; - public: explicit PipelineCache(const Instance& instance, Scheduler& scheduler, AmdGpu::Liverpool* liverpool); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp index 4c297cd42..6b48a40a0 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.cpp @@ -12,230 +12,47 @@ namespace Vulkan { -boost::container::static_vector Pipeline::image_infos; -boost::container::static_vector Pipeline::buffer_views; -boost::container::static_vector Pipeline::buffer_infos; - Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_, - vk::PipelineCache pipeline_cache) - : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_} {} + vk::PipelineCache pipeline_cache, bool is_compute_ /*= false*/) + : instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, is_compute{is_compute_} {} Pipeline::~Pipeline() = default; -void Pipeline::BindBuffers(VideoCore::BufferCache& buffer_cache, - VideoCore::TextureCache& texture_cache, const Shader::Info& stage, - Shader::Backend::Bindings& binding, Shader::PushData& push_data, - DescriptorWrites& set_writes, BufferBarriers& buffer_barriers) const { - using BufferBindingInfo = std::pair; - static boost::container::static_vector 
buffer_bindings; +void Pipeline::BindResources(DescriptorWrites& set_writes, const BufferBarriers& buffer_barriers, + const Shader::PushData& push_data) const { + const auto cmdbuf = scheduler.CommandBuffer(); + const auto bind_point = + IsCompute() ? vk::PipelineBindPoint::eCompute : vk::PipelineBindPoint::eGraphics; - buffer_bindings.clear(); - - for (const auto& desc : stage.buffers) { - const auto vsharp = desc.GetSharp(stage); - if (!desc.is_gds_buffer && vsharp.base_address != 0 && vsharp.GetSize() > 0) { - const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize()); - buffer_bindings.emplace_back(buffer_id, vsharp); - } else { - buffer_bindings.emplace_back(VideoCore::BufferId{}, vsharp); - } + if (!buffer_barriers.empty()) { + const auto dependencies = vk::DependencyInfo{ + .dependencyFlags = vk::DependencyFlagBits::eByRegion, + .bufferMemoryBarrierCount = u32(buffer_barriers.size()), + .pBufferMemoryBarriers = buffer_barriers.data(), + }; + scheduler.EndRendering(); + cmdbuf.pipelineBarrier2(dependencies); } - using TexBufferBindingInfo = std::pair; - static boost::container::static_vector texbuffer_bindings; + const auto stage_flags = IsCompute() ? vk::ShaderStageFlagBits::eCompute : gp_stage_flags; + cmdbuf.pushConstants(*pipeline_layout, stage_flags, 0u, sizeof(push_data), &push_data); - texbuffer_bindings.clear(); - - for (const auto& desc : stage.texture_buffers) { - const auto vsharp = desc.GetSharp(stage); - if (vsharp.base_address != 0 && vsharp.GetSize() > 0 && - vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { - const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize()); - texbuffer_bindings.emplace_back(buffer_id, vsharp); - } else { - texbuffer_bindings.emplace_back(VideoCore::BufferId{}, vsharp); - } + // Bind descriptor set. + if (set_writes.empty()) { + return; } - // Bind the flattened user data buffer as a UBO so it's accessible to the shader - if (stage.has_readconst) { - const auto [vk_buffer, offset] = buffer_cache.ObtainHostUBO(stage.flattened_ud_buf); - buffer_infos.emplace_back(vk_buffer->Handle(), offset, - stage.flattened_ud_buf.size() * sizeof(u32)); - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding.unified++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eUniformBuffer, - .pBufferInfo = &buffer_infos.back(), - }); - ++binding.buffer; + if (uses_push_descriptors) { + cmdbuf.pushDescriptorSetKHR(bind_point, *pipeline_layout, 0, set_writes); + return; } - // Second pass to re-bind buffers that were updated after binding - for (u32 i = 0; i < buffer_bindings.size(); i++) { - const auto& [buffer_id, vsharp] = buffer_bindings[i]; - const auto& desc = stage.buffers[i]; - const bool is_storage = desc.IsStorage(vsharp); - if (!buffer_id) { - if (desc.is_gds_buffer) { - const auto* gds_buf = buffer_cache.GetGdsBuffer(); - buffer_infos.emplace_back(gds_buf->Handle(), 0, gds_buf->SizeBytes()); - } else if (instance.IsNullDescriptorSupported()) { - buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE); - } else { - auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID); - buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE); - } - } else { - const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer( - vsharp.base_address, vsharp.GetSize(), desc.is_written, false, buffer_id); - const u32 alignment = - is_storage ? 
instance.StorageMinAlignment() : instance.UniformMinAlignment(); - const u32 offset_aligned = Common::AlignDown(offset, alignment); - const u32 adjust = offset - offset_aligned; - ASSERT(adjust % 4 == 0); - push_data.AddOffset(binding.buffer, adjust); - buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, - vsharp.GetSize() + adjust); - } - - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding.unified++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer - : vk::DescriptorType::eUniformBuffer, - .pBufferInfo = &buffer_infos.back(), - }); - ++binding.buffer; - } - - const auto null_buffer_view = - instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView(); - for (u32 i = 0; i < texbuffer_bindings.size(); i++) { - const auto& [buffer_id, vsharp] = texbuffer_bindings[i]; - const auto& desc = stage.texture_buffers[i]; - vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view); - if (buffer_id) { - const u32 alignment = instance.TexelBufferMinAlignment(); - const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer( - vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id); - const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3; - ASSERT_MSG(fmt_stride == vsharp.GetStride(), - "Texel buffer stride must match format stride"); - const u32 offset_aligned = Common::AlignDown(offset, alignment); - const u32 adjust = offset - offset_aligned; - ASSERT(adjust % fmt_stride == 0); - push_data.AddOffset(binding.buffer, adjust / fmt_stride); - buffer_view = - vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written, - vsharp.GetDataFmt(), vsharp.GetNumberFmt()); - if (auto barrier = - vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite - : vk::AccessFlagBits2::eShaderRead, - vk::PipelineStageFlagBits2::eComputeShader)) { - buffer_barriers.emplace_back(*barrier); - } - if (desc.is_written) { - texture_cache.InvalidateMemoryFromGPU(vsharp.base_address, vsharp.GetSize()); - } - } - - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding.unified++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = desc.is_written ? 
vk::DescriptorType::eStorageTexelBuffer - : vk::DescriptorType::eUniformTexelBuffer, - .pTexelBufferView = &buffer_view, - }); - ++binding.buffer; - } -} - -void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage, - Shader::Backend::Bindings& binding, - DescriptorWrites& set_writes) const { - - using ImageBindingInfo = std::tuple; - static boost::container::static_vector image_bindings; - - image_bindings.clear(); - - for (const auto& image_desc : stage.images) { - const auto tsharp = image_desc.GetSharp(stage); - if (tsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { - VideoCore::ImageInfo image_info{tsharp, image_desc}; - const auto image_id = texture_cache.FindImage(image_info); - auto& image = texture_cache.GetImage(image_id); - image.flags |= VideoCore::ImageFlagBits::Bound; - image_bindings.emplace_back(image_id, tsharp, image_desc); - } else { - image_bindings.emplace_back(VideoCore::ImageId{}, tsharp, image_desc); - } - - if (texture_cache.IsMeta(tsharp.Address())) { - LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (texture)"); - } - } - - // Second pass to re-bind images that were updated after binding - for (auto [image_id, tsharp, desc] : image_bindings) { - if (!image_id) { - if (instance.IsNullDescriptorSupported()) { - image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); - } else { - auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID); - image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view, - vk::ImageLayout::eGeneral); - } - } else { - auto& image = texture_cache.GetImage(image_id); - if (True(image.flags & VideoCore::ImageFlagBits::NeedsRebind)) { - image_id = texture_cache.FindImage(image.info); - } - VideoCore::ImageViewInfo view_info{tsharp, desc}; - auto& image_view = texture_cache.FindTexture(image_id, view_info); - image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, - texture_cache.GetImage(image_id).last_state.layout); - image.flags &= - ~(VideoCore::ImageFlagBits::NeedsRebind | VideoCore::ImageFlagBits::Bound); - } - - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding.unified++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = desc.is_storage ? 
vk::DescriptorType::eStorageImage - : vk::DescriptorType::eSampledImage, - .pImageInfo = &image_infos.back(), - }); - } - - for (const auto& sampler : stage.samplers) { - auto ssharp = sampler.GetSharp(stage); - if (sampler.disable_aniso) { - const auto& tsharp = stage.images[sampler.associated_image].GetSharp(stage); - if (tsharp.base_level == 0 && tsharp.last_level == 0) { - ssharp.max_aniso.Assign(AmdGpu::AnisoRatio::One); - } - } - const auto vk_sampler = texture_cache.GetSampler(ssharp); - image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding.unified++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eSampler, - .pImageInfo = &image_infos.back(), - }); + const auto desc_set = desc_heap.Commit(*desc_layout); + for (auto& set_write : set_writes) { + set_write.dstSet = desc_set; } + instance.GetDevice().updateDescriptorSets(set_writes, {}); + cmdbuf.bindDescriptorSets(bind_point, *pipeline_layout, 0, desc_set, {}); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.h b/src/video_core/renderer_vulkan/vk_pipeline_common.h index 75764bfa6..8c48c83f7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.h @@ -6,14 +6,18 @@ #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/info.h" #include "video_core/renderer_vulkan/vk_common.h" +#include "video_core/texture_cache/texture_cache.h" namespace VideoCore { class BufferCache; -class TextureCache; } // namespace VideoCore namespace Vulkan { +static constexpr auto gp_stage_flags = vk::ShaderStageFlagBits::eVertex | + vk::ShaderStageFlagBits::eGeometry | + vk::ShaderStageFlagBits::eFragment; + class Instance; class Scheduler; class DescriptorHeap; @@ -21,7 +25,7 @@ class DescriptorHeap; class Pipeline { public: Pipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap, - vk::PipelineCache pipeline_cache); + vk::PipelineCache pipeline_cache, bool is_compute = false); virtual ~Pipeline(); vk::Pipeline Handle() const noexcept { @@ -32,16 +36,27 @@ public: return *pipeline_layout; } + auto GetStages() const { + if (is_compute) { + return std::span{stages.cend() - 1, stages.cend()}; + } else { + return std::span{stages.cbegin(), stages.cend() - 1}; + } + } + + const Shader::Info& GetStage(Shader::Stage stage) const noexcept { + return *stages[u32(stage)]; + } + + bool IsCompute() const { + return is_compute; + } + using DescriptorWrites = boost::container::small_vector; using BufferBarriers = boost::container::small_vector; - void BindBuffers(VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache, - const Shader::Info& stage, Shader::Backend::Bindings& binding, - Shader::PushData& push_data, DescriptorWrites& set_writes, - BufferBarriers& buffer_barriers) const; - - void BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage, - Shader::Backend::Bindings& binding, DescriptorWrites& set_writes) const; + void BindResources(DescriptorWrites& set_writes, const BufferBarriers& buffer_barriers, + const Shader::PushData& push_data) const; protected: const Instance& instance; @@ -50,9 +65,9 @@ protected: vk::UniquePipeline pipeline; vk::UniquePipelineLayout pipeline_layout; vk::UniqueDescriptorSetLayout desc_layout; - static boost::container::static_vector image_infos; - static boost::container::static_vector buffer_views; - 
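
GetStages() just above carves a view out of the single fixed stages array so shared binding code can walk only the relevant slots; with MaxStageTypes bumped to 7 in runtime_info.h and Compute last in the Stage enum, a compute pipeline exposes exactly the final slot. A compilable sketch of the same carving (C++20 spans; StageInfo stands in for Shader::Info):

    #include <array>
    #include <cstddef>
    #include <span>

    struct StageInfo; // opaque per-stage shader metadata

    inline constexpr size_t MaxStageTypes = 7; // graphics stages + trailing Compute slot

    struct PipelineSketch {
        std::array<const StageInfo*, MaxStageTypes> stages{};
        bool is_compute = false;

        std::span<const StageInfo* const> GetStages() const {
            return is_compute ? std::span{stages.cend() - 1, stages.cend()}    // Compute only
                              : std::span{stages.cbegin(), stages.cend() - 1}; // all but Compute
        }
    };
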
static boost::container::static_vector buffer_infos; + std::array stages{}; + bool uses_push_descriptors{}; + const bool is_compute; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_presenter.h b/src/video_core/renderer_vulkan/vk_presenter.h index cb44a352a..4d9226dec 100644 --- a/src/video_core/renderer_vulkan/vk_presenter.h +++ b/src/video_core/renderer_vulkan/vk_presenter.h @@ -55,8 +55,8 @@ public: Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address, bool is_eop) { - const auto info = VideoCore::ImageInfo{attribute, cpu_address}; - const auto image_id = texture_cache.FindImage(info); + auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address}; + const auto image_id = texture_cache.FindImage(desc); texture_cache.UpdateImage(image_id, is_eop ? nullptr : &flip_scheduler); return PrepareFrameInternal(image_id, is_eop); } @@ -68,9 +68,11 @@ public: VideoCore::Image& RegisterVideoOutSurface( const Libraries::VideoOut::BufferAttributeGroup& attribute, VAddr cpu_address) { vo_buffers_addr.emplace_back(cpu_address); - const auto info = VideoCore::ImageInfo{attribute, cpu_address}; - const auto image_id = texture_cache.FindImage(info); - return texture_cache.GetImage(image_id); + auto desc = VideoCore::TextureCache::VideoOutDesc{attribute, cpu_address}; + const auto image_id = texture_cache.FindImage(desc); + auto& image = texture_cache.GetImage(image_id); + image.usage.vo_surface = 1u; + return image; } bool IsVideoOutSurface(const AmdGpu::Liverpool::ColorBuffer& color_buffer) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a8b4728c0..a6fc872d9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -75,6 +75,105 @@ bool Rasterizer::FilterDraw() { return true; } +RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { + // Prefetch color and depth buffers to let texture cache handle possible overlaps with bound + // textures (e.g. mipgen) + RenderState state; + + cb_descs.clear(); + db_desc.reset(); + + const auto& regs = liverpool->regs; + + if (regs.color_control.degamma_enable) { + LOG_WARNING(Render_Vulkan, "Color buffers require gamma correction"); + } + + for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) { + const auto& col_buf = regs.color_buffers[col_buf_id]; + if (!col_buf) { + continue; + } + + // If the color buffer is still bound but rendering to it is disabled by the target + // mask, we need to prevent the render area from being affected by unbound render target + // extents. + if (!regs.color_target_mask.GetMask(col_buf_id)) { + continue; + } + + // Skip stale color buffers if shader doesn't output to them. Otherwise it will perform + // an unnecessary transition and may result in state conflict if the resource is already + // bound for reading. 
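
The comment that closes this hunk is the third of three early-outs in PrepareRenderState (continued below): a color buffer is attached only if it is programmed, enabled in CB_TARGET_MASK, and actually written by the pixel shader's MRT mask. A condensed sketch; the 4-bits-per-target reading of the mask is an assumption about GetMask() here, not something this diff states:

    #include <cstdint>

    using u32 = std::uint32_t;

    // True when color buffer 'slot' should be attached for rendering.
    bool ShouldAttachColorBuffer(bool programmed, u32 cb_target_mask, u32 mrt_mask, u32 slot) {
        if (!programmed) {
            return false; // no buffer bound in this slot
        }
        if (((cb_target_mask >> (slot * 4)) & 0xfu) == 0) {
            return false; // writes masked off: keep stale extents out of the render area
        }
        if ((mrt_mask & (1u << slot)) == 0) {
            return false; // shader never outputs here: skip a pointless layout transition
        }
        return true;
    }
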
+ if ((mrt_mask & (1 << col_buf_id)) == 0) { + continue; + } + + const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress()); + texture_cache.TouchMeta(col_buf.CmaskAddress(), false); + + const auto& hint = liverpool->last_cb_extent[col_buf_id]; + auto& [image_id, desc] = cb_descs.emplace_back(std::piecewise_construct, std::tuple{}, + std::tuple{col_buf, hint}); + const auto& image_view = texture_cache.FindRenderTarget(desc); + image_id = bound_images.emplace_back(image_view.image_id); + auto& image = texture_cache.GetImage(image_id); + image.binding.is_target = 1u; + + const auto mip = image_view.info.range.base.level; + state.width = std::min(state.width, std::max(image.info.size.width >> mip, 1u)); + state.height = std::min(state.height, std::max(image.info.size.height >> mip, 1u)); + state.color_images[state.num_color_attachments] = image.image; + state.color_attachments[state.num_color_attachments++] = { + .imageView = *image_view.image_view, + .imageLayout = vk::ImageLayout::eUndefined, + .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = + is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{}, + }; + } + + using ZFormat = AmdGpu::Liverpool::DepthBuffer::ZFormat; + using StencilFormat = AmdGpu::Liverpool::DepthBuffer::StencilFormat; + if (regs.depth_buffer.Address() != 0 && + ((regs.depth_control.depth_enable && regs.depth_buffer.z_info.format != ZFormat::Invalid) || + (regs.depth_control.stencil_enable && + regs.depth_buffer.stencil_info.format != StencilFormat::Invalid))) { + const auto htile_address = regs.depth_htile_data_base.GetAddress(); + const bool is_clear = regs.depth_render_control.depth_clear_enable || + texture_cache.IsMetaCleared(htile_address); + const auto& hint = liverpool->last_db_extent; + auto& [image_id, desc] = + db_desc.emplace(std::piecewise_construct, std::tuple{}, + std::tuple{regs.depth_buffer, regs.depth_view, regs.depth_control, + htile_address, hint}); + const auto& image_view = texture_cache.FindDepthTarget(desc); + image_id = bound_images.emplace_back(image_view.image_id); + auto& image = texture_cache.GetImage(image_id); + image.binding.is_target = 1u; + + state.width = std::min(state.width, image.info.size.width); + state.height = std::min(state.height, image.info.size.height); + state.depth_image = image.image; + state.depth_attachment = { + .imageView = *image_view.image_view, + .imageLayout = vk::ImageLayout::eUndefined, + .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear, + .stencil = regs.stencil_clear}}, + }; + texture_cache.TouchMeta(htile_address, false); + state.has_depth = + regs.depth_buffer.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid; + state.has_stencil = regs.depth_buffer.stencil_info.format != + AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid; + } + + return state; +} + void Rasterizer::Draw(bool is_indexed, u32 index_offset) { RENDERER_TRACE; @@ -82,27 +181,29 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { return; } - const auto cmdbuf = scheduler.CommandBuffer(); const auto& regs = liverpool->regs; const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline(); if (!pipeline) { return; } - try { - pipeline->BindResources(regs, buffer_cache, texture_cache); - } catch (...) 
{
-        UNREACHABLE();
+    auto state = PrepareRenderState(pipeline->GetMrtMask());
+
+    if (!BindResources(pipeline)) {
+        return;
     }
 
     const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
     buffer_cache.BindVertexBuffers(vs_info);
     const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset);
 
-    BeginRendering(*pipeline);
+    BeginRendering(*pipeline, state);
     UpdateDynamicState(*pipeline);
 
-    const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets();
+    const auto [vertex_offset, instance_offset] = vs_info.GetDrawOffsets(regs);
+
+    const auto cmdbuf = scheduler.CommandBuffer();
+    cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
 
     if (is_indexed) {
         cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, s32(vertex_offset),
@@ -113,9 +214,11 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
         cmdbuf.draw(num_vertices, regs.num_instances.NumInstances(), vertex_offset,
                     instance_offset);
     }
+
+    ResetBindings();
 }
 
-void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u32 size,
+void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u32 stride,
                               u32 max_count, VAddr count_address) {
     RENDERER_TRACE;
 
@@ -124,25 +227,34 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
     }
 
     const auto& regs = liverpool->regs;
-    const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline();
-    if (!pipeline) {
+
+    if (regs.primitive_type == AmdGpu::PrimitiveType::QuadList) {
+        // For QuadList we use a generated index buffer to convert quads to triangles. Since this
+        // changes the type of the draw, the arguments are not valid for this case. We need to run
+        // a conversion pass to repack the indirect arguments buffer first.
+        LOG_WARNING(Render_Vulkan, "QuadList primitive type is not supported for indirect draw");
         return;
     }
     ASSERT_MSG(regs.primitive_type != AmdGpu::PrimitiveType::RectList,
               "Unsupported primitive type for indirect draw");
 
-    try {
-        pipeline->BindResources(regs, buffer_cache, texture_cache);
-    } catch (...) {
-        UNREACHABLE();
+    const GraphicsPipeline* pipeline = pipeline_cache.GetGraphicsPipeline();
+    if (!pipeline) {
+        return;
+    }
+
+    auto state = PrepareRenderState(pipeline->GetMrtMask());
+
+    if (!BindResources(pipeline)) {
+        return;
     }
 
     const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
     buffer_cache.BindVertexBuffers(vs_info);
     buffer_cache.BindIndexBuffer(is_indexed, 0);
 
-    const auto [buffer, base] = buffer_cache.ObtainBuffer(arg_address + offset, size, false);
+    const auto& [buffer, base] =
+        buffer_cache.ObtainBuffer(arg_address + offset, stride * max_count, false);
 
     VideoCore::Buffer* count_buffer{};
     u32 count_base{};
@@ -150,36 +262,36 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
         std::tie(count_buffer, count_base) = buffer_cache.ObtainBuffer(count_address, 4, false);
     }
 
-    BeginRendering(*pipeline);
+    BeginRendering(*pipeline, state);
     UpdateDynamicState(*pipeline);
 
     // We can safely ignore both SGPR UD indices and results of fetch shader parsing, as vertex and
    // instance offsets will be automatically applied by Vulkan from indirect args buffer.
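The stride forwarded here now comes from the guest packet rather than a hard-coded constant, so the runtime ASSERTs below validate it against Vulkan's fixed argument layout. For reference, a mirror of the 20-byte indexed argument struct the checks assume:

    #include <cstdint>

    // Mirror of Vulkan's VkDrawIndexedIndirectCommand: five 32-bit fields, 20 bytes.
    // The guest-supplied stride must equal this size before the args buffer can be
    // consumed by vkCmdDrawIndexedIndirect(Count) without repacking.
    struct DrawIndexedIndirectArgs {
        uint32_t index_count;
        uint32_t instance_count;
        uint32_t first_index;
        int32_t vertex_offset;
        uint32_t first_instance;
    };
    static_assert(sizeof(DrawIndexedIndirectArgs) == 20, "must match Vulkan's layout");
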
const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); + if (is_indexed) { - static_assert(sizeof(VkDrawIndexedIndirectCommand) == - AmdGpu::Liverpool::DrawIndexedIndirectArgsSize); + ASSERT(sizeof(VkDrawIndexedIndirectCommand) == stride); if (count_address != 0) { cmdbuf.drawIndexedIndirectCount(buffer->Handle(), base, count_buffer->Handle(), - count_base, max_count, - AmdGpu::Liverpool::DrawIndexedIndirectArgsSize); + count_base, max_count, stride); } else { - cmdbuf.drawIndexedIndirect(buffer->Handle(), base, max_count, - AmdGpu::Liverpool::DrawIndexedIndirectArgsSize); + cmdbuf.drawIndexedIndirect(buffer->Handle(), base, max_count, stride); } } else { - static_assert(sizeof(VkDrawIndirectCommand) == AmdGpu::Liverpool::DrawIndirectArgsSize); + ASSERT(sizeof(VkDrawIndirectCommand) == stride); if (count_address != 0) { cmdbuf.drawIndirectCount(buffer->Handle(), base, count_buffer->Handle(), count_base, - max_count, AmdGpu::Liverpool::DrawIndirectArgsSize); + max_count, stride); } else { - cmdbuf.drawIndirect(buffer->Handle(), base, max_count, - AmdGpu::Liverpool::DrawIndirectArgsSize); + cmdbuf.drawIndirect(buffer->Handle(), base, max_count, stride); } } + + ResetBindings(); } void Rasterizer::DispatchDirect() { @@ -192,18 +304,15 @@ void Rasterizer::DispatchDirect() { return; } - try { - const auto has_resources = pipeline->BindResources(buffer_cache, texture_cache); - if (!has_resources) { - return; - } - } catch (...) { - UNREACHABLE(); + if (!BindResources(pipeline)) { + return; } scheduler.EndRendering(); cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle()); cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z); + + ResetBindings(); } void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) { @@ -216,19 +325,16 @@ void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) { return; } - try { - const auto has_resources = pipeline->BindResources(buffer_cache, texture_cache); - if (!has_resources) { - return; - } - } catch (...) { - UNREACHABLE(); + if (!BindResources(pipeline)) { + return; } scheduler.EndRendering(); cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle()); const auto [buffer, base] = buffer_cache.ObtainBuffer(address + offset, size, false); cmdbuf.dispatchIndirect(buffer->Handle(), base); + + ResetBindings(); } u64 Rasterizer::Flush() { @@ -242,86 +348,386 @@ void Rasterizer::Finish() { scheduler.Finish(); } -void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline) { +bool Rasterizer::BindResources(const Pipeline* pipeline) { + buffer_infos.clear(); + buffer_views.clear(); + image_infos.clear(); + const auto& regs = liverpool->regs; - RenderState state; - if (regs.color_control.degamma_enable) { - LOG_WARNING(Render_Vulkan, "Color buffers require gamma correction"); - } + if (pipeline->IsCompute()) { + const auto& info = pipeline->GetStage(Shader::Stage::Compute); - for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) { - const auto& col_buf = regs.color_buffers[col_buf_id]; - if (!col_buf) { - continue; - } - - // If the color buffer is still bound but rendering to it is disabled by the target mask, - // we need to prevent the render area from being affected by unbound render target extents. - if (!regs.color_target_mask.GetMask(col_buf_id)) { - continue; - } - - // Skip stale color buffers if shader doesn't output to them. 
Otherwise it will perform - // an unnecessary transition and may result in state conflict if the resource is already - // bound for reading. - if ((pipeline.GetMrtMask() & (1 << col_buf_id)) == 0) { - continue; - } - - const auto& hint = liverpool->last_cb_extent[col_buf_id]; - VideoCore::ImageInfo image_info{col_buf, hint}; - VideoCore::ImageViewInfo view_info{col_buf}; - const auto& image_view = texture_cache.FindRenderTarget(image_info, view_info); - const auto& image = texture_cache.GetImage(image_view.image_id); - state.width = std::min(state.width, image.info.size.width); - state.height = std::min(state.height, image.info.size.height); - - const bool is_clear = texture_cache.IsMetaCleared(col_buf.CmaskAddress()); - state.color_images[state.num_color_attachments] = image.image; - state.color_attachments[state.num_color_attachments++] = { - .imageView = *image_view.image_view, - .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, - .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = - is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{}, + // Most of the time when a metadata is updated with a shader it gets cleared. It means + // we can skip the whole dispatch and update the tracked state instead. Also, it is not + // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we + // will need its full emulation anyways. For cases of metadata read a warning will be + // logged. + const auto IsMetaUpdate = [&](const auto& desc) { + const VAddr address = desc.GetSharp(info).base_address; + if (desc.is_written) { + if (texture_cache.TouchMeta(address, true)) { + LOG_TRACE(Render_Vulkan, "Metadata update skipped"); + return true; + } + } else { + if (texture_cache.IsMeta(address)) { + LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)"); + } + } + return false; }; - texture_cache.TouchMeta(col_buf.CmaskAddress(), false); + + for (const auto& desc : info.buffers) { + if (desc.is_gds_buffer) { + continue; + } + if (IsMetaUpdate(desc)) { + return false; + } + } + for (const auto& desc : info.texture_buffers) { + if (IsMetaUpdate(desc)) { + return false; + } + } } - using ZFormat = AmdGpu::Liverpool::DepthBuffer::ZFormat; - using StencilFormat = AmdGpu::Liverpool::DepthBuffer::StencilFormat; - if (regs.depth_buffer.Address() != 0 && - ((regs.depth_control.depth_enable && regs.depth_buffer.z_info.format != ZFormat::Invalid) || - (regs.depth_control.stencil_enable && - regs.depth_buffer.stencil_info.format != StencilFormat::Invalid))) { - const auto htile_address = regs.depth_htile_data_base.GetAddress(); - const bool is_clear = regs.depth_render_control.depth_clear_enable || - texture_cache.IsMetaCleared(htile_address); - const auto& hint = liverpool->last_db_extent; - VideoCore::ImageInfo image_info{regs.depth_buffer, regs.depth_view.NumSlices(), - htile_address, hint}; - VideoCore::ImageViewInfo view_info{regs.depth_buffer, regs.depth_view, regs.depth_control}; - const auto& image_view = texture_cache.FindDepthTarget(image_info, view_info); - const auto& image = texture_cache.GetImage(image_view.image_id); - state.width = std::min(state.width, image.info.size.width); - state.height = std::min(state.height, image.info.size.height); - state.depth_image = image.image; - state.depth_attachment = { - .imageView = *image_view.image_view, - .imageLayout = image.last_state.layout, - .loadOp = is_clear ? 
vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, - .storeOp = is_clear ? vk::AttachmentStoreOp::eNone : vk::AttachmentStoreOp::eStore, - .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear, - .stencil = regs.stencil_clear}}, - }; - texture_cache.TouchMeta(htile_address, false); - state.has_depth = - regs.depth_buffer.z_info.format != AmdGpu::Liverpool::DepthBuffer::ZFormat::Invalid; - state.has_stencil = regs.depth_buffer.stencil_info.format != - AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid; + set_writes.clear(); + buffer_barriers.clear(); + + // Bind resource buffers and textures. + Shader::PushData push_data{}; + Shader::Backend::Bindings binding{}; + + for (const auto* stage : pipeline->GetStages()) { + if (!stage) { + continue; + } + if (stage->uses_step_rates) { + push_data.step0 = regs.vgt_instance_step_rate_0; + push_data.step1 = regs.vgt_instance_step_rate_1; + } + stage->PushUd(binding, push_data); + + BindBuffers(*stage, binding, push_data, set_writes, buffer_barriers); + BindTextures(*stage, binding, set_writes); } + + pipeline->BindResources(set_writes, buffer_barriers, push_data); + + return true; +} + +void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Bindings& binding, + Shader::PushData& push_data, Pipeline::DescriptorWrites& set_writes, + Pipeline::BufferBarriers& buffer_barriers) { + buffer_bindings.clear(); + + for (const auto& desc : stage.buffers) { + const auto vsharp = desc.GetSharp(stage); + if (!desc.is_gds_buffer && vsharp.base_address != 0 && vsharp.GetSize() > 0) { + const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize()); + buffer_bindings.emplace_back(buffer_id, vsharp); + } else { + buffer_bindings.emplace_back(VideoCore::BufferId{}, vsharp); + } + } + + texbuffer_bindings.clear(); + + for (const auto& desc : stage.texture_buffers) { + const auto vsharp = desc.GetSharp(stage); + if (vsharp.base_address != 0 && vsharp.GetSize() > 0 && + vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) { + const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize()); + texbuffer_bindings.emplace_back(buffer_id, vsharp); + } else { + texbuffer_bindings.emplace_back(VideoCore::BufferId{}, vsharp); + } + } + + // Bind the flattened user data buffer as a UBO so it's accessible to the shader + if (stage.has_readconst) { + const auto [vk_buffer, offset] = buffer_cache.ObtainHostUBO(stage.flattened_ud_buf); + buffer_infos.emplace_back(vk_buffer->Handle(), offset, + stage.flattened_ud_buf.size() * sizeof(u32)); + set_writes.push_back({ + .dstSet = VK_NULL_HANDLE, + .dstBinding = binding.unified++, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eUniformBuffer, + .pBufferInfo = &buffer_infos.back(), + }); + ++binding.buffer; + } + + // Second pass to re-bind buffers that were updated after binding + for (u32 i = 0; i < buffer_bindings.size(); i++) { + const auto& [buffer_id, vsharp] = buffer_bindings[i]; + const auto& desc = stage.buffers[i]; + const bool is_storage = desc.IsStorage(vsharp); + if (!buffer_id) { + if (desc.is_gds_buffer) { + const auto* gds_buf = buffer_cache.GetGdsBuffer(); + buffer_infos.emplace_back(gds_buf->Handle(), 0, gds_buf->SizeBytes()); + } else if (instance.IsNullDescriptorSupported()) { + buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE); + } else { + auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID); + buffer_infos.emplace_back(null_buffer.Handle(), 0, 
VK_WHOLE_SIZE); + } + } else { + const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer( + vsharp.base_address, vsharp.GetSize(), desc.is_written, false, buffer_id); + const u32 alignment = + is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); + const u32 offset_aligned = Common::AlignDown(offset, alignment); + const u32 adjust = offset - offset_aligned; + ASSERT(adjust % 4 == 0); + push_data.AddOffset(binding.buffer, adjust); + buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, + vsharp.GetSize() + adjust); + } + + set_writes.push_back({ + .dstSet = VK_NULL_HANDLE, + .dstBinding = binding.unified++, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer + : vk::DescriptorType::eUniformBuffer, + .pBufferInfo = &buffer_infos.back(), + }); + ++binding.buffer; + } + + const auto null_buffer_view = + instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView(); + for (u32 i = 0; i < texbuffer_bindings.size(); i++) { + const auto& [buffer_id, vsharp] = texbuffer_bindings[i]; + const auto& desc = stage.texture_buffers[i]; + vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view); + if (buffer_id) { + const u32 alignment = instance.TexelBufferMinAlignment(); + const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer( + vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id); + const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3; + ASSERT_MSG(fmt_stride == vsharp.GetStride(), + "Texel buffer stride must match format stride"); + const u32 offset_aligned = Common::AlignDown(offset, alignment); + const u32 adjust = offset - offset_aligned; + ASSERT(adjust % fmt_stride == 0); + push_data.AddOffset(binding.buffer, adjust / fmt_stride); + buffer_view = + vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written, + vsharp.GetDataFmt(), vsharp.GetNumberFmt()); + if (auto barrier = + vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite + : vk::AccessFlagBits2::eShaderRead, + vk::PipelineStageFlagBits2::eComputeShader)) { + buffer_barriers.emplace_back(*barrier); + } + if (desc.is_written) { + texture_cache.InvalidateMemoryFromGPU(vsharp.base_address, vsharp.GetSize()); + } + } + + set_writes.push_back({ + .dstSet = VK_NULL_HANDLE, + .dstBinding = binding.unified++, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer + : vk::DescriptorType::eUniformTexelBuffer, + .pTexelBufferView = &buffer_view, + }); + ++binding.buffer; + } +} + +void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding, + Pipeline::DescriptorWrites& set_writes) { + image_bindings.clear(); + + for (const auto& image_desc : stage.images) { + const auto tsharp = image_desc.GetSharp(stage); + if (texture_cache.IsMeta(tsharp.Address())) { + LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a shader (texture)"); + } + + if (tsharp.GetDataFmt() == AmdGpu::DataFormat::FormatInvalid) { + image_bindings.emplace_back(std::piecewise_construct, std::tuple{}, std::tuple{}); + continue; + } + + auto& [image_id, desc] = image_bindings.emplace_back(std::piecewise_construct, std::tuple{}, + std::tuple{tsharp, image_desc}); + image_id = texture_cache.FindImage(desc); + auto& image = texture_cache.GetImage(image_id); + if (image.binding.is_bound) { + // The image is already bound. 
In case it is about to be used as storage we need
+            // to force general layout on it.
+            image.binding.force_general |= image_desc.is_storage;
+        }
+        if (image.binding.is_target) {
+            // The image is already bound as target. Since we read from and output to it, we need
+            // to force general layout too.
+            image.binding.force_general = 1u;
+        }
+        image.binding.is_bound = 1u;
+    }
+
+    // Second pass to re-bind images that were updated after binding
+    for (auto& [image_id, desc] : image_bindings) {
+        bool is_storage = desc.type == VideoCore::TextureCache::BindingType::Storage;
+        if (!image_id) {
+            if (instance.IsNullDescriptorSupported()) {
+                image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
+            } else {
+                auto& null_image = texture_cache.GetImageView(VideoCore::NULL_IMAGE_VIEW_ID);
+                image_infos.emplace_back(VK_NULL_HANDLE, *null_image.image_view,
+                                         vk::ImageLayout::eGeneral);
+            }
+        } else {
+            if (auto& old_image = texture_cache.GetImage(image_id);
+                old_image.binding.needs_rebind) {
+                old_image.binding.Reset(); // clean up previous image binding state
+                image_id = texture_cache.FindImage(desc);
+            }
+
+            bound_images.emplace_back(image_id);
+
+            auto& image = texture_cache.GetImage(image_id);
+            auto& image_view = texture_cache.FindTexture(image_id, desc.view_info);
+
+            if (image.binding.force_general || image.binding.is_target) {
+                image.Transit(vk::ImageLayout::eGeneral,
+                              vk::AccessFlagBits2::eShaderRead |
+                                  (image.info.IsDepthStencil()
+                                       ? vk::AccessFlagBits2::eDepthStencilAttachmentWrite
+                                       : vk::AccessFlagBits2::eColorAttachmentWrite),
+                              {});
+            } else {
+                if (is_storage) {
+                    image.Transit(vk::ImageLayout::eGeneral,
+                                  vk::AccessFlagBits2::eShaderRead |
+                                      vk::AccessFlagBits2::eShaderWrite,
+                                  desc.view_info.range);
+                } else {
+                    const auto new_layout = image.info.IsDepthStencil()
+                                                ? vk::ImageLayout::eDepthStencilReadOnlyOptimal
+                                                : vk::ImageLayout::eShaderReadOnlyOptimal;
+                    image.Transit(new_layout, vk::AccessFlagBits2::eShaderRead,
+                                  desc.view_info.range);
+                }
+            }
+            image.usage.storage |= is_storage;
+            image.usage.texture |= !is_storage;
+
+            image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
+                                     image.last_state.layout);
+        }
+
+        set_writes.push_back({
+            .dstSet = VK_NULL_HANDLE,
+            .dstBinding = binding.unified++,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType =
+                is_storage ? 
vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage, + .pImageInfo = &image_infos.back(), + }); + } + + for (const auto& sampler : stage.samplers) { + auto ssharp = sampler.GetSharp(stage); + if (sampler.disable_aniso) { + const auto& tsharp = stage.images[sampler.associated_image].GetSharp(stage); + if (tsharp.base_level == 0 && tsharp.last_level == 0) { + ssharp.max_aniso.Assign(AmdGpu::AnisoRatio::One); + } + } + const auto vk_sampler = texture_cache.GetSampler(ssharp); + image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); + set_writes.push_back({ + .dstSet = VK_NULL_HANDLE, + .dstBinding = binding.unified++, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eSampler, + .pImageInfo = &image_infos.back(), + }); + } +} + +void Rasterizer::BeginRendering(const GraphicsPipeline& pipeline, RenderState& state) { + int cb_index = 0; + for (auto& [image_id, desc] : cb_descs) { + if (auto& old_img = texture_cache.GetImage(image_id); old_img.binding.needs_rebind) { + auto& view = texture_cache.FindRenderTarget(desc); + ASSERT(view.image_id != image_id); + image_id = bound_images.emplace_back(view.image_id); + auto& image = texture_cache.GetImage(view.image_id); + state.color_attachments[cb_index].imageView = *view.image_view; + state.color_attachments[cb_index].imageLayout = image.last_state.layout; + state.color_images[cb_index] = image.image; + + const auto mip = view.info.range.base.level; + state.width = std::min(state.width, std::max(image.info.size.width >> mip, 1u)); + state.height = std::min(state.height, std::max(image.info.size.height >> mip, 1u)); + ASSERT(old_img.info.size.width == state.width); + ASSERT(old_img.info.size.height == state.height); + } + auto& image = texture_cache.GetImage(image_id); + if (image.binding.force_general) { + image.Transit( + vk::ImageLayout::eGeneral, + vk::AccessFlagBits2::eColorAttachmentWrite | vk::AccessFlagBits2::eShaderRead, {}); + + } else { + image.Transit(vk::ImageLayout::eColorAttachmentOptimal, + vk::AccessFlagBits2::eColorAttachmentWrite | + vk::AccessFlagBits2::eColorAttachmentRead, + desc.view_info.range); + } + image.usage.render_target = 1u; + state.color_attachments[cb_index].imageLayout = image.last_state.layout; + ++cb_index; + } + + if (db_desc) { + const auto& image_id = std::get<0>(*db_desc); + const auto& desc = std::get<1>(*db_desc); + auto& image = texture_cache.GetImage(image_id); + ASSERT(image.binding.needs_rebind == 0); + const bool has_stencil = image.usage.stencil; + if (has_stencil) { + image.aspect_mask |= vk::ImageAspectFlagBits::eStencil; + } + if (image.binding.force_general) { + image.Transit(vk::ImageLayout::eGeneral, + vk::AccessFlagBits2::eDepthStencilAttachmentWrite | + vk::AccessFlagBits2::eShaderRead, + {}); + } else { + const auto new_layout = desc.view_info.is_storage + ? has_stencil + ? vk::ImageLayout::eDepthStencilAttachmentOptimal + : vk::ImageLayout::eDepthAttachmentOptimal + : has_stencil ? 
vk::ImageLayout::eDepthStencilReadOnlyOptimal + : vk::ImageLayout::eDepthReadOnlyOptimal; + image.Transit(new_layout, + vk::AccessFlagBits2::eDepthStencilAttachmentWrite | + vk::AccessFlagBits2::eDepthStencilAttachmentRead, + desc.view_info.range); + } + state.depth_attachment.imageLayout = image.last_state.layout; + image.usage.depth_target = true; + image.usage.stencil = has_stencil; + } + scheduler.BeginRendering(state); } @@ -331,10 +737,12 @@ void Rasterizer::Resolve() { // Read from MRT0, average all samples, and write to MRT1, which is one-sample const auto& mrt0_hint = liverpool->last_cb_extent[0]; const auto& mrt1_hint = liverpool->last_cb_extent[1]; - VideoCore::ImageInfo mrt0_info{liverpool->regs.color_buffers[0], mrt0_hint}; - VideoCore::ImageInfo mrt1_info{liverpool->regs.color_buffers[1], mrt1_hint}; - auto& mrt0_image = texture_cache.GetImage(texture_cache.FindImage(mrt0_info)); - auto& mrt1_image = texture_cache.GetImage(texture_cache.FindImage(mrt1_info)); + VideoCore::TextureCache::RenderTargetDesc mrt0_desc{liverpool->regs.color_buffers[0], + mrt0_hint}; + VideoCore::TextureCache::RenderTargetDesc mrt1_desc{liverpool->regs.color_buffers[1], + mrt1_hint}; + auto& mrt0_image = texture_cache.GetImage(texture_cache.FindImage(mrt0_desc)); + auto& mrt1_image = texture_cache.GetImage(texture_cache.FindImage(mrt1_desc)); VideoCore::SubresourceRange mrt0_range; mrt0_range.base.layer = liverpool->regs.color_buffers[0].view.slice_start; @@ -384,9 +792,9 @@ u32 Rasterizer::ReadDataFromGds(u32 gds_offset) { return value; } -void Rasterizer::InvalidateMemory(VAddr addr, u64 size) { - buffer_cache.InvalidateMemory(addr, size); - texture_cache.InvalidateMemory(addr, size); +void Rasterizer::InvalidateMemory(VAddr addr, VAddr addr_aligned, u64 size) { + buffer_cache.InvalidateMemory(addr_aligned, size); + texture_cache.InvalidateMemory(addr, addr_aligned, size); } void Rasterizer::MapMemory(VAddr addr, u64 size) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b6813aec9..fe8aceba7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -19,6 +19,7 @@ class MemoryManager; namespace Vulkan { class Scheduler; +class RenderState; class GraphicsPipeline; class Rasterizer { @@ -45,7 +46,7 @@ public: void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds); u32 ReadDataFromGds(u32 gsd_offset); - void InvalidateMemory(VAddr addr, u64 size); + void InvalidateMemory(VAddr addr, VAddr addr_aligned, u64 size); void MapMemory(VAddr addr, u64 size); void UnmapMemory(VAddr addr, u64 size); @@ -54,7 +55,8 @@ public: void Finish(); private: - void BeginRendering(const GraphicsPipeline& pipeline); + RenderState PrepareRenderState(u32 mrt_mask); + void BeginRendering(const GraphicsPipeline& pipeline, RenderState& state); void Resolve(); void UpdateDynamicState(const GraphicsPipeline& pipeline); @@ -63,6 +65,21 @@ private: bool FilterDraw(); + void BindBuffers(const Shader::Info& stage, Shader::Backend::Bindings& binding, + Shader::PushData& push_data, Pipeline::DescriptorWrites& set_writes, + Pipeline::BufferBarriers& buffer_barriers); + + void BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding, + Pipeline::DescriptorWrites& set_writes); + + bool BindResources(const Pipeline* pipeline); + void ResetBindings() { + for (auto& image_id : bound_images) { + texture_cache.GetImage(image_id).binding.Reset(); + } + bound_images.clear(); + } + 
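ResetBindings() above closes the per-draw binding lifecycle: bind, draw, then clear the transient per-image flags. A minimal model of the state involved, assuming the same four flag bits this patch introduces on Image:

    #include <cstdint>
    #include <vector>

    // Sketch of the transient binding flags that live only between
    // BindResources() and ResetBindings().
    struct BindingState {
        uint32_t is_bound : 1;      // bound to a descriptor set this draw
        uint32_t is_target : 1;     // bound as color/depth target this draw
        uint32_t needs_rebind : 1;  // backing image was replaced mid-draw
        uint32_t force_general : 1; // must use the general layout
        void Reset() { *this = {}; }
    };

    // Clears the transient state of every image touched by the draw so stale
    // is_target/force_general bits cannot leak into the next draw call.
    void ResetBindings(std::vector<BindingState*>& bound_images) {
        for (auto* binding : bound_images) {
            binding->Reset();
        }
        bound_images.clear();
    }
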
private: const Instance& instance; Scheduler& scheduler; @@ -72,6 +89,25 @@ private: AmdGpu::Liverpool* liverpool; Core::MemoryManager* memory; PipelineCache pipeline_cache; + + boost::container::static_vector< + std::pair, 8> + cb_descs; + std::optional> db_desc; + boost::container::static_vector image_infos; + boost::container::static_vector buffer_views; + boost::container::static_vector buffer_infos; + boost::container::static_vector bound_images; + + Pipeline::DescriptorWrites set_writes; + Pipeline::BufferBarriers buffer_barriers; + + using BufferBindingInfo = std::pair; + boost::container::static_vector buffer_bindings; + using TexBufferBindingInfo = std::pair; + boost::container::static_vector texbuffer_bindings; + using ImageBindingInfo = std::pair; + boost::container::static_vector image_bindings; }; } // namespace Vulkan diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index bea2ce4ff..3d5202ad6 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -61,6 +61,15 @@ bool ImageInfo::IsDepthStencil() const { } } +bool ImageInfo::HasStencil() const { + if (pixel_format == vk::Format::eD32SfloatS8Uint || + pixel_format == vk::Format::eD24UnormS8Uint || + pixel_format == vk::Format::eD16UnormS8Uint) { + return true; + } + return false; +} + static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) { vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | @@ -135,22 +144,24 @@ void UniqueImage::Create(const vk::ImageCreateInfo& image_ci) { Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, const ImageInfo& info_) : instance{&instance_}, scheduler{&scheduler_}, info{info_}, - image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address}, - cpu_addr_end{cpu_addr + info.guest_size_bytes} { + image{instance->GetDevice(), instance->GetAllocator()} { mip_hashes.resize(info.resources.levels); ASSERT(info.pixel_format != vk::Format::eUndefined); // Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case // the texture cache should re-create the resource with the usage requested vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | vk::ImageCreateFlagBits::eExtendedUsage}; - if (info.props.is_cube || (info.type == vk::ImageType::e2D && info.resources.layers >= 6)) { + const bool can_be_cube = (info.type == vk::ImageType::e2D) && + (info.resources.layers % 6 == 0) && + (info.size.width == info.size.height); + if (info.props.is_cube || can_be_cube) { flags |= vk::ImageCreateFlagBits::eCubeCompatible; } else if (info.props.is_volume) { flags |= vk::ImageCreateFlagBits::e2DArrayCompatible; } - usage = ImageUsageFlags(info); - format_features = FormatFeatureFlags(usage); + usage_flags = ImageUsageFlags(info); + format_features = FormatFeatureFlags(usage_flags); switch (info.pixel_format) { case vk::Format::eD16Unorm: @@ -170,7 +181,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, constexpr auto tiling = vk::ImageTiling::eOptimal; const auto supported_format = instance->GetSupportedFormat(info.pixel_format, format_features); const auto properties = instance->GetPhysicalDevice().getImageFormatProperties( - supported_format, info.type, tiling, usage, flags); + supported_format, info.type, tiling, usage_flags, flags); const auto supported_samples = properties.result == vk::Result::eSuccess ? 
properties.value.sampleCounts : vk::SampleCountFlagBits::e1; @@ -188,7 +199,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, .arrayLayers = static_cast(info.resources.layers), .samples = LiverpoolToVK::NumSamples(info.num_samples, supported_samples), .tiling = tiling, - .usage = usage, + .usage = usage_flags, .initialLayout = vk::ImageLayout::eUndefined, }; diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 312ff97e8..a1b1b007f 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -22,16 +22,15 @@ VK_DEFINE_HANDLE(VmaAllocator) namespace VideoCore { enum ImageFlagBits : u32 { - CpuDirty = 1 << 1, ///< Contents have been modified from the CPU + Empty = 0, + MaybeCpuDirty = 1 << 0, ///< The page this image is in was touched before the image address + CpuDirty = 1 << 1, ///< Contents have been modified from the CPU GpuDirty = 1 << 2, ///< Contents have been modified from the GPU (valid data in buffer cache) - Dirty = CpuDirty | GpuDirty, + Dirty = MaybeCpuDirty | CpuDirty | GpuDirty, GpuModified = 1 << 3, ///< Contents have been modified from the GPU - Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered - Bound = 1 << 9, ///< True when the image is bound to a descriptor set - NeedsRebind = 1 << 10, ///< True when the image needs to be rebound }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) @@ -80,7 +79,9 @@ struct Image { [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { const VAddr overlap_end = overlap_cpu_addr + overlap_size; - return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; + const auto image_addr = info.guest_address; + const auto image_end = info.guest_address + info.guest_size_bytes; + return image_addr < overlap_end && overlap_cpu_addr < image_end; } ImageViewId FindView(const ImageViewInfo& info) const { @@ -101,19 +102,31 @@ struct Image { void CopyImage(const Image& image); void CopyMip(const Image& image, u32 mip); + bool IsTracked() { + return track_addr != 0 && track_addr_end != 0; + } + const Vulkan::Instance* instance; Vulkan::Scheduler* scheduler; ImageInfo info; UniqueImage image; vk::ImageAspectFlags aspect_mask = vk::ImageAspectFlagBits::eColor; ImageFlagBits flags = ImageFlagBits::Dirty; - VAddr cpu_addr = 0; - VAddr cpu_addr_end = 0; + VAddr track_addr = 0; + VAddr track_addr_end = 0; std::vector image_view_infos; std::vector image_view_ids; // Resource state tracking - vk::ImageUsageFlags usage; + struct { + u32 texture : 1; + u32 storage : 1; + u32 render_target : 1; + u32 depth_target : 1; + u32 stencil : 1; + u32 vo_surface : 1; + } usage{}; + vk::ImageUsageFlags usage_flags; vk::FormatFeatureFlags2 format_features; struct State { vk::Flags pl_stage = vk::PipelineStageFlagBits2::eAllCommands; @@ -124,6 +137,23 @@ struct Image { std::vector subresource_states{}; boost::container::small_vector mip_hashes{}; u64 tick_accessed_last{0}; + u64 hash{0}; + + struct { + union { + struct { + u32 is_bound : 1; // the image is bound to a descriptor set + u32 is_target : 1; // the image is bound as color/depth target + u32 needs_rebind : 1; // the image needs to be rebound + u32 force_general : 1; // the image needs to be used in general layout + }; + u32 raw{}; + }; + + void 
Reset() {
+            raw = 0u;
+        }
+    } binding{};
 };
 
 } // namespace VideoCore
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp
index 2956a2a3e..0ed36ee39 100644
--- a/src/video_core/texture_cache/image_info.cpp
+++ b/src/video_core/texture_cache/image_info.cpp
@@ -245,7 +245,6 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group,
     size.width = attrib.width;
     size.height = attrib.height;
     pitch = attrib.tiling_mode == TilingMode::Linear ? size.width : (size.width + 127) & (~127);
-    usage.vo_buffer = true;
     num_bits = attrib.pixel_format != VideoOutFormat::A16R16G16B16Float ? 32 : 64;
 
     ASSERT(num_bits == 32);
@@ -277,7 +276,6 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
     resources.layers = buffer.NumSlices();
     meta_info.cmask_addr = buffer.info.fast_clear ? buffer.CmaskAddress() : 0;
     meta_info.fmask_addr = buffer.info.compression ? buffer.FmaskAddress() : 0;
-    usage.render_target = true;
 
     guest_address = buffer.Address();
     const auto color_slice_sz = buffer.GetColorSliceSize();
@@ -299,9 +297,6 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
     pitch = buffer.Pitch();
     resources.layers = num_slices;
     meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
-    usage.depth_target = true;
-    usage.stencil =
-        buffer.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid;
 
     guest_address = buffer.Address();
     const auto depth_slice_sz = buffer.GetDepthSliceSize();
@@ -330,7 +325,6 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
     resources.layers = image.NumLayers(desc.is_array);
     num_samples = image.NumSamples();
     num_bits = NumBits(image.GetDataFmt());
-    usage.texture = true;
 
     guest_address = image.Address();
 
@@ -392,7 +386,6 @@ void ImageInfo::UpdateSize() {
             }
         }
         mip_info.size *= mip_d;
-
         mip_info.offset = guest_size_bytes;
         mips_layout.emplace_back(mip_info);
         guest_size_bytes += mip_info.size;
@@ -400,79 +393,87 @@ void ImageInfo::UpdateSize() {
     guest_size_bytes *= resources.layers;
 }
 
-bool ImageInfo::IsMipOf(const ImageInfo& info) const {
+int ImageInfo::IsMipOf(const ImageInfo& info) const {
     if (!IsCompatible(info)) {
-        return false;
+        return -1;
+    }
+
+    if (!IsTilingCompatible(info.tiling_idx, tiling_idx)) {
+        return -1;
     }
 
     // Currently we expect only one level to be copied.
     if (resources.levels != 1) {
-        return false;
+        return -1;
     }
 
-    const int mip = info.resources.levels - resources.levels;
-    if (mip < 1) {
-        return false;
+    if (info.mips_layout.empty()) {
+        UNREACHABLE();
     }
 
+    // Find mip
+    auto mip = -1;
+    for (auto m = 0; m < info.mips_layout.size(); ++m) {
+        if (guest_address == (info.guest_address + info.mips_layout[m].offset)) {
+            mip = m;
+            break;
+        }
+    }
+
+    if (mip < 0) {
+        return -1;
+    }
+    ASSERT(mip != 0);
+
     const auto mip_w = std::max(info.size.width >> mip, 1u);
     const auto mip_h = std::max(info.size.height >> mip, 1u);
     if ((size.width != mip_w) || (size.height != mip_h)) {
-        return false;
+        return -1;
     }
 
     const auto mip_d = std::max(info.size.depth >> mip, 1u);
     if (info.type == vk::ImageType::e3D && type == vk::ImageType::e2D) {
         // In case of 2D array to 3D copy, make sure we have proper number of layers.
         if (resources.layers != mip_d) {
-            return false;
+            return -1;
         }
     } else {
         if (type != info.type) {
-            return false;
+            return -1;
         }
     }
 
-    // Check if the mip has correct size. 
- if (info.mips_layout.size() <= mip || info.mips_layout[mip].size != guest_size_bytes) { - return false; - } - - // Ensure that address matches too. - if ((info.guest_address + info.mips_layout[mip].offset) != guest_address) { - return false; - } - - return true; + return mip; } -bool ImageInfo::IsSliceOf(const ImageInfo& info) const { +int ImageInfo::IsSliceOf(const ImageInfo& info) const { if (!IsCompatible(info)) { - return false; + return -1; } // Array slices should be of the same type. if (type != info.type) { - return false; + return -1; } // 2D dimensions of both images should be the same. if ((size.width != info.size.width) || (size.height != info.size.height)) { - return false; + return -1; } // Check for size alignment. const bool slice_size = info.guest_size_bytes / info.resources.layers; if (guest_size_bytes % slice_size != 0) { - return false; + return -1; } // Ensure that address is aligned too. - if (((info.guest_address - guest_address) % guest_size_bytes) != 0) { - return false; + const auto addr_diff = guest_address - info.guest_address; + if ((addr_diff % guest_size_bytes) != 0) { + return -1; } - return true; + return addr_diff / guest_size_bytes; } } // namespace VideoCore diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index 2ae2547f7..e12ae3be1 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -28,14 +28,28 @@ struct ImageInfo { bool IsBlockCoded() const; bool IsPacked() const; bool IsDepthStencil() const; + bool HasStencil() const; - bool IsMipOf(const ImageInfo& info) const; - bool IsSliceOf(const ImageInfo& info) const; + int IsMipOf(const ImageInfo& info) const; + int IsSliceOf(const ImageInfo& info) const; /// Verifies if images are compatible for subresource merging. bool IsCompatible(const ImageInfo& info) const { - return (pixel_format == info.pixel_format && tiling_idx == info.tiling_idx && - num_samples == info.num_samples && num_bits == info.num_bits); + return (pixel_format == info.pixel_format && num_samples == info.num_samples && + num_bits == info.num_bits); + } + + bool IsTilingCompatible(u32 lhs, u32 rhs) const { + if (lhs == rhs) { + return true; + } + if (lhs == 0x0e && rhs == 0x0d) { + return true; + } + if (lhs == 0x0d && rhs == 0x0e) { + return true; + } + return false; } void UpdateSize(); @@ -46,15 +60,6 @@ struct ImageInfo { VAddr htile_addr; } meta_info{}; - struct { - u32 texture : 1; - u32 storage : 1; - u32 render_target : 1; - u32 depth_target : 1; - u32 stencil : 1; - u32 vo_buffer : 1; - } usage{}; // Usage data tracked during image lifetime - struct { u32 is_cube : 1; u32 is_volume : 1; @@ -81,6 +86,9 @@ struct ImageInfo { VAddr guest_address{0}; u32 guest_size_bytes{0}; u32 tiling_idx{0}; // TODO: merge with existing! 
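Returning an index instead of a bool lets the caller select the exact subresource. A worked sketch of the slice arithmetic IsSliceOf performs, assuming tightly packed array layers (helper and parameter names are hypothetical):

    #include <cstdint>

    // Returns the array-slice index of a candidate image inside a larger cached
    // array image, or -1 when the address/size layout rules out a slice relation.
    int SliceIndexOf(uint64_t parent_addr, uint64_t parent_size, uint32_t parent_layers,
                     uint64_t child_addr, uint64_t child_size) {
        const uint64_t slice_size = parent_size / parent_layers;
        if (child_size != slice_size) {
            return -1; // must span exactly one layer
        }
        if (child_addr < parent_addr || (child_addr - parent_addr) % slice_size != 0) {
            return -1; // must start on a layer boundary
        }
        const uint64_t index = (child_addr - parent_addr) / slice_size;
        return index < parent_layers ? static_cast<int>(index) : -1;
    }
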
+ + VAddr stencil_addr{0}; + u32 stencil_size{0}; }; } // namespace VideoCore diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 8bde37941..488d44a7f 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -149,7 +149,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::DepthBuffer& depth_buffer, ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, ImageId image_id_) : image_id{image_id_}, info{info_} { - vk::ImageViewUsageCreateInfo usage_ci{.usage = image.usage}; + vk::ImageViewUsageCreateInfo usage_ci{.usage = image.usage_flags}; if (!info.is_storage) { usage_ci.usage &= ~vk::ImageUsageFlagBits::eStorage; } diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 0e8dd7ccc..4373fdc52 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -29,9 +29,12 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& info.UpdateSize(); const ImageId null_id = slot_images.insert(instance, scheduler, info); ASSERT(null_id.index == NULL_IMAGE_ID.index); - const vk::Image& null_image = slot_images[null_id].image; + auto& img = slot_images[null_id]; + const vk::Image& null_image = img.image; Vulkan::SetObjectName(instance.GetDevice(), null_image, "Null Image"); - slot_images[null_id].flags = ImageFlagBits::Tracked; + img.flags = ImageFlagBits::Empty; + img.track_addr = img.info.guest_address; + img.track_addr_end = img.info.guest_address + img.info.guest_size_bytes; ImageViewInfo view_info; const auto null_view_id = @@ -43,13 +46,43 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& TextureCache::~TextureCache() = default; -void TextureCache::InvalidateMemory(VAddr address, size_t size) { +void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) { + if (image.hash == 0) { + // Initialize hash + const u8* addr = std::bit_cast(image.info.guest_address); + image.hash = XXH3_64bits(addr, image.info.guest_size_bytes); + } + image.flags |= ImageFlagBits::MaybeCpuDirty; + UntrackImage(image_id); +} + +void TextureCache::InvalidateMemory(VAddr addr, VAddr page_addr, size_t size) { std::scoped_lock lock{mutex}; - ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) { - // Ensure image is reuploaded when accessed again. - image.flags |= ImageFlagBits::CpuDirty; - // Untrack image, so the range is unprotected and the guest can write freely. - UntrackImage(image_id); + ForEachImageInRegion(page_addr, size, [&](ImageId image_id, Image& image) { + const auto image_begin = image.info.guest_address; + const auto image_end = image.info.guest_address + image.info.guest_size_bytes; + const auto page_end = page_addr + size; + if (image_begin <= addr && addr < image_end) { + // This image was definitely accessed by this page fault. + // Untrack image, so the range is unprotected and the guest can write freely + image.flags |= ImageFlagBits::CpuDirty; + UntrackImage(image_id); + } else if (page_end < image_end) { + // This page access may or may not modify the image. + // We should not mark it as dirty now. If it really was modified + // it will receive more invalidations on its other pages. + // Remove tracking from this page only. 
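This fault handler distinguishes four ways a faulting page can intersect an image; the remaining branches follow below. As a compact decision table (standalone sketch, types simplified; the real handler also adjusts page tracking):

    #include <cstdint>

    enum class Action { MarkCpuDirty, UntrackHead, UntrackTail, MaybeDirtyHashCheck };

    // Classify a write fault at `addr` inside the page [page, page + size) against
    // an image occupying [image_begin, image_end). Mirrors the branches above.
    Action ClassifyFault(uint64_t addr, uint64_t page, uint64_t size,
                         uint64_t image_begin, uint64_t image_end) {
        const uint64_t page_end = page + size;
        if (image_begin <= addr && addr < image_end) {
            return Action::MarkCpuDirty; // the faulting address is inside the image
        }
        if (page_end < image_end) {
            return Action::UntrackHead; // image extends past this page
        }
        if (image_begin < page) {
            return Action::UntrackTail; // image starts before this page
        }
        return Action::MaybeDirtyHashCheck; // image fits entirely within this page
    }
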
+            UntrackImageHead(image_id);
+        } else if (image_begin < page_addr) {
+            // This page access does not modify the image but the page should be untracked.
+            // We should not mark this image as dirty now. If it really was modified
+            // it will receive more invalidations on its other pages.
+            UntrackImageTail(image_id);
+        } else {
+            // Image begins and ends on this page so it cannot receive any more invalidations.
+            // We will check its hash later to see if it really was modified.
+            MarkAsMaybeDirty(image_id, image);
+        }
     });
 }
 
@@ -77,84 +110,149 @@ void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) {
     }
 }
 
-ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, ImageId cache_image_id) {
-    const auto& cache_info = slot_images[cache_image_id].info;
+ImageId TextureCache::ResolveDepthOverlap(const ImageInfo& requested_info, BindingType binding,
+                                          ImageId cache_image_id) {
+    const auto& cache_image = slot_images[cache_image_id];
 
-    const bool was_bound_as_texture =
-        !cache_info.usage.depth_target && (cache_info.usage.texture || cache_info.usage.storage);
-    if (requested_info.usage.depth_target && was_bound_as_texture) {
-        auto new_image_id = slot_images.insert(instance, scheduler, requested_info);
+    if (!cache_image.info.IsDepthStencil() && !requested_info.IsDepthStencil()) {
+        return {};
+    }
+
+    const bool stencil_match = requested_info.HasStencil() == cache_image.info.HasStencil();
+    const bool bpp_match = requested_info.num_bits == cache_image.info.num_bits;
+
+    // If an image in the cache has fewer slices we need to expand it.
+    bool recreate = cache_image.info.resources < requested_info.resources;
+
+    switch (binding) {
+    case BindingType::Texture:
+        // The guest requires a depth sampled texture, but the cache can offer only Rxf. Need to
+        // recreate the image.
+        recreate |= requested_info.IsDepthStencil() && !cache_image.info.IsDepthStencil();
+        break;
+    case BindingType::Storage:
+        // If the guest is going to use a previously created depth buffer as storage, the image
+        // needs to be recreated. (TODO: Probably a case with linear rgba8 aliasing is legit)
+        recreate |= cache_image.info.IsDepthStencil();
+        break;
+    case BindingType::RenderTarget:
+        // A render target can have only an Rxf format. If the cache contains only Dx[S8] we need
+        // to re-create the image.
+        ASSERT(!requested_info.IsDepthStencil());
+        recreate |= cache_image.info.IsDepthStencil();
+        break;
+    case BindingType::DepthTarget:
+        // The guest has requested a previously allocated texture to be bound as a depth target.
+        // In this case we need to convert Rx float to a Dx[S8] as requested.
+        recreate |= !cache_image.info.IsDepthStencil();
+
+        // The guest is trying to bind a depth target and the cache has it. Need to be sure that
+        // aspects and bpp match.
+        recreate |= cache_image.info.IsDepthStencil() && !(stencil_match && bpp_match);
+        break;
+    default:
+        break;
+    }
+
+    if (recreate) {
+        auto new_info{requested_info};
+        new_info.resources = std::max(requested_info.resources, cache_image.info.resources);
+        new_info.UpdateSize();
+        const auto new_image_id = slot_images.insert(instance, scheduler, new_info);
         RegisterImage(new_image_id);
+        // Inherit image usage
+        auto& new_image = GetImage(new_image_id);
+        new_image.usage = cache_image.usage;
+
+        // TODO: perform a depth copy here
 
         FreeImage(cache_image_id);
         return new_image_id;
     }
 
-    const bool should_bind_as_texture =
-        !requested_info.usage.depth_target &&
-        (requested_info.usage.texture || requested_info.usage.storage);
-    if (cache_info.usage.depth_target && should_bind_as_texture) {
-        if (cache_info.resources == requested_info.resources) {
-            return cache_image_id;
-        } else {
-            // UNREACHABLE();
-        }
-    }
-
-    return {};
+    // Will be handled by view
+    return cache_image_id;
 }
 
-ImageId TextureCache::ResolveOverlap(const ImageInfo& image_info, ImageId cache_image_id,
-                                     ImageId merged_image_id) {
+std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& image_info,
+                                                           BindingType binding,
+                                                           ImageId cache_image_id,
+                                                           ImageId merged_image_id) {
     auto& tex_cache_image = slot_images[cache_image_id];
+    // We can assume it is safe to delete the image if it wasn't accessed for some number of
+    // frames.
+    const bool safe_to_delete =
+        scheduler.CurrentTick() - tex_cache_image.tick_accessed_last > NumFramesBeforeRemoval;
 
     if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address
         if (image_info.size != tex_cache_image.info.size) {
-            // Very likely this kind of overlap is caused by allocation from a pool. We can assume
-            // it is safe to delete the image if it wasn't accessed in some amount of frames.
-            if (scheduler.CurrentTick() - tex_cache_image.tick_accessed_last >
-                NumFramesBeforeRemoval) {
-
+            // Very likely this kind of overlap is caused by allocation from a pool.
+            if (safe_to_delete) {
                 FreeImage(cache_image_id);
             }
-            return merged_image_id;
+            return {merged_image_id, -1, -1};
         }
 
-        if (auto depth_image_id = ResolveDepthOverlap(image_info, cache_image_id)) {
-            return depth_image_id;
+        if (const auto depth_image_id = ResolveDepthOverlap(image_info, binding, cache_image_id)) {
+            return {depth_image_id, -1, -1};
        }
 
         if (image_info.pixel_format != tex_cache_image.info.pixel_format ||
             image_info.guest_size_bytes <= tex_cache_image.info.guest_size_bytes) {
             auto result_id = merged_image_id ? merged_image_id : cache_image_id;
             const auto& result_image = slot_images[result_id];
-            return IsVulkanFormatCompatible(image_info.pixel_format, result_image.info.pixel_format)
-                       ? result_id
-                       : ImageId{};
+            return {
+                IsVulkanFormatCompatible(image_info.pixel_format, result_image.info.pixel_format)
+                    ? result_id
+                    : ImageId{},
+                -1, -1};
         }
 
         ImageId new_image_id{};
         if (image_info.type == tex_cache_image.info.type) {
+            ASSERT(image_info.resources > tex_cache_image.info.resources);
             new_image_id = ExpandImage(image_info, cache_image_id);
         } else {
             UNREACHABLE();
         }
-        return new_image_id;
+        return {new_image_id, -1, -1};
     }
 
     // Right overlap, the image requested is a possible subresource of the image from cache.
     if (image_info.guest_address > tex_cache_image.info.guest_address) {
-        // Should be handled by view. No additional actions needed.
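The right-overlap branch below no longer assumes a view will cover the alias; it asks IsMipOf/IsSliceOf for an exact subresource index. Condensed, the address-based mip lookup inside IsMipOf works like this (standalone sketch with illustrative names):

    #include <cstdint>
    #include <vector>

    // Locate which mip of a cached parent image a candidate image aliases by
    // comparing guest addresses against the per-mip byte offsets.
    int FindAliasedMip(uint64_t parent_addr, const std::vector<uint64_t>& mip_offsets,
                       uint64_t child_addr) {
        for (size_t m = 0; m < mip_offsets.size(); ++m) {
            if (child_addr == parent_addr + mip_offsets[m]) {
                return static_cast<int>(m);
            }
        }
        return -1; // no mip starts at this address
    }
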
+ if (auto mip = image_info.IsMipOf(tex_cache_image.info); mip >= 0) { + return {cache_image_id, mip, -1}; + } + + if (auto slice = image_info.IsSliceOf(tex_cache_image.info); slice >= 0) { + return {cache_image_id, -1, slice}; + } + + // TODO: slice and mip + + if (safe_to_delete) { + FreeImage(cache_image_id); + } + + return {{}, -1, -1}; } else { // Left overlap, the image from cache is a possible subresource of the image requested if (!merged_image_id) { // We need to have a larger, already allocated image to copy this one into - return {}; + return {{}, -1, -1}; } - if (tex_cache_image.info.IsMipOf(image_info)) { + if (auto mip = tex_cache_image.info.IsMipOf(image_info); mip >= 0) { + if (tex_cache_image.binding.is_target) { + // We have a larger image created and a separate one, representing a subres of it, + // bound as render target. In this case we need to rebind render target. + tex_cache_image.binding.needs_rebind = 1u; + GetImage(merged_image_id).binding.is_target = 1u; + + FreeImage(cache_image_id); + return {merged_image_id, -1, -1}; + } + tex_cache_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); @@ -162,13 +260,13 @@ ImageId TextureCache::ResolveOverlap(const ImageInfo& image_info, ImageId cache_ ASSERT(num_mips_to_copy == 1); auto& merged_image = slot_images[merged_image_id]; - merged_image.CopyMip(tex_cache_image, image_info.resources.levels - 1); + merged_image.CopyMip(tex_cache_image, mip); FreeImage(cache_image_id); } } - return merged_image_id; + return {merged_image_id, -1, -1}; } ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { @@ -181,8 +279,8 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); new_image.CopyImage(src_image); - if (True(src_image.flags & ImageFlagBits::Bound)) { - src_image.flags |= ImageFlagBits::NeedsRebind; + if (src_image.binding.is_bound || src_image.binding.is_target) { + src_image.binding.needs_rebind = 1u; } FreeImage(image_id); @@ -192,9 +290,11 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { return new_image_id; } -ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) { +ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) { + const auto& info = desc.info; + if (info.guest_address == 0) [[unlikely]] { - return NULL_IMAGE_VIEW_ID; + return NULL_IMAGE_ID; } std::scoped_lock lock{mutex}; @@ -231,10 +331,16 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) { } // Try to resolve overlaps (if any) + int view_mip{-1}; + int view_slice{-1}; if (!image_id) { for (const auto& cache_id : image_ids) { + view_mip = -1; + view_slice = -1; + const auto& merged_info = image_id ? 
slot_images[image_id].info : info; - image_id = ResolveOverlap(merged_info, cache_id, image_id); + std::tie(image_id, view_mip, view_slice) = + ResolveOverlap(merged_info, desc.type, cache_id, image_id); } } @@ -254,6 +360,10 @@ ImageId TextureCache::FindImage(const ImageInfo& info, FindFlags flags) { RegisterImage(image_id); } + if (view_mip > 0) { + desc.view_info.range.base.level = view_mip; + } + Image& image = slot_images[image_id]; image.tick_accessed_last = scheduler.CurrentTick(); @@ -275,100 +385,58 @@ ImageView& TextureCache::RegisterImageView(ImageId image_id, const ImageViewInfo ImageView& TextureCache::FindTexture(ImageId image_id, const ImageViewInfo& view_info) { Image& image = slot_images[image_id]; UpdateImage(image_id); - auto& usage = image.info.usage; - - if (view_info.is_storage) { - image.Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite, - view_info.range); - usage.storage = true; - } else { - const auto new_layout = image.info.IsDepthStencil() - ? vk::ImageLayout::eDepthStencilReadOnlyOptimal - : vk::ImageLayout::eShaderReadOnlyOptimal; - image.Transit(new_layout, vk::AccessFlagBits2::eShaderRead, view_info.range); - usage.texture = true; - } - return RegisterImageView(image_id, view_info); } -ImageView& TextureCache::FindRenderTarget(const ImageInfo& image_info, - const ImageViewInfo& view_info) { - const ImageId image_id = FindImage(image_info); +ImageView& TextureCache::FindRenderTarget(BaseDesc& desc) { + const ImageId image_id = FindImage(desc); Image& image = slot_images[image_id]; image.flags |= ImageFlagBits::GpuModified; + image.usage.render_target = 1u; UpdateImage(image_id); - image.Transit(vk::ImageLayout::eColorAttachmentOptimal, - vk::AccessFlagBits2::eColorAttachmentWrite | - vk::AccessFlagBits2::eColorAttachmentRead, - view_info.range); - // Register meta data for this color buffer if (!(image.flags & ImageFlagBits::MetaRegistered)) { - if (image_info.meta_info.cmask_addr) { + if (desc.info.meta_info.cmask_addr) { surface_metas.emplace( - image_info.meta_info.cmask_addr, + desc.info.meta_info.cmask_addr, MetaDataInfo{.type = MetaDataInfo::Type::CMask, .is_cleared = true}); - image.info.meta_info.cmask_addr = image_info.meta_info.cmask_addr; + image.info.meta_info.cmask_addr = desc.info.meta_info.cmask_addr; image.flags |= ImageFlagBits::MetaRegistered; } - if (image_info.meta_info.fmask_addr) { + if (desc.info.meta_info.fmask_addr) { surface_metas.emplace( - image_info.meta_info.fmask_addr, + desc.info.meta_info.fmask_addr, MetaDataInfo{.type = MetaDataInfo::Type::FMask, .is_cleared = true}); - image.info.meta_info.fmask_addr = image_info.meta_info.fmask_addr; + image.info.meta_info.fmask_addr = desc.info.meta_info.fmask_addr; image.flags |= ImageFlagBits::MetaRegistered; } } - // Update tracked image usage - image.info.usage.render_target = true; - - return RegisterImageView(image_id, view_info); + return RegisterImageView(image_id, desc.view_info); } -ImageView& TextureCache::FindDepthTarget(const ImageInfo& image_info, - const ImageViewInfo& view_info) { - const ImageId image_id = FindImage(image_info); +ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) { + const ImageId image_id = FindImage(desc); Image& image = slot_images[image_id]; image.flags |= ImageFlagBits::GpuModified; image.flags &= ~ImageFlagBits::Dirty; - image.aspect_mask = vk::ImageAspectFlagBits::eDepth; - - const bool has_stencil = image_info.usage.stencil; - if (has_stencil) { - image.aspect_mask |= 
vk::ImageAspectFlagBits::eStencil;
-    }
-
-    const auto new_layout = view_info.is_storage
-                                ? has_stencil ? vk::ImageLayout::eDepthStencilAttachmentOptimal
-                                              : vk::ImageLayout::eDepthAttachmentOptimal
-                            : has_stencil ? vk::ImageLayout::eDepthStencilReadOnlyOptimal
-                                          : vk::ImageLayout::eDepthReadOnlyOptimal;
-    image.Transit(new_layout,
-                  vk::AccessFlagBits2::eDepthStencilAttachmentWrite |
-                      vk::AccessFlagBits2::eDepthStencilAttachmentRead,
-                  view_info.range);
+    image.usage.depth_target = 1u;
+    image.usage.stencil = image.info.HasStencil();

     // Register meta data for this depth buffer
     if (!(image.flags & ImageFlagBits::MetaRegistered)) {
-        if (image_info.meta_info.htile_addr) {
+        if (desc.info.meta_info.htile_addr) {
             surface_metas.emplace(
-                image_info.meta_info.htile_addr,
+                desc.info.meta_info.htile_addr,
                 MetaDataInfo{.type = MetaDataInfo::Type::HTile, .is_cleared = true});
-            image.info.meta_info.htile_addr = image_info.meta_info.htile_addr;
+            image.info.meta_info.htile_addr = desc.info.meta_info.htile_addr;
             image.flags |= ImageFlagBits::MetaRegistered;
         }
     }

-    // Update tracked image usage
-    image.info.usage.depth_target = true;
-    image.info.usage.stencil = has_stencil;
-
-    return RegisterImageView(image_id, view_info);
+    return RegisterImageView(image_id, desc.view_info);
 }

 void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_scheduler /*= nullptr*/) {
@@ -380,6 +448,23 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
         return;
     }

+    if (True(image.flags & ImageFlagBits::MaybeCpuDirty) &&
+        False(image.flags & ImageFlagBits::CpuDirty)) {
+        // The image size should be less than the page size for it to be considered MaybeCpuDirty,
+        // so this calculation should be very uncommon and reasonably fast.
+        // For now we just hash the first 64 pixels (an 8x8 block).
+        const auto addr = std::bit_cast<const u8*>(image.info.guest_address);
+        const auto w = std::min(image.info.size.width, u32(8));
+        const auto h = std::min(image.info.size.height, u32(8));
+        const auto size = w * h * image.info.num_bits / 8;
+        const u64 hash = XXH3_64bits(addr, size);
+        if (image.hash == hash) {
+            image.flags &= ~ImageFlagBits::MaybeCpuDirty;
+            return;
+        }
+        image.hash = hash;
+    }
+
     const auto& num_layers = image.info.resources.layers;
     const auto& num_mips = image.info.resources.levels;
     ASSERT(num_mips == image.info.mips_layout.size());
@@ -390,14 +475,14 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
         const u32 height = std::max(image.info.size.height >> m, 1u);
         const u32 depth =
             image.info.props.is_volume ? std::max(image.info.size.depth >> m, 1u) : 1u;
-        const auto& [mip_size, mip_pitch, mip_height, mip_ofs] = image.info.mips_layout[m];
+        const auto& mip = image.info.mips_layout[m];

         // Protect GPU modified resources from accidental CPU reuploads.
        const bool is_gpu_modified = True(image.flags & ImageFlagBits::GpuModified);
         const bool is_gpu_dirty = True(image.flags & ImageFlagBits::GpuDirty);
         if (is_gpu_modified && !is_gpu_dirty) {
             const u8* addr = std::bit_cast<const u8*>(image.info.guest_address);
-            const u64 hash = XXH3_64bits(addr + mip_ofs, mip_size);
+            const u64 hash = XXH3_64bits(addr + mip.offset, mip.size);
             if (image.mip_hashes[m] == hash) {
                 continue;
             }
@@ -405,9 +490,9 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
         }

         image_copy.push_back({
-            .bufferOffset = mip_ofs * num_layers,
-            .bufferRowLength = static_cast<u32>(mip_pitch),
-            .bufferImageHeight = static_cast<u32>(mip_height),
+            .bufferOffset = mip.offset * num_layers,
+            .bufferRowLength = static_cast<u32>(mip.pitch),
+            .bufferImageHeight = static_cast<u32>(mip.height),
             .imageSubresource{
                 .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
                 .mipLevel = m,
@@ -420,6 +505,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
     }

     if (image_copy.empty()) {
+        image.flags &= ~ImageFlagBits::Dirty;
         return;
     }

@@ -465,16 +551,16 @@ void TextureCache::RegisterImage(ImageId image_id) {
     ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
                "Trying to register an already registered image");
     image.flags |= ImageFlagBits::Registered;
-    ForEachPage(image.cpu_addr, image.info.guest_size_bytes,
+    ForEachPage(image.info.guest_address, image.info.guest_size_bytes,
                 [this, image_id](u64 page) { page_table[page].push_back(image_id); });
 }

 void TextureCache::UnregisterImage(ImageId image_id) {
     Image& image = slot_images[image_id];
     ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
-               "Trying to unregister an already registered image");
+               "Trying to unregister an already unregistered image");
     image.flags &= ~ImageFlagBits::Registered;
-    ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) {
+    ForEachPage(image.info.guest_address, image.info.guest_size_bytes, [this, image_id](u64 page) {
         const auto page_it = page_table.find(page);
         if (page_it == nullptr) {
             UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PageShift);
@@ -492,25 +578,106 @@ void TextureCache::UnregisterImage(ImageId image_id) {

 void TextureCache::TrackImage(ImageId image_id) {
     auto& image = slot_images[image_id];
-    if (True(image.flags & ImageFlagBits::Tracked)) {
+    const auto image_begin = image.info.guest_address;
+    const auto image_end = image.info.guest_address + image.info.guest_size_bytes;
+    if (image_begin == image.track_addr && image_end == image.track_addr_end) {
         return;
     }
-    image.flags |= ImageFlagBits::Tracked;
-    tracker.UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, 1);
+
+    if (!image.IsTracked()) {
+        // Re-track the whole image
+        image.track_addr = image_begin;
+        image.track_addr_end = image_end;
+        tracker.UpdatePagesCachedCount(image_begin, image.info.guest_size_bytes, 1);
+    } else {
+        if (image_begin < image.track_addr) {
+            TrackImageHead(image_id);
+        }
+        if (image.track_addr_end < image_end) {
+            TrackImageTail(image_id);
+        }
+    }
+}
+
+void TextureCache::TrackImageHead(ImageId image_id) {
+    auto& image = slot_images[image_id];
+    const auto image_begin = image.info.guest_address;
+    if (image_begin == image.track_addr) {
+        return;
+    }
+    ASSERT(image.track_addr != 0 && image_begin < image.track_addr);
+    const auto size = image.track_addr - image_begin;
+    image.track_addr = image_begin;
+    tracker.UpdatePagesCachedCount(image_begin, size, 1);
+}
+
+void TextureCache::TrackImageTail(ImageId image_id) {
+    auto& image = slot_images[image_id];
+    const auto image_end = image.info.guest_address + image.info.guest_size_bytes;
+    if (image_end == image.track_addr_end) {
+        return;
+    }
+    ASSERT(image.track_addr_end != 0 && image.track_addr_end < image_end);
+    const auto addr = image.track_addr_end;
+    const auto size = image_end - image.track_addr_end;
+    image.track_addr_end = image_end;
+    tracker.UpdatePagesCachedCount(addr, size, 1);
 }

 void TextureCache::UntrackImage(ImageId image_id) {
     auto& image = slot_images[image_id];
-    if (False(image.flags & ImageFlagBits::Tracked)) {
+    if (!image.IsTracked()) {
         return;
     }
-    image.flags &= ~ImageFlagBits::Tracked;
-    tracker.UpdatePagesCachedCount(image.cpu_addr, image.info.guest_size_bytes, -1);
+    const auto addr = image.track_addr;
+    const auto size = image.track_addr_end - image.track_addr;
+    image.track_addr = 0;
+    image.track_addr_end = 0;
+    if (size != 0) {
+        tracker.UpdatePagesCachedCount(addr, size, -1);
+    }
+}
+
+void TextureCache::UntrackImageHead(ImageId image_id) {
+    auto& image = slot_images[image_id];
+    const auto image_begin = image.info.guest_address;
+    if (!image.IsTracked() || image_begin < image.track_addr) {
+        return;
+    }
+    const auto addr = tracker.GetNextPageAddr(image_begin);
+    const auto size = addr - image_begin;
+    image.track_addr = addr;
+    if (image.track_addr == image.track_addr_end) {
+        // This image spans only 2 pages and both are modified,
+        // but the image itself was not directly affected.
+        // Check its hash later.
+        MarkAsMaybeDirty(image_id, image);
+    }
+    tracker.UpdatePagesCachedCount(image_begin, size, -1);
+}
+
+void TextureCache::UntrackImageTail(ImageId image_id) {
+    auto& image = slot_images[image_id];
+    const auto image_end = image.info.guest_address + image.info.guest_size_bytes;
+    if (!image.IsTracked() || image.track_addr_end < image_end) {
+        return;
+    }
+    ASSERT(image.track_addr_end != 0);
+    const auto addr = tracker.GetPageAddr(image_end);
+    const auto size = image_end - addr;
+    image.track_addr_end = addr;
+    if (image.track_addr == image.track_addr_end) {
+        // This image spans only 2 pages and both are modified,
+        // but the image itself was not directly affected.
+        // Check its hash later.
+        MarkAsMaybeDirty(image_id, image);
+    }
+    tracker.UpdatePagesCachedCount(addr, size, -1);
+}

 void TextureCache::DeleteImage(ImageId image_id) {
     Image& image = slot_images[image_id];
-    ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
+    ASSERT_MSG(!image.IsTracked(), "Image was not untracked");
     ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");

     // Remove any registered meta areas.
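Note: the following sketch is not part of the patch; it only illustrates the page-boundary arithmetic that TrackImageHead/TrackImageTail and UntrackImageHead/UntrackImageTail above rely on. kPageSize and the local GetPageAddr/GetNextPageAddr helpers are assumptions standing in for the PageManager tracker's real methods, whose call sites appear in the diff but whose definitions do not.

// Standalone illustration of the head/tail (un)tracking arithmetic.
// Assumes 4 KiB pages; the real page size is defined by the tracker.
#include <cassert>
#include <cstdint>

constexpr std::uint64_t kPageSize = 4096; // assumption, not from the patch

std::uint64_t GetPageAddr(std::uint64_t addr) {
    return addr & ~(kPageSize - 1); // round down to the start of the page
}

std::uint64_t GetNextPageAddr(std::uint64_t addr) {
    return GetPageAddr(addr) + kPageSize; // start of the following page
}

int main() {
    // An image occupying [0x1100, 0x2F00) covers two pages: 0x1000 and 0x2000.
    const std::uint64_t begin = 0x1100, end = 0x2F00;

    // UntrackImageHead: release only the partially covered first page.
    const std::uint64_t new_track_addr = GetNextPageAddr(begin); // 0x2000
    assert(new_track_addr - begin == 0xF00); // bytes released at the head

    // UntrackImageTail: release only the partially covered last page.
    const std::uint64_t new_track_end = GetPageAddr(end); // 0x2000
    assert(end - new_track_end == 0xF00); // bytes released at the tail

    // When the two boundaries meet, every page of the image was invalidated
    // without the image memory necessarily changing; this is the case where
    // the cache marks the image MaybeCpuDirty and re-checks its hash later.
    assert(new_track_addr == new_track_end);
    return 0;
}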
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 96970bfc8..fab4c832f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -43,12 +43,59 @@ class TextureCache { using PageTable = MultiLevelPageTable; public: - explicit TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, - BufferCache& buffer_cache, PageManager& tracker); + enum class BindingType : u32 { + Texture, + Storage, + RenderTarget, + DepthTarget, + VideoOut, + }; + + struct BaseDesc { + ImageInfo info; + ImageViewInfo view_info; + BindingType type{BindingType::Texture}; + + BaseDesc() = default; + BaseDesc(BindingType type_, ImageInfo info_, ImageViewInfo view_info_) noexcept + : info{std::move(info_)}, view_info{std::move(view_info_)}, type{type_} {} + }; + + struct TextureDesc : public BaseDesc { + TextureDesc() = default; + TextureDesc(const AmdGpu::Image& image, const Shader::ImageResource& desc) + : BaseDesc{desc.is_storage ? BindingType::Storage : BindingType::Texture, + ImageInfo{image, desc}, ImageViewInfo{image, desc}} {} + }; + + struct RenderTargetDesc : public BaseDesc { + RenderTargetDesc(const AmdGpu::Liverpool::ColorBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint = {}) + : BaseDesc{BindingType::RenderTarget, ImageInfo{buffer, hint}, ImageViewInfo{buffer}} {} + }; + + struct DepthTargetDesc : public BaseDesc { + DepthTargetDesc(const AmdGpu::Liverpool::DepthBuffer& buffer, + const AmdGpu::Liverpool::DepthView& view, + const AmdGpu::Liverpool::DepthControl& ctl, VAddr htile_address, + const AmdGpu::Liverpool::CbDbExtent& hint = {}) + : BaseDesc{BindingType::DepthTarget, + ImageInfo{buffer, view.NumSlices(), htile_address, hint}, + ImageViewInfo{buffer, view, ctl}} {} + }; + + struct VideoOutDesc : public BaseDesc { + VideoOutDesc(const Libraries::VideoOut::BufferAttributeGroup& group, VAddr cpu_address) + : BaseDesc{BindingType::VideoOut, ImageInfo{group, cpu_address}, ImageViewInfo{}} {} + }; + +public: + TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, + BufferCache& buffer_cache, PageManager& tracker); ~TextureCache(); /// Invalidates any image in the logical page range. - void InvalidateMemory(VAddr address, size_t size); + void InvalidateMemory(VAddr addr, VAddr page_addr, size_t size); /// Marks an image as dirty if it exists at the provided address. void InvalidateMemoryFromGPU(VAddr address, size_t max_size); @@ -57,18 +104,16 @@ public: void UnmapMemory(VAddr cpu_addr, size_t size); /// Retrieves the image handle of the image with the provided attributes. - [[nodiscard]] ImageId FindImage(const ImageInfo& info, FindFlags flags = {}); + [[nodiscard]] ImageId FindImage(BaseDesc& desc, FindFlags flags = {}); /// Retrieves an image view with the properties of the specified image id. [[nodiscard]] ImageView& FindTexture(ImageId image_id, const ImageViewInfo& view_info); /// Retrieves the render target with specified properties - [[nodiscard]] ImageView& FindRenderTarget(const ImageInfo& image_info, - const ImageViewInfo& view_info); + [[nodiscard]] ImageView& FindRenderTarget(BaseDesc& desc); /// Retrieves the depth target with specified properties - [[nodiscard]] ImageView& FindDepthTarget(const ImageInfo& image_info, - const ImageViewInfo& view_info); + [[nodiscard]] ImageView& FindDepthTarget(BaseDesc& desc); /// Updates image contents if it was modified by CPU. 
    void UpdateImage(ImageId image_id, Vulkan::Scheduler* custom_scheduler = nullptr) {
@@ -77,11 +122,13 @@ public:
         RefreshImage(image, custom_scheduler);
     }

-    [[nodiscard]] ImageId ResolveOverlap(const ImageInfo& info, ImageId cache_img_id,
-                                         ImageId merged_image_id);
+    [[nodiscard]] std::tuple<ImageId, int, int> ResolveOverlap(const ImageInfo& info,
+                                                               BindingType binding,
+                                                               ImageId cache_img_id,
+                                                               ImageId merged_image_id);

     /// Resolves depth overlap and either re-creates the image or returns existing one
-    [[nodiscard]] ImageId ResolveDepthOverlap(const ImageInfo& requested_info,
+    [[nodiscard]] ImageId ResolveDepthOverlap(const ImageInfo& requested_info, BindingType binding,
                                               ImageId cache_img_id);

     [[nodiscard]] ImageId ExpandImage(const ImageInfo& info, ImageId image_id);
@@ -195,9 +242,15 @@ private:
     /// Track CPU reads and writes for image
     void TrackImage(ImageId image_id);
+    void TrackImageHead(ImageId image_id);
+    void TrackImageTail(ImageId image_id);

     /// Stop tracking CPU reads and writes for image
     void UntrackImage(ImageId image_id);
+    void UntrackImageHead(ImageId image_id);
+    void UntrackImageTail(ImageId image_id);
+
+    void MarkAsMaybeDirty(ImageId image_id, Image& image);

     /// Removes the image and any views/surface metas that reference it.
     void DeleteImage(ImageId image_id);
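Note: a minimal sketch (not part of the patch) of how the tuple-returning ResolveOverlap feeds the view fix-up in FindImage. All types here are simplified stand-ins for the cache's real ones, and ApplyResolvedMip is a hypothetical helper mirroring the `if (view_mip > 0)` adjustment shown in the .cpp diff.

// Mirrors the new flow: ResolveOverlap() can answer "this request is mip N of
// an existing image", and the caller patches the view instead of allocating.
#include <cstdint>
#include <tuple>

using ImageId = std::uint32_t; // stand-in; the real ImageId is a slot handle

struct ImageViewRangeBase { int level{0}; };   // first mip visible to the view
struct ImageViewRange { ImageViewRangeBase base; };
struct ImageViewInfo { ImageViewRange range; };

struct BaseDesc {
    ImageViewInfo view_info; // everything else omitted for brevity
};

// Hypothetical helper mirroring the view_mip fix-up in FindImage().
void ApplyResolvedMip(BaseDesc& desc, int view_mip) {
    if (view_mip > 0) {
        desc.view_info.range.base.level = view_mip;
    }
}

int main() {
    // Pretend ResolveOverlap() matched the request to mip 2 of cached image 7.
    const std::tuple<ImageId, int, int> resolved{7u, 2, -1};

    BaseDesc desc{};
    ApplyResolvedMip(desc, std::get<1>(resolved));
    return desc.view_info.range.base.level == 2 ? 0 : 1;
}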