diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8837a6584..c1ec7b7b9 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -753,6 +753,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/passes/hull_shader_transform.cpp
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
src/shader_recompiler/ir/passes/ir_passes.h
+ src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp
src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
src/shader_recompiler/ir/passes/ring_access_elimination.cpp
diff --git a/externals/sirit b/externals/sirit
index d6f3c0d99..8b9b12c20 160000
--- a/externals/sirit
+++ b/externals/sirit
@@ -1 +1 @@
-Subproject commit d6f3c0d99862ab2ff8f95e9ac221560f1f97e29a
+Subproject commit 8b9b12c2089505ac8b10fa56bf56b3ed49d9d7b0
diff --git a/src/qt_gui/check_update.cpp b/src/qt_gui/check_update.cpp
index 0c1cce5da..37554abfb 100644
--- a/src/qt_gui/check_update.cpp
+++ b/src/qt_gui/check_update.cpp
@@ -67,8 +67,20 @@ void CheckUpdate::CheckForUpdates(const bool showMessage) {
connect(reply, &QNetworkReply::finished, this, [this, reply, showMessage, updateChannel]() {
if (reply->error() != QNetworkReply::NoError) {
- QMessageBox::warning(this, tr("Error"),
- QString(tr("Network error:") + "\n" + reply->errorString()));
+ if (reply->attribute(QNetworkRequest::HttpStatusCodeAttribute).toInt() == 403) {
+ QString response = reply->readAll();
+ if (response.startsWith("{\"message\":\"API rate limit exceeded for")) {
+ QMessageBox::warning(this, tr("Auto Updater"),
+ tr("Error_Github_limit_MSG").replace("\\n", "\n"));
+ } else {
+ QMessageBox::warning(
+ this, tr("Error"),
+ QString(tr("Network error:") + "\n" + reply->errorString()));
+ }
+ } else {
+ QMessageBox::warning(this, tr("Error"),
+ QString(tr("Network error:") + "\n" + reply->errorString()));
+ }
reply->deleteLater();
return;
}
diff --git a/src/qt_gui/translations/ar.ts b/src/qt_gui/translations/ar.ts
index 617753ab8..209721b7f 100644
--- a/src/qt_gui/translations/ar.ts
+++ b/src/qt_gui/translations/ar.ts
@@ -1293,6 +1293,10 @@
Network error:
خطأ في الشبكة:
+
+ Error_Github_limit_MSG
+ يتيح التحديث التلقائي ما يصل إلى 60 عملية تحقق من التحديث في الساعة.\nلقد وصلت إلى هذا الحد. الرجاء المحاولة مرة أخرى لاحقًا.
+
Failed to parse update information.
فشل في تحليل معلومات التحديث.
diff --git a/src/qt_gui/translations/da_DK.ts b/src/qt_gui/translations/da_DK.ts
index abde6ff72..3b2bd84fa 100644
--- a/src/qt_gui/translations/da_DK.ts
+++ b/src/qt_gui/translations/da_DK.ts
@@ -1293,6 +1293,10 @@
Network error:
Netsværksfejl:
+
+ Error_Github_limit_MSG
+ Autoopdateren tillader op til 60 opdateringstjek i timen.\nDu har nået denne grænse. Prøv igen senere.
+
Failed to parse update information.
Kunne ikke analysere opdateringsoplysninger.
diff --git a/src/qt_gui/translations/de.ts b/src/qt_gui/translations/de.ts
index 4985160ff..4dbfecb18 100644
--- a/src/qt_gui/translations/de.ts
+++ b/src/qt_gui/translations/de.ts
@@ -1317,6 +1317,10 @@
Network error:
Netzwerkfehler:
+
+ Error_Github_limit_MSG
+ Der Auto-Updater erlaubt bis zu 60 Update-Überprüfungen pro Stunde.\nDu hast dieses Limit erreicht. Bitte versuche es später erneut.
+
Failed to parse update information.
Fehler beim Parsen der Aktualisierungsinformationen.
diff --git a/src/qt_gui/translations/el.ts b/src/qt_gui/translations/el.ts
index 828b99248..dfc13935b 100644
--- a/src/qt_gui/translations/el.ts
+++ b/src/qt_gui/translations/el.ts
@@ -1293,6 +1293,10 @@
Network error:
Σφάλμα δικτύου:
+
+ Error_Github_limit_MSG
+ Ο Αυτόματος Ενημερωτής επιτρέπει έως και 60 ελέγχους ενημερώσεων ανά ώρα.\nΈχετε φτάσει αυτό το όριο. Παρακαλώ δοκιμάστε ξανά αργότερα.
+
Failed to parse update information.
Αποτυχία ανάλυσης πληροφοριών ενημέρωσης.
diff --git a/src/qt_gui/translations/en.ts b/src/qt_gui/translations/en.ts
index d0540d7cd..440059b26 100644
--- a/src/qt_gui/translations/en.ts
+++ b/src/qt_gui/translations/en.ts
@@ -1326,6 +1326,10 @@
Network error:
Network error:
+
+ Error_Github_limit_MSG
+ The Auto Updater allows up to 60 update checks per hour.\nYou have reached this limit. Please try again later.
+
Failed to parse update information.
Failed to parse update information.
diff --git a/src/qt_gui/translations/es_ES.ts b/src/qt_gui/translations/es_ES.ts
index 772980994..eb35c523c 100644
--- a/src/qt_gui/translations/es_ES.ts
+++ b/src/qt_gui/translations/es_ES.ts
@@ -1309,6 +1309,10 @@
Network error:
Error de red:
+
+ Error_Github_limit_MSG
+ El actualizador automático permite hasta 60 comprobaciones de actualización por hora.\nHas alcanzado este límite. Por favor, inténtalo de nuevo más tarde.
+
Failed to parse update information.
Error al analizar la información de actualización.
diff --git a/src/qt_gui/translations/fa_IR.ts b/src/qt_gui/translations/fa_IR.ts
index 16f6533b6..288b3300e 100644
--- a/src/qt_gui/translations/fa_IR.ts
+++ b/src/qt_gui/translations/fa_IR.ts
@@ -1293,6 +1293,10 @@
Network error:
خطای شبکه:
+
+ Error_Github_limit_MSG
+ بهروزرسانی خودکار حداکثر ۶۰ بررسی بهروزرسانی در ساعت را مجاز میداند.\nشما به این محدودیت رسیدهاید. لطفاً بعداً دوباره امتحان کنید.
+
Failed to parse update information.
خطا در تجزیه اطلاعات بهروزرسانی.
diff --git a/src/qt_gui/translations/fi.ts b/src/qt_gui/translations/fi.ts
index 7269b4125..9a5de8016 100644
--- a/src/qt_gui/translations/fi.ts
+++ b/src/qt_gui/translations/fi.ts
@@ -1293,6 +1293,10 @@
Network error:
Verkkovirhe:
+
+ Error_Github_limit_MSG
+ Automaattinen päivitys sallii enintään 60 päivitystarkistusta tunnissa.\nOlet saavuttanut tämän rajan. Yritä myöhemmin uudelleen.
+
Failed to parse update information.
Päivitystietojen jäsentäminen epäonnistui.
diff --git a/src/qt_gui/translations/fr.ts b/src/qt_gui/translations/fr.ts
index efaaa9ad1..a8d526353 100644
--- a/src/qt_gui/translations/fr.ts
+++ b/src/qt_gui/translations/fr.ts
@@ -1293,6 +1293,10 @@
Network error:
Erreur réseau:
+
+ Error_Github_limit_MSG
+ Le programme de mise à jour automatique permet jusqu'à 60 vérifications de mise à jour par heure.\nVous avez atteint cette limite. Veuillez réessayer plus tard.
+
Failed to parse update information.
Échec de l'analyse des informations de mise à jour.
diff --git a/src/qt_gui/translations/hu_HU.ts b/src/qt_gui/translations/hu_HU.ts
index 98491aa87..e7efb77b9 100644
--- a/src/qt_gui/translations/hu_HU.ts
+++ b/src/qt_gui/translations/hu_HU.ts
@@ -1293,6 +1293,10 @@
Network error:
Hálózati hiba:
+
+ Error_Github_limit_MSG
+ Az automatikus frissítő óránként legfeljebb 60 frissítésellenőrzést engedélyez.\nElérte ezt a korlátot. Kérjük, próbálja újra később.
+
Failed to parse update information.
A frissítési információk elemzése sikertelen.
diff --git a/src/qt_gui/translations/id.ts b/src/qt_gui/translations/id.ts
index 931244209..12e80905b 100644
--- a/src/qt_gui/translations/id.ts
+++ b/src/qt_gui/translations/id.ts
@@ -1293,6 +1293,10 @@
Network error:
Kesalahan jaringan:
+
+ Error_Github_limit_MSG
+ Pembaruan Otomatis memungkinkan hingga 60 pemeriksaan pembaruan per jam.\nAnda telah mencapai batas ini. Silakan coba lagi nanti.
+
Failed to parse update information.
Gagal memparse informasi pembaruan.
diff --git a/src/qt_gui/translations/it.ts b/src/qt_gui/translations/it.ts
index 106d09de0..0fd06b247 100644
--- a/src/qt_gui/translations/it.ts
+++ b/src/qt_gui/translations/it.ts
@@ -1293,6 +1293,10 @@
Network error:
Errore di rete:
+
+ Error_Github_limit_MSG
+ L'Aggiornamento Automatico consente fino a 60 controlli di aggiornamento all'ora.\nHai raggiunto questo limite. Riprova più tardi.
+
Failed to parse update information.
Impossibile analizzare le informazioni di aggiornamento.
diff --git a/src/qt_gui/translations/ja_JP.ts b/src/qt_gui/translations/ja_JP.ts
index 2aae35987..e063c6ab2 100644
--- a/src/qt_gui/translations/ja_JP.ts
+++ b/src/qt_gui/translations/ja_JP.ts
@@ -1293,6 +1293,10 @@
Network error:
ネットワークエラー:
+
+ Error_Github_limit_MSG
+ 自動アップデーターは1時間に最大60回の更新チェックを許可します。\nこの制限に達しました。後でもう一度お試しください。
+
Failed to parse update information.
アップデート情報の解析に失敗しました。
diff --git a/src/qt_gui/translations/ko_KR.ts b/src/qt_gui/translations/ko_KR.ts
index 56e891214..57e0d6c01 100644
--- a/src/qt_gui/translations/ko_KR.ts
+++ b/src/qt_gui/translations/ko_KR.ts
@@ -1293,6 +1293,10 @@
Network error:
Network error:
+
+ Error_Github_limit_MSG
+ 자동 업데이트는 시간당 최대 60회의 업데이트 확인을 허용합니다.\n이 제한에 도달했습니다. 나중에 다시 시도해 주세요.
+
Failed to parse update information.
Failed to parse update information.
diff --git a/src/qt_gui/translations/lt_LT.ts b/src/qt_gui/translations/lt_LT.ts
index c73a43917..711cb183d 100644
--- a/src/qt_gui/translations/lt_LT.ts
+++ b/src/qt_gui/translations/lt_LT.ts
@@ -1293,6 +1293,10 @@
Network error:
Tinklo klaida:
+
+ Error_Github_limit_MSG
+ Automatinis atnaujinimas leidžia iki 60 atnaujinimų patikrinimų per valandą.\nJūs pasiekėte šią ribą. Bandykite dar kartą vėliau.
+
Failed to parse update information.
Nepavyko išanalizuoti atnaujinimo informacijos.
diff --git a/src/qt_gui/translations/nb.ts b/src/qt_gui/translations/nb.ts
index de6341a48..7579f7cae 100644
--- a/src/qt_gui/translations/nb.ts
+++ b/src/qt_gui/translations/nb.ts
@@ -1345,6 +1345,10 @@
Network error:
Nettverksfeil:
+
+ Error_Github_limit_MSG
+ Den automatiske oppdateringen tillater opptil 60 oppdateringssjekker per time.\nDu har nådd denne grensen. Prøv igjen senere.
+
Failed to parse update information.
Kunne ikke analysere oppdaterings-informasjonen.
diff --git a/src/qt_gui/translations/nl.ts b/src/qt_gui/translations/nl.ts
index 95ac19ef3..02596c087 100644
--- a/src/qt_gui/translations/nl.ts
+++ b/src/qt_gui/translations/nl.ts
@@ -1293,6 +1293,10 @@
Network error:
Netwerkfout:
+
+ Error_Github_limit_MSG
+ De automatische updater staat tot 60 updatecontroles per uur toe.\nJe hebt deze limiet bereikt. Probeer het later opnieuw.
+
Failed to parse update information.
Kon update-informatie niet parseren.
diff --git a/src/qt_gui/translations/pl_PL.ts b/src/qt_gui/translations/pl_PL.ts
index 89f165de2..9ca116994 100644
--- a/src/qt_gui/translations/pl_PL.ts
+++ b/src/qt_gui/translations/pl_PL.ts
@@ -1293,6 +1293,10 @@
Network error:
Błąd sieci:
+
+ Error_Github_limit_MSG
+ Automatyczna aktualizacja umożliwia maksymalnie 60 sprawdzeń aktualizacji na godzinę.\nOsiągnąłeś ten limit. Spróbuj ponownie później.
+
Failed to parse update information.
Nie udało się sparsować informacji o aktualizacji.
diff --git a/src/qt_gui/translations/pt_BR.ts b/src/qt_gui/translations/pt_BR.ts
index 0bce16dcf..11b9e3d48 100644
--- a/src/qt_gui/translations/pt_BR.ts
+++ b/src/qt_gui/translations/pt_BR.ts
@@ -1297,6 +1297,10 @@
Network error:
Erro de rede:
+
+ Error_Github_limit_MSG
+ O Atualizador Automático permite até 60 verificações de atualização por hora.\nVocê atingiu esse limite. Por favor, tente novamente mais tarde.
+
Failed to parse update information.
Falha ao analisar as informações de atualização.
diff --git a/src/qt_gui/translations/ro_RO.ts b/src/qt_gui/translations/ro_RO.ts
index f60de9823..ebda5eda5 100644
--- a/src/qt_gui/translations/ro_RO.ts
+++ b/src/qt_gui/translations/ro_RO.ts
@@ -1293,6 +1293,10 @@
Network error:
Eroare de rețea:
+
+ Error_Github_limit_MSG
+ Actualizatorul automat permite până la 60 de verificări de actualizare pe oră.\nAți atins această limită. Vă rugăm să încercați din nou mai târziu.
+
Failed to parse update information.
Nu s-au putut analiza informațiile de actualizare.
diff --git a/src/qt_gui/translations/ru_RU.ts b/src/qt_gui/translations/ru_RU.ts
index a8b3bacb4..589e5814c 100644
--- a/src/qt_gui/translations/ru_RU.ts
+++ b/src/qt_gui/translations/ru_RU.ts
@@ -1445,6 +1445,10 @@
Network error:
Сетевая ошибка:
+
+ Error_Github_limit_MSG
+ Автообновление позволяет выполнять до 60 проверок обновлений в час.\nВы достигли этого лимита. Пожалуйста, попробуйте позже.
+
Failed to parse update information.
Не удалось разобрать информацию об обновлении.
diff --git a/src/qt_gui/translations/sq.ts b/src/qt_gui/translations/sq.ts
index 1d37fa9c3..36d098afb 100644
--- a/src/qt_gui/translations/sq.ts
+++ b/src/qt_gui/translations/sq.ts
@@ -1293,6 +1293,10 @@
Network error:
Gabim rrjeti:
+
+ Error_Github_limit_MSG
+ Përditësuesi Automatik lejon deri në 60 kontrolle për përditësime në orë.\nJu keni arritur këtë kufi. Ju lutemi provoni përsëri më vonë.
+
Failed to parse update information.
Analizimi i informacionit të përditësimit deshtoi.
diff --git a/src/qt_gui/translations/sv.ts b/src/qt_gui/translations/sv.ts
index 179064ef4..2d3ff877a 100644
--- a/src/qt_gui/translations/sv.ts
+++ b/src/qt_gui/translations/sv.ts
@@ -271,6 +271,10 @@
Network error:
Nätverksfel:
+
+ Error_Github_limit_MSG
+ Den automatiska uppdateraren tillåter upp till 60 uppdateringskontroller per timme.\nDu har nått denna gräns. Försök igen senare.
+
Failed to parse update information.
Misslyckades med att tolka uppdateringsinformationen.
diff --git a/src/qt_gui/translations/tr_TR.ts b/src/qt_gui/translations/tr_TR.ts
index 12794e088..64807c5a6 100644
--- a/src/qt_gui/translations/tr_TR.ts
+++ b/src/qt_gui/translations/tr_TR.ts
@@ -1293,6 +1293,10 @@
Network error:
Ağ hatası:
+
+ Error_Github_limit_MSG
+ Otomatik Güncelleyici, saat başına en fazla 60 güncelleme kontrolüne izin verir.\nBu sınıra ulaştınız. Lütfen daha sonra tekrar deneyin.
+
Failed to parse update information.
Güncelleme bilgilerini ayrıştırma başarısız oldu.
diff --git a/src/qt_gui/translations/uk_UA.ts b/src/qt_gui/translations/uk_UA.ts
index 3fb26546e..f7e5a7495 100644
--- a/src/qt_gui/translations/uk_UA.ts
+++ b/src/qt_gui/translations/uk_UA.ts
@@ -1390,6 +1390,10 @@
Network error:
Мережева помилка:
+
+ Error_Github_limit_MSG
+ Автооновлення дозволяє до 60 перевірок оновлень на годину.\nВи досягли цього ліміту. Будь ласка, спробуйте пізніше.
+
Failed to parse update information.
Не вдалося розібрати інформацію про оновлення.
diff --git a/src/qt_gui/translations/vi_VN.ts b/src/qt_gui/translations/vi_VN.ts
index 32841af81..b38be2ee1 100644
--- a/src/qt_gui/translations/vi_VN.ts
+++ b/src/qt_gui/translations/vi_VN.ts
@@ -1293,6 +1293,10 @@
Network error:
Lỗi mạng:
+
+ Error_Github_limit_MSG
+ Trình cập nhật tự động cho phép tối đa 60 lần kiểm tra cập nhật mỗi giờ.\nBạn đã đạt đến giới hạn này. Vui lòng thử lại sau.
+
Failed to parse update information.
Không thể phân tích thông tin cập nhật.
diff --git a/src/qt_gui/translations/zh_CN.ts b/src/qt_gui/translations/zh_CN.ts
index 1e6124c85..867b7d860 100644
--- a/src/qt_gui/translations/zh_CN.ts
+++ b/src/qt_gui/translations/zh_CN.ts
@@ -1302,6 +1302,10 @@
Network error:
网络错误:
+
+ Error_Github_limit_MSG
+ 自动更新程序每小时最多允许 60 次更新检查。\n您已达到此限制。请稍后再试。
+
Failed to parse update information.
无法解析更新信息。
diff --git a/src/qt_gui/translations/zh_TW.ts b/src/qt_gui/translations/zh_TW.ts
index c18e173e4..faed8ae61 100644
--- a/src/qt_gui/translations/zh_TW.ts
+++ b/src/qt_gui/translations/zh_TW.ts
@@ -1293,6 +1293,10 @@
Network error:
網路錯誤:
+
+ Error_Github_limit_MSG
+ 自動更新程式每小時最多允許 60 次更新檢查。\n您已達到此限制。請稍後再試。
+
Failed to parse update information.
無法解析更新資訊。
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index f0cf15af0..3712380f5 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -250,7 +250,7 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
ctx.AddCapability(spv::Capability::Float64);
}
ctx.AddCapability(spv::Capability::Int64);
- if (info.has_storage_images || info.has_image_buffers) {
+ if (info.has_storage_images) {
ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
@@ -259,12 +259,6 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
ctx.AddCapability(spv::Capability::ImageReadWriteLodAMD);
}
}
- if (info.has_texel_buffers) {
- ctx.AddCapability(spv::Capability::SampledBuffer);
- }
- if (info.has_image_buffers) {
- ctx.AddCapability(spv::Capability::ImageBuffer);
- }
if (info.has_image_gather) {
ctx.AddCapability(spv::Capability::ImageGatherExtended);
}
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
index 539c6cb81..56a6abc05 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
@@ -6,6 +6,56 @@
namespace Shader::Backend::SPIRV {
+struct R {
+ R(u32 a, u32 b) : offset(a), size(b) {}
+ u32 offset;
+ u32 size;
+};
+template
+static std::array ExtractBitFields(EmitContext& ctx, const Id value,
+ const Args... args) {
+ const auto op_func =
+ is_signed ? &EmitContext::OpBitFieldSExtract : &EmitContext::OpBitFieldUExtract;
+ std::array result{};
+ u32 i = 0;
+ (
+ [&] {
+ result[i++] = (ctx.*op_func)(ctx.U32[1], value, ctx.ConstU32(args.offset),
+ ctx.ConstU32(args.size));
+ }(),
+ ...);
+ return result;
+}
+
+template
+static Id InsertBitFields(EmitContext& ctx, const std::initializer_list values,
+ const Args... args) {
+ Id result{};
+ auto it = values.begin();
+ (
+ [&] {
+ if (it == values.begin()) {
+ result = *it;
+ } else {
+ result = ctx.OpBitFieldInsert(ctx.U32[1], result, *it, ctx.ConstU32(args.offset),
+ ctx.ConstU32(args.size));
+ }
+ ++it;
+ }(),
+ ...);
+ return result;
+}
+
+template
+static std::array ExtractComposite(EmitContext& ctx, const VectorIds type,
+ const Id value) {
+ std::array result{};
+ for (u32 i = 0; i < num_components; i++) {
+ result[i] = ctx.OpCompositeExtract(type[1], value, i);
+ }
+ return result;
+}
+
Id EmitBitCastU16F16(EmitContext& ctx, Id value) {
return ctx.OpBitcast(ctx.U16, value);
}
@@ -42,22 +92,6 @@ Id EmitPackFloat2x32(EmitContext& ctx, Id value) {
return ctx.OpBitcast(ctx.F64[1], value);
}
-Id EmitPackFloat2x16(EmitContext& ctx, Id value) {
- return ctx.OpBitcast(ctx.U32[1], value);
-}
-
-Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) {
- return ctx.OpBitcast(ctx.F16[2], value);
-}
-
-Id EmitPackHalf2x16(EmitContext& ctx, Id value) {
- return ctx.OpPackHalf2x16(ctx.U32[1], value);
-}
-
-Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) {
- return ctx.OpUnpackHalf2x16(ctx.F32[2], value);
-}
-
Id EmitPackUnorm2x16(EmitContext& ctx, Id value) {
return ctx.OpPackUnorm2x16(ctx.U32[1], value);
}
@@ -75,31 +109,157 @@ Id EmitUnpackSnorm2x16(EmitContext& ctx, Id value) {
}
Id EmitPackUint2x16(EmitContext& ctx, Id value) {
- // No SPIR-V instruction for this, do it manually.
- const auto x{ctx.OpCompositeExtract(ctx.U32[1], value, 0)};
- const auto y{ctx.OpCompositeExtract(ctx.U32[1], value, 1)};
- return ctx.OpBitFieldInsert(ctx.U32[1], x, y, ctx.ConstU32(16U), ctx.ConstU32(16U));
+ const auto unpacked{ctx.OpBitcast(ctx.U32[2], value)};
+ const auto [x, y] = ExtractComposite<2>(ctx, ctx.U32, unpacked);
+ return InsertBitFields(ctx, {x, y}, R(0, 16), R(16, 16));
}
Id EmitUnpackUint2x16(EmitContext& ctx, Id value) {
- // No SPIR-V instruction for this, do it manually.
- const auto x{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(0U), ctx.ConstU32(16U))};
- const auto y{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(16U), ctx.ConstU32(16U))};
- return ctx.OpCompositeConstruct(ctx.U32[2], x, y);
+ const auto [x, y] = ExtractBitFields(ctx, value, R(0, 16), R(16, 16));
+ const auto unpacked{ctx.OpCompositeConstruct(ctx.U32[2], x, y)};
+ return ctx.OpBitcast(ctx.F32[2], unpacked);
}
Id EmitPackSint2x16(EmitContext& ctx, Id value) {
- // No SPIR-V instruction for this, do it manually.
- const auto x{ctx.OpCompositeExtract(ctx.U32[1], value, 0)};
- const auto y{ctx.OpCompositeExtract(ctx.U32[1], value, 1)};
- return ctx.OpBitFieldInsert(ctx.U32[1], x, y, ctx.ConstU32(16U), ctx.ConstU32(16U));
+ return EmitPackUint2x16(ctx, value);
}
Id EmitUnpackSint2x16(EmitContext& ctx, Id value) {
- // No SPIR-V instruction for this, do it manually.
- const auto x{ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.ConstU32(0U), ctx.ConstU32(16U))};
- const auto y{ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.ConstU32(16U), ctx.ConstU32(16U))};
- return ctx.OpCompositeConstruct(ctx.U32[2], x, y);
+ const auto [x, y] = ExtractBitFields(ctx, value, R(0, 16), R(16, 16));
+ const auto unpacked{ctx.OpCompositeConstruct(ctx.U32[2], x, y)};
+ return ctx.OpBitcast(ctx.F32[2], unpacked);
+}
+
+Id EmitPackHalf2x16(EmitContext& ctx, Id value) {
+ return ctx.OpPackHalf2x16(ctx.U32[1], value);
+}
+
+Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) {
+ return ctx.OpUnpackHalf2x16(ctx.F32[2], value);
+}
+
+Id EmitPackUnorm4x8(EmitContext& ctx, Id value) {
+ return ctx.OpPackUnorm4x8(ctx.U32[1], value);
+}
+
+Id EmitUnpackUnorm4x8(EmitContext& ctx, Id value) {
+ return ctx.OpUnpackUnorm4x8(ctx.F32[4], value);
+}
+
+Id EmitPackSnorm4x8(EmitContext& ctx, Id value) {
+ return ctx.OpPackSnorm4x8(ctx.U32[1], value);
+}
+
+Id EmitUnpackSnorm4x8(EmitContext& ctx, Id value) {
+ return ctx.OpUnpackSnorm4x8(ctx.F32[4], value);
+}
+
+Id EmitPackUint4x8(EmitContext& ctx, Id value) {
+ const auto unpacked{ctx.OpBitcast(ctx.U32[4], value)};
+ const auto [x, y, z, w] = ExtractComposite<4>(ctx, ctx.U32, unpacked);
+ return InsertBitFields(ctx, {x, y, z, w}, R(0, 8), R(8, 8), R(16, 8), R(24, 8));
+}
+
+Id EmitUnpackUint4x8(EmitContext& ctx, Id value) {
+ const auto [x, y, z, w] =
+ ExtractBitFields(ctx, value, R(0, 8), R(8, 8), R(16, 8), R(24, 8));
+ const auto unpacked{ctx.OpCompositeConstruct(ctx.U32[4], x, y, z, w)};
+ return ctx.OpBitcast(ctx.F32[4], unpacked);
+}
+
+Id EmitPackSint4x8(EmitContext& ctx, Id value) {
+ return EmitPackUint4x8(ctx, value);
+}
+
+Id EmitUnpackSint4x8(EmitContext& ctx, Id value) {
+ const auto [x, y, z, w] =
+ ExtractBitFields(ctx, value, R(0, 8), R(8, 8), R(16, 8), R(24, 8));
+ const auto unpacked{ctx.OpCompositeConstruct(ctx.U32[4], x, y, z, w)};
+ return ctx.OpBitcast(ctx.F32[4], unpacked);
+}
+
+Id EmitPackUfloat10_11_11(EmitContext& ctx, Id value) {
+ const auto [x, y, z] = ExtractComposite<3>(ctx, ctx.F32, value);
+ const auto cvt_x{ctx.OpFunctionCall(ctx.U32[1], ctx.f32_to_uf11, x)};
+ const auto cvt_y{ctx.OpFunctionCall(ctx.U32[1], ctx.f32_to_uf11, y)};
+ const auto cvt_z{ctx.OpFunctionCall(ctx.U32[1], ctx.f32_to_uf10, z)};
+ return InsertBitFields(ctx, {cvt_x, cvt_y, cvt_z}, R(0, 11), R(11, 11), R(22, 10));
+}
+
+Id EmitUnpackUfloat10_11_11(EmitContext& ctx, Id value) {
+ const auto [x, y, z] = ExtractBitFields(ctx, value, R(0, 11), R(11, 11), R(22, 10));
+ const auto cvt_x{ctx.OpFunctionCall(ctx.F32[1], ctx.uf11_to_f32, x)};
+ const auto cvt_y{ctx.OpFunctionCall(ctx.F32[1], ctx.uf11_to_f32, y)};
+ const auto cvt_z{ctx.OpFunctionCall(ctx.F32[1], ctx.uf10_to_f32, z)};
+ return ctx.OpCompositeConstruct(ctx.F32[3], cvt_x, cvt_y, cvt_z);
+}
+
+Id EmitPackUnorm2_10_10_10(EmitContext& ctx, Id value) {
+ const auto unorm_min{ctx.ConstantComposite(ctx.F32[4], ctx.ConstF32(0.f), ctx.ConstF32(0.f),
+ ctx.ConstF32(0.f), ctx.ConstF32(0.f))};
+ const auto unorm_max{ctx.ConstantComposite(ctx.F32[4], ctx.ConstF32(1.f), ctx.ConstF32(1.f),
+ ctx.ConstF32(1.f), ctx.ConstF32(1.f))};
+ const auto clamped{ctx.OpFClamp(ctx.F32[4], value, unorm_min, unorm_max)};
+ const auto unorm_mul{ctx.ConstantComposite(ctx.F32[4], ctx.ConstF32(1023.f),
+ ctx.ConstF32(1023.f), ctx.ConstF32(1023.f),
+ ctx.ConstF32(3.f))};
+ const auto as_float{ctx.OpFMul(ctx.F32[4], clamped, unorm_mul)};
+ const auto as_uint{ctx.OpConvertFToU(ctx.U32[4], ctx.OpRoundEven(ctx.F32[4], as_float))};
+ return EmitPackUint2_10_10_10(ctx, ctx.OpBitcast(ctx.F32[4], as_uint));
+}
+
+Id EmitUnpackUnorm2_10_10_10(EmitContext& ctx, Id value) {
+ const auto unpacked{ctx.OpBitcast(ctx.U32[4], EmitUnpackUint2_10_10_10(ctx, value))};
+ const auto as_float{ctx.OpConvertUToF(ctx.F32[4], unpacked)};
+ const auto unorm_div{ctx.ConstantComposite(ctx.F32[4], ctx.ConstF32(1023.f),
+ ctx.ConstF32(1023.f), ctx.ConstF32(1023.f),
+ ctx.ConstF32(3.f))};
+ return ctx.OpFDiv(ctx.F32[4], as_float, unorm_div);
+}
+
+Id EmitPackSnorm2_10_10_10(EmitContext& ctx, Id value) {
+ const auto snorm_min{ctx.ConstantComposite(ctx.F32[4], ctx.ConstF32(-1.f), ctx.ConstF32(-1.f),
+ ctx.ConstF32(-1.f), ctx.ConstF32(-1.f))};
+ const auto snorm_max{ctx.ConstantComposite(ctx.F32[4], ctx.ConstF32(1.f), ctx.ConstF32(1.f),
+ ctx.ConstF32(1.f), ctx.ConstF32(1.f))};
+ const auto clamped{ctx.OpFClamp(ctx.F32[4], value, snorm_min, snorm_max)};
+ const auto snorm_mul{ctx.ConstantComposite(ctx.F32[4], ctx.ConstF32(511.f), ctx.ConstF32(511.f),
+ ctx.ConstF32(511.f), ctx.ConstF32(1.f))};
+ const auto as_float{ctx.OpFMul(ctx.F32[4], clamped, snorm_mul)};
+ const auto as_sint{ctx.OpConvertFToS(ctx.U32[4], ctx.OpRoundEven(ctx.F32[4], as_float))};
+ return EmitPackSint2_10_10_10(ctx, ctx.OpBitcast(ctx.F32[4], as_sint));
+}
+
+Id EmitUnpackSnorm2_10_10_10(EmitContext& ctx, Id value) {
+ const auto unpacked{ctx.OpBitcast(ctx.U32[4], EmitUnpackSint2_10_10_10(ctx, value))};
+ const auto as_float{ctx.OpConvertSToF(ctx.F32[4], unpacked)};
+ const auto snorm_div{ctx.ConstantComposite(ctx.F32[4], ctx.ConstF32(511.f), ctx.ConstF32(511.f),
+ ctx.ConstF32(511.f), ctx.ConstF32(1.f))};
+ return ctx.OpFDiv(ctx.F32[4], as_float, snorm_div);
+}
+
+Id EmitPackUint2_10_10_10(EmitContext& ctx, Id value) {
+ const auto unpacked{ctx.OpBitcast(ctx.U32[4], value)};
+ const auto [x, y, z, w] = ExtractComposite<4>(ctx, ctx.U32, unpacked);
+ return InsertBitFields(ctx, {x, y, z, w}, R(0, 10), R(10, 10), R(20, 10), R(30, 2));
+}
+
+Id EmitUnpackUint2_10_10_10(EmitContext& ctx, Id value) {
+ const auto [x, y, z, w] =
+ ExtractBitFields(ctx, value, R(0, 10), R(10, 10), R(20, 10), R(30, 2));
+ const auto unpacked{ctx.OpCompositeConstruct(ctx.U32[4], x, y, z, w)};
+ return ctx.OpBitcast(ctx.F32[4], unpacked);
+}
+
+Id EmitPackSint2_10_10_10(EmitContext& ctx, Id value) {
+ return EmitPackUint2_10_10_10(ctx, value);
+}
+
+Id EmitUnpackSint2_10_10_10(EmitContext& ctx, Id value) {
+ const auto [x, y, z, w] =
+ ExtractBitFields(ctx, value, R(0, 10), R(10, 10), R(20, 10), R(30, 2));
+ const auto unpacked{ctx.OpCompositeConstruct(ctx.U32[4], x, y, z, w)};
+ return ctx.OpBitcast(ctx.F32[4], unpacked);
}
} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
index d064b5d05..4f9e6040e 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
@@ -24,6 +24,10 @@ Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, I
return EmitCompositeConstruct(ctx, inst, ctx.U32[4], e1, e2, e3, e4);
}
+Id EmitCompositeConstructU32x2x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
+ return EmitCompositeConstruct(ctx, inst, ctx.U32[4], e1, e2);
+}
+
Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) {
return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
}
@@ -124,6 +128,10 @@ Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, I
return EmitCompositeConstruct(ctx, inst, ctx.F32[4], e1, e2, e3, e4);
}
+Id EmitCompositeConstructF32x2x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
+ return EmitCompositeConstruct(ctx, inst, ctx.F32[4], e1, e2);
+}
+
Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) {
return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
}
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 4550440bb..ae77ed413 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -416,6 +416,20 @@ static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) {
}
}
+Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
+ const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(3u))};
+ const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
+ const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
+ return ctx.OpBitFieldUExtract(ctx.U32[1], dword, bit_offset, ctx.ConstU32(8u));
+}
+
+Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
+ const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(2u))};
+ const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
+ const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
+ return ctx.OpBitFieldUExtract(ctx.U32[1], dword, bit_offset, ctx.ConstU32(16u));
+}
+
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferU32xN<1>(ctx, handle, address);
}
@@ -432,18 +446,24 @@ Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferU32xN<4>(ctx, handle, address);
}
+Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+ return ctx.OpBitcast(ctx.F32[1], EmitLoadBufferU32(ctx, inst, handle, address));
+}
+
+Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+ return ctx.OpBitcast(ctx.F32[2], EmitLoadBufferU32x2(ctx, inst, handle, address));
+}
+
+Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+ return ctx.OpBitcast(ctx.F32[3], EmitLoadBufferU32x3(ctx, inst, handle, address));
+}
+
+Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+ return ctx.OpBitcast(ctx.F32[4], EmitLoadBufferU32x4(ctx, inst, handle, address));
+}
+
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
- const auto& buffer = ctx.texture_buffers[handle];
- const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
- const Id coord =
- ctx.OpIAdd(ctx.U32[1], ctx.OpShiftLeftLogical(ctx.U32[1], address, buffer.coord_shift),
- buffer.coord_offset);
- Id texel = buffer.is_storage ? ctx.OpImageRead(buffer.result_type, tex_buffer, coord)
- : ctx.OpImageFetch(buffer.result_type, tex_buffer, coord);
- if (buffer.is_integer) {
- texel = ctx.OpBitcast(ctx.F32[4], texel);
- }
- return texel;
+ UNREACHABLE_MSG("SPIR-V instruction");
}
template
@@ -464,32 +484,56 @@ static void EmitStoreBufferU32xN(EmitContext& ctx, u32 handle, Id address, Id va
}
}
-void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
+ const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(3u))};
+ const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
+ const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
+ const Id new_val{ctx.OpBitFieldInsert(ctx.U32[1], dword, value, bit_offset, ctx.ConstU32(8u))};
+ EmitStoreBufferU32xN<1>(ctx, handle, address, new_val);
+}
+
+void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
+ const Id byte_index{ctx.OpBitwiseAnd(ctx.U32[1], address, ctx.ConstU32(2u))};
+ const Id bit_offset{ctx.OpShiftLeftLogical(ctx.U32[1], byte_index, ctx.ConstU32(3u))};
+ const Id dword{EmitLoadBufferU32xN<1>(ctx, handle, address)};
+ const Id new_val{ctx.OpBitFieldInsert(ctx.U32[1], dword, value, bit_offset, ctx.ConstU32(16u))};
+ EmitStoreBufferU32xN<1>(ctx, handle, address, new_val);
+}
+
+void EmitStoreBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
EmitStoreBufferU32xN<1>(ctx, handle, address, value);
}
-void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
EmitStoreBufferU32xN<2>(ctx, handle, address, value);
}
-void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
EmitStoreBufferU32xN<3>(ctx, handle, address, value);
}
-void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
EmitStoreBufferU32xN<4>(ctx, handle, address, value);
}
+void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+ EmitStoreBufferU32(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[1], value));
+}
+
+void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+ EmitStoreBufferU32x2(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[2], value));
+}
+
+void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+ EmitStoreBufferU32x3(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[3], value));
+}
+
+void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
+ EmitStoreBufferU32x4(ctx, inst, handle, address, ctx.OpBitcast(ctx.U32[4], value));
+}
+
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
- const auto& buffer = ctx.texture_buffers[handle];
- const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
- const Id coord =
- ctx.OpIAdd(ctx.U32[1], ctx.OpShiftLeftLogical(ctx.U32[1], address, buffer.coord_shift),
- buffer.coord_offset);
- if (buffer.is_integer) {
- value = ctx.OpBitcast(buffer.result_type, value);
- }
- ctx.OpImageWrite(tex_buffer, coord, value);
+ UNREACHABLE_MSG("SPIR-V instruction");
}
} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 842b13207..3e2cea9e5 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -63,15 +63,27 @@ void EmitGetGotoVariable(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx);
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst);
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
+Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
+void EmitStoreBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitStoreBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
@@ -123,6 +135,7 @@ Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value);
Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);
Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4);
+Id EmitCompositeConstructU32x2x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
@@ -151,6 +164,7 @@ Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32
Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
Id EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);
Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4);
+Id EmitCompositeConstructF32x2x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
@@ -193,10 +207,6 @@ void EmitBitCastF64U64(EmitContext& ctx);
Id EmitPackUint2x32(EmitContext& ctx, Id value);
Id EmitUnpackUint2x32(EmitContext& ctx, Id value);
Id EmitPackFloat2x32(EmitContext& ctx, Id value);
-Id EmitPackFloat2x16(EmitContext& ctx, Id value);
-Id EmitUnpackFloat2x16(EmitContext& ctx, Id value);
-Id EmitPackHalf2x16(EmitContext& ctx, Id value);
-Id EmitUnpackHalf2x16(EmitContext& ctx, Id value);
Id EmitPackUnorm2x16(EmitContext& ctx, Id value);
Id EmitUnpackUnorm2x16(EmitContext& ctx, Id value);
Id EmitPackSnorm2x16(EmitContext& ctx, Id value);
@@ -205,6 +215,26 @@ Id EmitPackUint2x16(EmitContext& ctx, Id value);
Id EmitUnpackUint2x16(EmitContext& ctx, Id value);
Id EmitPackSint2x16(EmitContext& ctx, Id value);
Id EmitUnpackSint2x16(EmitContext& ctx, Id value);
+Id EmitPackHalf2x16(EmitContext& ctx, Id value);
+Id EmitUnpackHalf2x16(EmitContext& ctx, Id value);
+Id EmitPackUnorm4x8(EmitContext& ctx, Id value);
+Id EmitUnpackUnorm4x8(EmitContext& ctx, Id value);
+Id EmitPackSnorm4x8(EmitContext& ctx, Id value);
+Id EmitUnpackSnorm4x8(EmitContext& ctx, Id value);
+Id EmitPackUint4x8(EmitContext& ctx, Id value);
+Id EmitUnpackUint4x8(EmitContext& ctx, Id value);
+Id EmitPackSint4x8(EmitContext& ctx, Id value);
+Id EmitUnpackSint4x8(EmitContext& ctx, Id value);
+Id EmitPackUfloat10_11_11(EmitContext& ctx, Id value);
+Id EmitUnpackUfloat10_11_11(EmitContext& ctx, Id value);
+Id EmitPackUnorm2_10_10_10(EmitContext& ctx, Id value);
+Id EmitUnpackUnorm2_10_10_10(EmitContext& ctx, Id value);
+Id EmitPackSnorm2_10_10_10(EmitContext& ctx, Id value);
+Id EmitUnpackSnorm2_10_10_10(EmitContext& ctx, Id value);
+Id EmitPackUint2_10_10_10(EmitContext& ctx, Id value);
+Id EmitUnpackUint2_10_10_10(EmitContext& ctx, Id value);
+Id EmitPackSint2_10_10_10(EmitContext& ctx, Id value);
+Id EmitUnpackSint2_10_10_10(EmitContext& ctx, Id value);
Id EmitFPAbs16(EmitContext& ctx, Id value);
Id EmitFPAbs32(EmitContext& ctx, Id value);
Id EmitFPAbs64(EmitContext& ctx, Id value);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 2a0c28563..13d727c72 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -74,8 +74,8 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
DefineInterfaces();
DefineSharedMemory();
DefineBuffers();
- DefineTextureBuffers();
DefineImagesAndSamplers();
+ DefineFunctions();
}
EmitContext::~EmitContext() = default;
@@ -205,19 +205,6 @@ void EmitContext::DefineBufferOffsets() {
buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding));
}
- for (TextureBufferDefinition& tex_buffer : texture_buffers) {
- const u32 binding = tex_buffer.binding;
- const u32 half = PushData::BufOffsetIndex + (binding >> 4);
- const u32 comp = (binding & 0xf) >> 2;
- const u32 offset = (binding & 0x3) << 3;
- const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
- push_data_block, ConstU32(half), ConstU32(comp))};
- const Id value{OpLoad(U32[1], ptr)};
- tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(6U));
- tex_buffer.coord_shift =
- OpBitFieldUExtract(U32[1], value, ConstU32(offset + 6U), ConstU32(2U));
- Name(tex_buffer.coord_offset, fmt::format("texbuf{}_off", binding));
- }
}
void EmitContext::DefineInterpolatedAttribs() {
@@ -676,32 +663,6 @@ void EmitContext::DefineBuffers() {
}
}
-void EmitContext::DefineTextureBuffers() {
- for (const auto& desc : info.texture_buffers) {
- const auto sharp = desc.GetSharp(info);
- const auto nfmt = sharp.GetNumberFmt();
- const bool is_integer = AmdGpu::IsInteger(nfmt);
- const VectorIds& sampled_type{GetAttributeType(*this, nfmt)};
- const u32 sampled = desc.is_written ? 2 : 1;
- const Id image_type{TypeImage(sampled_type[1], spv::Dim::Buffer, false, false, false,
- sampled, spv::ImageFormat::Unknown)};
- const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
- const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
- Decorate(id, spv::Decoration::Binding, binding.unified++);
- Decorate(id, spv::Decoration::DescriptorSet, 0U);
- Name(id, fmt::format("{}_{}", desc.is_written ? "imgbuf" : "texbuf", desc.sharp_idx));
- texture_buffers.push_back({
- .id = id,
- .binding = binding.buffer++,
- .image_type = image_type,
- .result_type = sampled_type[4],
- .is_integer = is_integer,
- .is_storage = desc.is_written,
- });
- interfaces.push_back(id);
- }
-}
-
spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
if (image.GetDataFmt() == AmdGpu::DataFormat::Format32 &&
image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) {
@@ -893,4 +854,117 @@ void EmitContext::DefineSharedMemory() {
}
}
+Id EmitContext::DefineFloat32ToUfloatM5(u32 mantissa_bits, const std::string_view name) {
+ // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/util/format_r11g11b10f.h
+ const auto func_type{TypeFunction(U32[1], F32[1])};
+ const auto func{OpFunction(U32[1], spv::FunctionControlMask::MaskNone, func_type)};
+ const auto value{OpFunctionParameter(F32[1])};
+ Name(func, name);
+ AddLabel();
+
+ const auto raw_value{OpBitcast(U32[1], value)};
+ const auto exponent{
+ OpBitcast(S32[1], OpBitFieldSExtract(U32[1], raw_value, ConstU32(23U), ConstU32(8U)))};
+ const auto sign{OpBitFieldUExtract(U32[1], raw_value, ConstU32(31U), ConstU32(1U))};
+
+ const auto is_zero{OpLogicalOr(U1[1], OpIEqual(U1[1], raw_value, ConstU32(0U)),
+ OpIEqual(U1[1], sign, ConstU32(1U)))};
+ const auto is_nan{OpIsNan(U1[1], value)};
+ const auto is_inf{OpIsInf(U1[1], value)};
+ const auto is_denorm{OpSLessThanEqual(U1[1], exponent, ConstS32(-15))};
+
+ const auto denorm_mantissa{OpConvertFToU(
+ U32[1],
+ OpRoundEven(F32[1], OpFMul(F32[1], value,
+                          ConstF32(static_cast<float>(1 << (mantissa_bits + 14))))))};
+ const auto denorm_overflow{
+ OpINotEqual(U1[1], OpShiftRightLogical(U32[1], denorm_mantissa, ConstU32(mantissa_bits)),
+ ConstU32(0U))};
+ const auto denorm{
+ OpSelect(U32[1], denorm_overflow, ConstU32(1U << mantissa_bits), denorm_mantissa)};
+
+ const auto norm_mantissa{OpConvertFToU(
+ U32[1],
+ OpRoundEven(F32[1],
+ OpLdexp(F32[1], value,
+                        OpISub(S32[1], ConstS32(static_cast<s32>(mantissa_bits)), exponent))))};
+ const auto norm_overflow{
+ OpUGreaterThanEqual(U1[1], norm_mantissa, ConstU32(2U << mantissa_bits))};
+ const auto norm_final_mantissa{OpBitwiseAnd(
+ U32[1],
+ OpSelect(U32[1], norm_overflow, OpShiftRightLogical(U32[1], norm_mantissa, ConstU32(1U)),
+ norm_mantissa),
+ ConstU32((1U << mantissa_bits) - 1))};
+ const auto norm_final_exponent{OpBitcast(
+ U32[1],
+ OpIAdd(S32[1],
+ OpSelect(S32[1], norm_overflow, OpIAdd(S32[1], exponent, ConstS32(1)), exponent),
+ ConstS32(15)))};
+ const auto norm{OpBitFieldInsert(U32[1], norm_final_mantissa, norm_final_exponent,
+ ConstU32(mantissa_bits), ConstU32(5U))};
+
+ const auto result{OpSelect(U32[1], is_zero, ConstU32(0U),
+ OpSelect(U32[1], is_nan, ConstU32(31u << mantissa_bits | 1U),
+ OpSelect(U32[1], is_inf, ConstU32(31U << mantissa_bits),
+ OpSelect(U32[1], is_denorm, denorm, norm))))};
+
+ OpReturnValue(result);
+ OpFunctionEnd();
+ return func;
+}
+
+Id EmitContext::DefineUfloatM5ToFloat32(u32 mantissa_bits, const std::string_view name) {
+ // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/util/format_r11g11b10f.h
+ const auto func_type{TypeFunction(F32[1], U32[1])};
+ const auto func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type)};
+ const auto value{OpFunctionParameter(U32[1])};
+ Name(func, name);
+ AddLabel();
+
+ const auto raw_mantissa{
+ OpBitFieldUExtract(U32[1], value, ConstU32(0U), ConstU32(mantissa_bits))};
+ const auto mantissa{OpConvertUToF(F32[1], raw_mantissa)};
+ const auto exponent{OpBitcast(
+ S32[1], OpBitFieldSExtract(U32[1], value, ConstU32(mantissa_bits), ConstU32(5U)))};
+
+ const auto is_exp_neg_one{OpIEqual(U1[1], exponent, ConstS32(-1))};
+ const auto is_exp_zero{OpIEqual(U1[1], exponent, ConstS32(0))};
+
+ const auto is_zero{OpIEqual(U1[1], value, ConstU32(0u))};
+ const auto is_nan{
+ OpLogicalAnd(U1[1], is_exp_neg_one, OpINotEqual(U1[1], raw_mantissa, ConstU32(0u)))};
+ const auto is_inf{
+ OpLogicalAnd(U1[1], is_exp_neg_one, OpIEqual(U1[1], raw_mantissa, ConstU32(0u)))};
+ const auto is_denorm{
+ OpLogicalAnd(U1[1], is_exp_zero, OpINotEqual(U1[1], raw_mantissa, ConstU32(0u)))};
+
+ const auto denorm{OpFMul(F32[1], mantissa, ConstF32(1.f / (1 << 20)))};
+ const auto norm{OpLdexp(
+ F32[1],
+ OpFAdd(F32[1],
+               OpFMul(F32[1], mantissa, ConstF32(1.f / static_cast<float>(1 << mantissa_bits))),
+ ConstF32(1.f)),
+ exponent)};
+
+ const auto result{OpSelect(F32[1], is_zero, ConstF32(0.f),
+ OpSelect(F32[1], is_nan, ConstF32(NAN),
+ OpSelect(F32[1], is_inf, ConstF32(INFINITY),
+ OpSelect(F32[1], is_denorm, denorm, norm))))};
+
+ OpReturnValue(result);
+ OpFunctionEnd();
+ return func;
+}
+
+void EmitContext::DefineFunctions() {
+ if (info.uses_pack_10_11_11) {
+ f32_to_uf11 = DefineFloat32ToUfloatM5(6, "f32_to_uf11");
+ f32_to_uf10 = DefineFloat32ToUfloatM5(5, "f32_to_uf10");
+ }
+ if (info.uses_unpack_10_11_11) {
+ uf11_to_f32 = DefineUfloatM5ToFloat32(6, "uf11_to_f32");
+ uf10_to_f32 = DefineUfloatM5ToFloat32(5, "uf10_to_f32");
+ }
+}
+
} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index ab42ecc5b..23fca4212 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -235,20 +235,9 @@ public:
const VectorIds* data_types;
Id pointer_type;
};
- struct TextureBufferDefinition {
- Id id;
- Id coord_offset;
- Id coord_shift;
- u32 binding;
- Id image_type;
- Id result_type;
- bool is_integer = false;
- bool is_storage = false;
- };
Bindings& binding;
boost::container::small_vector buffers;
- boost::container::small_vector texture_buffers;
BufferDefinition srt_flatbuf;
boost::container::small_vector images;
boost::container::small_vector samplers;
@@ -271,6 +260,11 @@ public:
std::array output_params{};
std::array frag_outputs{};
+ Id uf11_to_f32{};
+ Id f32_to_uf11{};
+ Id uf10_to_f32{};
+ Id f32_to_uf10{};
+
private:
void DefineArithmeticTypes();
void DefineInterfaces();
@@ -278,12 +272,15 @@ private:
void DefineOutputs();
void DefinePushDataBlock();
void DefineBuffers();
- void DefineTextureBuffers();
void DefineImagesAndSamplers();
void DefineSharedMemory();
+ void DefineFunctions();
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, u32 num_components,
bool output);
+
+ Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name);
+ Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name);
};
} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp
index 28c4685db..ece35093a 100644
--- a/src/shader_recompiler/frontend/translate/export.cpp
+++ b/src/shader_recompiler/frontend/translate/export.cpp
@@ -30,28 +30,25 @@ void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR:
        static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
- IR::Value unpacked_value;
- bool is_integer = false;
+ AmdGpu::NumberFormat num_format;
switch (color_buffer.export_format) {
case AmdGpu::Liverpool::ShaderExportFormat::Zero:
// No export
return;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_FP16:
- unpacked_value = ir.UnpackHalf2x16(value);
+ num_format = AmdGpu::NumberFormat::Float;
break;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UNORM16:
- unpacked_value = ir.UnpackUnorm2x16(value);
+ num_format = AmdGpu::NumberFormat::Unorm;
break;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SNORM16:
- unpacked_value = ir.UnpackSnorm2x16(value);
+ num_format = AmdGpu::NumberFormat::Snorm;
break;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UINT16:
- unpacked_value = ir.UnpackUint2x16(value);
- is_integer = true;
+ num_format = AmdGpu::NumberFormat::Uint;
break;
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16:
- unpacked_value = ir.UnpackSint2x16(value);
- is_integer = true;
+ num_format = AmdGpu::NumberFormat::Sint;
break;
default:
UNREACHABLE_MSG("Unimplemented compressed MRT export format {}",
@@ -59,16 +56,15 @@ void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR:
break;
}
- const auto r = ir.CompositeExtract(unpacked_value, 0);
- const auto g = ir.CompositeExtract(unpacked_value, 1);
- const IR::F32 float_r = is_integer ? ir.BitCast(IR::U32{r}) : IR::F32{r};
- const IR::F32 float_g = is_integer ? ir.BitCast(IR::U32{g}) : IR::F32{g};
+ const auto unpacked_value = ir.Unpack2x16(num_format, value);
+ const IR::F32 r = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
+ const IR::F32 g = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
const auto swizzled_r = SwizzleMrtComponent(color_buffer, idx * 2);
const auto swizzled_g = SwizzleMrtComponent(color_buffer, idx * 2 + 1);
- ExportMrtValue(attribute, swizzled_r, float_r, color_buffer);
- ExportMrtValue(attribute, swizzled_g, float_g, color_buffer);
+ ExportMrtValue(attribute, swizzled_r, r, color_buffer);
+ ExportMrtValue(attribute, swizzled_g, g, color_buffer);
}
void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
@@ -115,7 +111,7 @@ void Translator::ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U3
ExportMrtCompressed(attribute, idx, value);
return;
}
- const IR::Value unpacked_value = ir.UnpackHalf2x16(value);
+ const IR::Value unpacked_value = ir.Unpack2x16(AmdGpu::NumberFormat::Float, value);
const IR::F32 r = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
const IR::F32 g = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
ir.SetAttribute(attribute, r, idx * 2);
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index f73618dbe..56e903052 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -651,19 +651,19 @@ void Translator::V_LDEXP_F32(const GcnInst& inst) {
void Translator::V_CVT_PKNORM_U16_F32(const GcnInst& inst) {
const IR::Value vec_f32 =
        ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
- SetDst(inst.dst[0], ir.PackUnorm2x16(vec_f32));
+ SetDst(inst.dst[0], ir.Pack2x16(AmdGpu::NumberFormat::Unorm, vec_f32));
}
void Translator::V_CVT_PKNORM_I16_F32(const GcnInst& inst) {
const IR::Value vec_f32 =
        ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
- SetDst(inst.dst[0], ir.PackSnorm2x16(vec_f32));
+ SetDst(inst.dst[0], ir.Pack2x16(AmdGpu::NumberFormat::Snorm, vec_f32));
}
void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
const IR::Value vec_f32 =
        ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
- SetDst(inst.dst[0], ir.PackHalf2x16(vec_f32));
+ SetDst(inst.dst[0], ir.Pack2x16(AmdGpu::NumberFormat::Float, vec_f32));
}
// VOP1
@@ -1245,14 +1245,16 @@ void Translator::V_SAD_U32(const GcnInst& inst) {
void Translator::V_CVT_PK_U16_U32(const GcnInst& inst) {
const IR::Value vec_u32 =
- ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1]));
- SetDst(inst.dst[0], ir.PackUint2x16(vec_u32));
+ ir.CompositeConstruct(ir.BitCast(GetSrc(inst.src[0])),
+ ir.BitCast(GetSrc(inst.src[1])));
+ SetDst(inst.dst[0], ir.Pack2x16(AmdGpu::NumberFormat::Uint, vec_u32));
}
void Translator::V_CVT_PK_I16_I32(const GcnInst& inst) {
const IR::Value vec_u32 =
- ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1]));
- SetDst(inst.dst[0], ir.PackSint2x16(vec_u32));
+ ir.CompositeConstruct(ir.BitCast(GetSrc(inst.src[0])),
+ ir.BitCast(GetSrc(inst.src[1])));
+ SetDst(inst.dst[0], ir.Pack2x16(AmdGpu::NumberFormat::Sint, vec_u32));
}
void Translator::V_CVT_PK_U8_F32(const GcnInst& inst) {
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index 685785af1..0b911eb57 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -208,7 +208,7 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
- const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, buffer_info);
+ const IR::Value value = ir.LoadBufferU32(num_dwords, handle, address, buffer_info);
const IR::VectorReg dst_reg{inst.src[1].code};
if (num_dwords == 1) {
ir.SetVectorReg(dst_reg, IR::U32{value});
@@ -314,16 +314,18 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
- ir.StoreBuffer(num_dwords, handle, address, value, buffer_info);
+ ir.StoreBufferU32(num_dwords, handle, address, value, buffer_info);
}
void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {
const auto& mubuf = inst.control.mubuf;
const IR::VectorReg vaddr{inst.src[0].code};
const IR::ScalarReg sharp{inst.src[2].code * 4};
- ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported");
const IR::Value address = [&] -> IR::Value {
- if (mubuf.idxen) {
+ if (mubuf.idxen && mubuf.offen) {
+ return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
+ }
+ if (mubuf.idxen || mubuf.offen) {
return ir.GetVectorReg(vaddr);
}
return {};
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index 9469eaad7..498752607 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -48,6 +48,7 @@ struct BufferResource {
bool is_instance_data{};
u8 instance_attrib{};
bool is_written{};
+ bool is_formatted{};
[[nodiscard]] bool IsStorage(const AmdGpu::Buffer& buffer) const noexcept {
return buffer.GetSize() > MaxUboSize || is_written || is_gds_buffer;
@@ -57,14 +58,6 @@ struct BufferResource {
};
using BufferResourceList = boost::container::small_vector;
-struct TextureBufferResource {
- u32 sharp_idx;
- bool is_written{};
-
- [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
-};
-using TextureBufferResourceList = boost::container::small_vector;
-
struct ImageResource {
u32 sharp_idx;
bool is_depth{};
@@ -114,11 +107,6 @@ struct PushData {
ASSERT(offset < 256 && binding < buf_offsets.size());
buf_offsets[binding] = offset;
}
-
- void AddTexelOffset(u32 binding, u32 multiplier, u32 texel_offset) {
- ASSERT(texel_offset < 64 && multiplier < 16);
- buf_offsets[binding] = texel_offset | ((std::bit_width(multiplier) - 1) << 6);
- }
};
static_assert(sizeof(PushData) <= 128,
"PushData size is greater than minimum size guaranteed by Vulkan spec");
@@ -175,7 +163,6 @@ struct Info {
u32 uses_patches{};
BufferResourceList buffers;
- TextureBufferResourceList texture_buffers;
ImageResourceList images;
SamplerResourceList samplers;
FMaskResourceList fmasks;
@@ -193,8 +180,6 @@ struct Info {
u64 pgm_hash{};
VAddr pgm_base;
bool has_storage_images{};
- bool has_image_buffers{};
- bool has_texel_buffers{};
bool has_discard{};
bool has_image_gather{};
bool has_image_query{};
@@ -204,6 +189,8 @@ struct Info {
bool uses_shared{};
bool uses_fp16{};
bool uses_fp64{};
+ bool uses_pack_10_11_11{};
+ bool uses_unpack_10_11_11{};
bool stores_tess_level_outer{};
bool stores_tess_level_inner{};
bool translation_failed{}; // indicates that shader has unsupported instructions
@@ -246,8 +233,7 @@ struct Info {
}
void AddBindings(Backend::Bindings& bnd) const {
- const auto total_buffers =
- buffers.size() + texture_buffers.size() + (has_readconst ? 1 : 0);
+ const auto total_buffers = buffers.size() + (has_readconst ? 1 : 0);
bnd.buffer += total_buffers;
bnd.unified += total_buffers + images.size() + samplers.size();
bnd.user_data += ud_mask.NumRegs();
@@ -278,10 +264,6 @@ constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexce
return inline_cbuf ? inline_cbuf : info.ReadUdSharp(sharp_idx);
}
-constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept {
- return info.ReadUdSharp(sharp_idx);
-}
-
constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
const auto image = info.ReadUdSharp(sharp_idx);
if (!image.Valid()) {
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index ecbe1f838..7e3d0f937 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -370,8 +370,16 @@ U32 IREmitter::ReadConstBuffer(const Value& handle, const U32& index) {
return Inst(Opcode::ReadConstBuffer, handle, index);
}
-Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& address,
- BufferInstInfo info) {
+U32 IREmitter::LoadBufferU8(const Value& handle, const Value& address, BufferInstInfo info) {
+ return Inst(Opcode::LoadBufferU8, Flags{info}, handle, address);
+}
+
+U32 IREmitter::LoadBufferU16(const Value& handle, const Value& address, BufferInstInfo info) {
+ return Inst(Opcode::LoadBufferU16, Flags{info}, handle, address);
+}
+
+Value IREmitter::LoadBufferU32(int num_dwords, const Value& handle, const Value& address,
+ BufferInstInfo info) {
switch (num_dwords) {
case 1:
return Inst(Opcode::LoadBufferU32, Flags{info}, handle, address);
@@ -386,12 +394,38 @@ Value IREmitter::LoadBuffer(int num_dwords, const Value& handle, const Value& ad
}
}
+Value IREmitter::LoadBufferF32(int num_dwords, const Value& handle, const Value& address,
+ BufferInstInfo info) {
+ switch (num_dwords) {
+ case 1:
+ return Inst(Opcode::LoadBufferF32, Flags{info}, handle, address);
+ case 2:
+ return Inst(Opcode::LoadBufferF32x2, Flags{info}, handle, address);
+ case 3:
+ return Inst(Opcode::LoadBufferF32x3, Flags{info}, handle, address);
+ case 4:
+ return Inst(Opcode::LoadBufferF32x4, Flags{info}, handle, address);
+ default:
+ UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);
+ }
+}
+
Value IREmitter::LoadBufferFormat(const Value& handle, const Value& address, BufferInstInfo info) {
return Inst(Opcode::LoadBufferFormatF32, Flags{info}, handle, address);
}
-void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& address,
- const Value& data, BufferInstInfo info) {
+void IREmitter::StoreBufferU8(const Value& handle, const Value& address, const U32& data,
+ BufferInstInfo info) {
+ Inst(Opcode::StoreBufferU8, Flags{info}, handle, address, data);
+}
+
+void IREmitter::StoreBufferU16(const Value& handle, const Value& address, const U32& data,
+ BufferInstInfo info) {
+ Inst(Opcode::StoreBufferU16, Flags{info}, handle, address, data);
+}
+
+void IREmitter::StoreBufferU32(int num_dwords, const Value& handle, const Value& address,
+ const Value& data, BufferInstInfo info) {
switch (num_dwords) {
case 1:
Inst(Opcode::StoreBufferU32, Flags{info}, handle, address, data);
@@ -410,6 +444,31 @@ void IREmitter::StoreBuffer(int num_dwords, const Value& handle, const Value& ad
}
}
+void IREmitter::StoreBufferF32(int num_dwords, const Value& handle, const Value& address,
+ const Value& data, BufferInstInfo info) {
+ switch (num_dwords) {
+ case 1:
+ Inst(Opcode::StoreBufferF32, Flags{info}, handle, address, data);
+ break;
+ case 2:
+ Inst(Opcode::StoreBufferF32x2, Flags{info}, handle, address, data);
+ break;
+ case 3:
+ Inst(Opcode::StoreBufferF32x3, Flags{info}, handle, address, data);
+ break;
+ case 4:
+ Inst(Opcode::StoreBufferF32x4, Flags{info}, handle, address, data);
+ break;
+ default:
+ UNREACHABLE_MSG("Invalid number of dwords {}", num_dwords);
+ }
+}
+
+void IREmitter::StoreBufferFormat(const Value& handle, const Value& address, const Value& data,
+ BufferInstInfo info) {
+ Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data);
+}
+
Value IREmitter::BufferAtomicIAdd(const Value& handle, const Value& address, const Value& value,
BufferInstInfo info) {
return Inst(Opcode::BufferAtomicIAdd32, Flags{info}, handle, address, value);
@@ -457,11 +516,6 @@ Value IREmitter::BufferAtomicSwap(const Value& handle, const Value& address, con
return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value);
}
-void IREmitter::StoreBufferFormat(const Value& handle, const Value& address, const Value& data,
- BufferInstInfo info) {
- Inst(Opcode::StoreBufferFormatF32, Flags{info}, handle, address, data);
-}
-
U32 IREmitter::DataAppend(const U32& counter) {
return Inst(Opcode::DataAppend, counter, Imm32(0));
}
@@ -527,10 +581,14 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) {
switch (e1.Type()) {
case Type::U32:
return Inst(Opcode::CompositeConstructU32x2, e1, e2);
+ case Type::U32x2:
+ return Inst(Opcode::CompositeConstructU32x2x2, e1, e2);
case Type::F16:
return Inst(Opcode::CompositeConstructF16x2, e1, e2);
case Type::F32:
return Inst(Opcode::CompositeConstructF32x2, e1, e2);
+ case Type::F32x2:
+ return Inst(Opcode::CompositeConstructF32x2x2, e1, e2);
case Type::F64:
return Inst(Opcode::CompositeConstructF64x2, e1, e2);
default:
@@ -779,52 +837,116 @@ F64 IREmitter::PackFloat2x32(const Value& vector) {
return Inst(Opcode::PackFloat2x32, vector);
}
-U32 IREmitter::PackFloat2x16(const Value& vector) {
- return Inst(Opcode::PackFloat2x16, vector);
+U32 IREmitter::Pack2x16(const AmdGpu::NumberFormat number_format, const Value& vector) {
+ switch (number_format) {
+ case AmdGpu::NumberFormat::Unorm:
+ return Inst(Opcode::PackUnorm2x16, vector);
+ case AmdGpu::NumberFormat::Snorm:
+ return Inst(Opcode::PackSnorm2x16, vector);
+ case AmdGpu::NumberFormat::Uint:
+ return Inst(Opcode::PackUint2x16, vector);
+ case AmdGpu::NumberFormat::Sint:
+ return Inst(Opcode::PackSint2x16, vector);
+ case AmdGpu::NumberFormat::Float:
+ return Inst(Opcode::PackHalf2x16, vector);
+ default:
+ UNREACHABLE_MSG("Unsupported 2x16 number format: {}", number_format);
+ }
}
-Value IREmitter::UnpackFloat2x16(const U32& value) {
- return Inst(Opcode::UnpackFloat2x16, value);
+Value IREmitter::Unpack2x16(const AmdGpu::NumberFormat number_format, const U32& value) {
+ switch (number_format) {
+ case AmdGpu::NumberFormat::Unorm:
+ return Inst(Opcode::UnpackUnorm2x16, value);
+ case AmdGpu::NumberFormat::Snorm:
+ return Inst(Opcode::UnpackSnorm2x16, value);
+ case AmdGpu::NumberFormat::Uint:
+ return Inst(Opcode::UnpackUint2x16, value);
+ case AmdGpu::NumberFormat::Sint:
+ return Inst(Opcode::UnpackSint2x16, value);
+ case AmdGpu::NumberFormat::Float:
+ return Inst(Opcode::UnpackHalf2x16, value);
+ default:
+ UNREACHABLE_MSG("Unsupported 2x16 number format: {}", number_format);
+ }
}
-U32 IREmitter::PackHalf2x16(const Value& vector) {
- return Inst(Opcode::PackHalf2x16, vector);
+U32 IREmitter::Pack4x8(const AmdGpu::NumberFormat number_format, const Value& vector) {
+ switch (number_format) {
+ case AmdGpu::NumberFormat::Unorm:
+ return Inst(Opcode::PackUnorm4x8, vector);
+ case AmdGpu::NumberFormat::Snorm:
+ return Inst(Opcode::PackSnorm4x8, vector);
+ case AmdGpu::NumberFormat::Uint:
+ return Inst(Opcode::PackUint4x8, vector);
+ case AmdGpu::NumberFormat::Sint:
+ return Inst(Opcode::PackSint4x8, vector);
+ default:
+ UNREACHABLE_MSG("Unsupported 4x8 number format: {}", number_format);
+ }
}
-Value IREmitter::UnpackHalf2x16(const U32& value) {
- return Inst(Opcode::UnpackHalf2x16, value);
+Value IREmitter::Unpack4x8(const AmdGpu::NumberFormat number_format, const U32& value) {
+ switch (number_format) {
+ case AmdGpu::NumberFormat::Unorm:
+ return Inst(Opcode::UnpackUnorm4x8, value);
+ case AmdGpu::NumberFormat::Snorm:
+ return Inst(Opcode::UnpackSnorm4x8, value);
+ case AmdGpu::NumberFormat::Uint:
+ return Inst(Opcode::UnpackUint4x8, value);
+ case AmdGpu::NumberFormat::Sint:
+ return Inst(Opcode::UnpackSint4x8, value);
+ default:
+ UNREACHABLE_MSG("Unsupported 4x8 number format: {}", number_format);
+ }
}
-U32 IREmitter::PackUnorm2x16(const Value& vector) {
- return Inst(Opcode::PackUnorm2x16, vector);
+U32 IREmitter::Pack10_11_11(const AmdGpu::NumberFormat number_format, const Value& vector) {
+ switch (number_format) {
+ case AmdGpu::NumberFormat::Float:
+ return Inst(Opcode::PackUfloat10_11_11, vector);
+ default:
+ UNREACHABLE_MSG("Unsupported 10_11_11 number format: {}", number_format);
+ }
}
-Value IREmitter::UnpackUnorm2x16(const U32& value) {
- return Inst(Opcode::UnpackUnorm2x16, value);
+U32 IREmitter::Pack2_10_10_10(const AmdGpu::NumberFormat number_format, const Value& vector) {
+ switch (number_format) {
+ case AmdGpu::NumberFormat::Unorm:
+ return Inst(Opcode::PackUnorm2_10_10_10, vector);
+ case AmdGpu::NumberFormat::Snorm:
+ return Inst(Opcode::PackSnorm2_10_10_10, vector);
+ case AmdGpu::NumberFormat::Uint:
+ return Inst(Opcode::PackUint2_10_10_10, vector);
+ case AmdGpu::NumberFormat::Sint:
+ return Inst(Opcode::PackSint2_10_10_10, vector);
+ default:
+ UNREACHABLE_MSG("Unsupported 2_10_10_10 number format: {}", number_format);
+ }
}
-U32 IREmitter::PackSnorm2x16(const Value& vector) {
- return Inst(Opcode::PackSnorm2x16, vector);
+Value IREmitter::Unpack2_10_10_10(const AmdGpu::NumberFormat number_format, const U32& value) {
+ switch (number_format) {
+ case AmdGpu::NumberFormat::Unorm:
+ return Inst(Opcode::UnpackUnorm2_10_10_10, value);
+ case AmdGpu::NumberFormat::Snorm:
+ return Inst(Opcode::UnpackSnorm2_10_10_10, value);
+ case AmdGpu::NumberFormat::Uint:
+ return Inst(Opcode::UnpackUint2_10_10_10, value);
+ case AmdGpu::NumberFormat::Sint:
+ return Inst(Opcode::UnpackSint2_10_10_10, value);
+ default:
+ UNREACHABLE_MSG("Unsupported 2_10_10_10 number format: {}", number_format);
+ }
}
-Value IREmitter::UnpackSnorm2x16(const U32& value) {
- return Inst(Opcode::UnpackSnorm2x16, value);
-}
-
-U32 IREmitter::PackUint2x16(const Value& value) {
- return Inst(Opcode::PackUint2x16, value);
-}
-
-Value IREmitter::UnpackUint2x16(const U32& value) {
- return Inst(Opcode::UnpackUint2x16, value);
-}
-
-U32 IREmitter::PackSint2x16(const Value& value) {
- return Inst(Opcode::PackSint2x16, value);
-}
-
-Value IREmitter::UnpackSint2x16(const U32& value) {
- return Inst(Opcode::UnpackSint2x16, value);
+Value IREmitter::Unpack10_11_11(const AmdGpu::NumberFormat number_format, const U32& value) {
+ switch (number_format) {
+ case AmdGpu::NumberFormat::Float:
+ return Inst(Opcode::UnpackUfloat10_11_11, value);
+ default:
+ UNREACHABLE_MSG("Unsupported 10_11_11 number format: {}", number_format);
+ }
}
F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) {
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index 97b94187a..7ac75bf70 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -109,12 +109,22 @@ public:
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
[[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index);
- [[nodiscard]] Value LoadBuffer(int num_dwords, const Value& handle, const Value& address,
- BufferInstInfo info);
+ [[nodiscard]] U32 LoadBufferU8(const Value& handle, const Value& address, BufferInstInfo info);
+ [[nodiscard]] U32 LoadBufferU16(const Value& handle, const Value& address, BufferInstInfo info);
+ [[nodiscard]] Value LoadBufferU32(int num_dwords, const Value& handle, const Value& address,
+ BufferInstInfo info);
+ [[nodiscard]] Value LoadBufferF32(int num_dwords, const Value& handle, const Value& address,
+ BufferInstInfo info);
[[nodiscard]] Value LoadBufferFormat(const Value& handle, const Value& address,
BufferInstInfo info);
- void StoreBuffer(int num_dwords, const Value& handle, const Value& address, const Value& data,
- BufferInstInfo info);
+ void StoreBufferU8(const Value& handle, const Value& address, const U32& data,
+ BufferInstInfo info);
+ void StoreBufferU16(const Value& handle, const Value& address, const U32& data,
+ BufferInstInfo info);
+ void StoreBufferU32(int num_dwords, const Value& handle, const Value& address,
+ const Value& data, BufferInstInfo info);
+ void StoreBufferF32(int num_dwords, const Value& handle, const Value& address,
+ const Value& data, BufferInstInfo info);
void StoreBufferFormat(const Value& handle, const Value& address, const Value& data,
BufferInstInfo info);
@@ -167,22 +177,19 @@ public:
[[nodiscard]] U64 PackUint2x32(const Value& vector);
[[nodiscard]] Value UnpackUint2x32(const U64& value);
-
[[nodiscard]] F64 PackFloat2x32(const Value& vector);
- [[nodiscard]] U32 PackFloat2x16(const Value& vector);
- [[nodiscard]] Value UnpackFloat2x16(const U32& value);
+ [[nodiscard]] U32 Pack2x16(AmdGpu::NumberFormat number_format, const Value& vector);
+ [[nodiscard]] Value Unpack2x16(AmdGpu::NumberFormat number_format, const U32& value);
- [[nodiscard]] U32 PackHalf2x16(const Value& vector);
- [[nodiscard]] Value UnpackHalf2x16(const U32& value);
- [[nodiscard]] U32 PackUnorm2x16(const Value& vector);
- [[nodiscard]] Value UnpackUnorm2x16(const U32& value);
- [[nodiscard]] U32 PackSnorm2x16(const Value& vector);
- [[nodiscard]] Value UnpackSnorm2x16(const U32& value);
- [[nodiscard]] U32 PackUint2x16(const Value& value);
- [[nodiscard]] Value UnpackUint2x16(const U32& value);
- [[nodiscard]] U32 PackSint2x16(const Value& value);
- [[nodiscard]] Value UnpackSint2x16(const U32& value);
+ [[nodiscard]] U32 Pack4x8(AmdGpu::NumberFormat number_format, const Value& vector);
+ [[nodiscard]] Value Unpack4x8(AmdGpu::NumberFormat number_format, const U32& value);
+
+ [[nodiscard]] U32 Pack10_11_11(AmdGpu::NumberFormat number_format, const Value& vector);
+ [[nodiscard]] Value Unpack10_11_11(AmdGpu::NumberFormat number_format, const U32& value);
+
+ [[nodiscard]] U32 Pack2_10_10_10(AmdGpu::NumberFormat number_format, const Value& vector);
+ [[nodiscard]] Value Unpack2_10_10_10(AmdGpu::NumberFormat number_format, const U32& value);
[[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
[[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);
diff --git a/src/shader_recompiler/ir/microinstruction.cpp b/src/shader_recompiler/ir/microinstruction.cpp
index 6e7bbe661..fdbc019e3 100644
--- a/src/shader_recompiler/ir/microinstruction.cpp
+++ b/src/shader_recompiler/ir/microinstruction.cpp
@@ -54,10 +54,16 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::SetAttribute:
case Opcode::SetTcsGenericAttribute:
case Opcode::SetPatch:
+ case Opcode::StoreBufferU8:
+ case Opcode::StoreBufferU16:
case Opcode::StoreBufferU32:
case Opcode::StoreBufferU32x2:
case Opcode::StoreBufferU32x3:
case Opcode::StoreBufferU32x4:
+ case Opcode::StoreBufferF32:
+ case Opcode::StoreBufferF32x2:
+ case Opcode::StoreBufferF32x3:
+ case Opcode::StoreBufferF32x4:
case Opcode::StoreBufferFormatF32:
case Opcode::BufferAtomicIAdd32:
case Opcode::BufferAtomicSMin32:
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 6750be5a6..0d87430d2 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -90,15 +90,27 @@ OPCODE(UndefU32, U32,
OPCODE(UndefU64, U64, )
// Buffer operations
+OPCODE(LoadBufferU8, U32, Opaque, Opaque, )
+OPCODE(LoadBufferU16, U32, Opaque, Opaque, )
OPCODE(LoadBufferU32, U32, Opaque, Opaque, )
OPCODE(LoadBufferU32x2, U32x2, Opaque, Opaque, )
OPCODE(LoadBufferU32x3, U32x3, Opaque, Opaque, )
OPCODE(LoadBufferU32x4, U32x4, Opaque, Opaque, )
+OPCODE(LoadBufferF32, F32, Opaque, Opaque, )
+OPCODE(LoadBufferF32x2, F32x2, Opaque, Opaque, )
+OPCODE(LoadBufferF32x3, F32x3, Opaque, Opaque, )
+OPCODE(LoadBufferF32x4, F32x4, Opaque, Opaque, )
OPCODE(LoadBufferFormatF32, F32x4, Opaque, Opaque, )
+OPCODE(StoreBufferU8, Void, Opaque, Opaque, U32, )
+OPCODE(StoreBufferU16, Void, Opaque, Opaque, U32, )
OPCODE(StoreBufferU32, Void, Opaque, Opaque, U32, )
OPCODE(StoreBufferU32x2, Void, Opaque, Opaque, U32x2, )
OPCODE(StoreBufferU32x3, Void, Opaque, Opaque, U32x3, )
OPCODE(StoreBufferU32x4, Void, Opaque, Opaque, U32x4, )
+OPCODE(StoreBufferF32, Void, Opaque, Opaque, F32, )
+OPCODE(StoreBufferF32x2, Void, Opaque, Opaque, F32x2, )
+OPCODE(StoreBufferF32x3, Void, Opaque, Opaque, F32x3, )
+OPCODE(StoreBufferF32x4, Void, Opaque, Opaque, F32x4, )
OPCODE(StoreBufferFormatF32, Void, Opaque, Opaque, F32x4, )
// Buffer atomic operations
@@ -118,6 +130,7 @@ OPCODE(BufferAtomicSwap32, U32, Opaq
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, )
+OPCODE(CompositeConstructU32x2x2, U32x4, U32x2, U32x2, )
OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
@@ -142,6 +155,7 @@ OPCODE(CompositeShuffleF16x4, F16x4, F16x
OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
+OPCODE(CompositeConstructF32x2x2, F32x4, F32x2, F32x2, )
OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
@@ -180,21 +194,42 @@ OPCODE(BitCastU64F64, U64, F64,
OPCODE(BitCastF16U16, F16, U16, )
OPCODE(BitCastF32U32, F32, U32, )
OPCODE(BitCastF64U64, F64, U64, )
+
OPCODE(PackUint2x32, U64, U32x2, )
OPCODE(UnpackUint2x32, U32x2, U64, )
OPCODE(PackFloat2x32, F64, F32x2, )
-OPCODE(PackFloat2x16, U32, F16x2, )
-OPCODE(UnpackFloat2x16, F16x2, U32, )
-OPCODE(PackHalf2x16, U32, F32x2, )
-OPCODE(UnpackHalf2x16, F32x2, U32, )
+
OPCODE(PackUnorm2x16, U32, F32x2, )
OPCODE(UnpackUnorm2x16, F32x2, U32, )
OPCODE(PackSnorm2x16, U32, F32x2, )
OPCODE(UnpackSnorm2x16, F32x2, U32, )
-OPCODE(PackUint2x16, U32, U32x2, )
-OPCODE(UnpackUint2x16, U32x2, U32, )
-OPCODE(PackSint2x16, U32, U32x2, )
-OPCODE(UnpackSint2x16, U32x2, U32, )
+OPCODE(PackUint2x16, U32, F32x2, )
+OPCODE(UnpackUint2x16, F32x2, U32, )
+OPCODE(PackSint2x16, U32, F32x2, )
+OPCODE(UnpackSint2x16, F32x2, U32, )
+OPCODE(PackHalf2x16, U32, F32x2, )
+OPCODE(UnpackHalf2x16, F32x2, U32, )
+
+OPCODE(PackUnorm4x8, U32, F32x4, )
+OPCODE(UnpackUnorm4x8, F32x4, U32, )
+OPCODE(PackSnorm4x8, U32, F32x4, )
+OPCODE(UnpackSnorm4x8, F32x4, U32, )
+OPCODE(PackUint4x8, U32, F32x4, )
+OPCODE(UnpackUint4x8, F32x4, U32, )
+OPCODE(PackSint4x8, U32, F32x4, )
+OPCODE(UnpackSint4x8, F32x4, U32, )
+
+OPCODE(PackUfloat10_11_11, U32, F32x3, )
+OPCODE(UnpackUfloat10_11_11, F32x3, U32, )
+
+OPCODE(PackUnorm2_10_10_10, U32, F32x4, )
+OPCODE(UnpackUnorm2_10_10_10, F32x4, U32, )
+OPCODE(PackSnorm2_10_10_10, U32, F32x4, )
+OPCODE(UnpackSnorm2_10_10_10, F32x4, U32, )
+OPCODE(PackUint2_10_10_10, U32, F32x4, )
+OPCODE(UnpackUint2_10_10_10, F32x4, U32, )
+OPCODE(PackSint2_10_10_10, U32, F32x4, )
+OPCODE(UnpackSint2_10_10_10, F32x4, U32, )
// Floating-point operations
OPCODE(FPAbs32, F32, F32, )
diff --git a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
index c72b9e835..c8a4b13cb 100644
--- a/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
@@ -340,14 +340,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
return FoldBitCast(inst, IR::Opcode::BitCastU32F32);
case IR::Opcode::BitCastU32F32:
return FoldBitCast(inst, IR::Opcode::BitCastF32U32);
- case IR::Opcode::PackHalf2x16:
- return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16);
- case IR::Opcode::UnpackHalf2x16:
- return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16);
- case IR::Opcode::PackFloat2x16:
- return FoldInverseFunc(inst, IR::Opcode::UnpackFloat2x16);
- case IR::Opcode::UnpackFloat2x16:
- return FoldInverseFunc(inst, IR::Opcode::PackFloat2x16);
+ // 2x16
case IR::Opcode::PackUnorm2x16:
return FoldInverseFunc(inst, IR::Opcode::UnpackUnorm2x16);
case IR::Opcode::UnpackUnorm2x16:
@@ -364,6 +357,49 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
return FoldInverseFunc(inst, IR::Opcode::UnpackSint2x16);
case IR::Opcode::UnpackSint2x16:
return FoldInverseFunc(inst, IR::Opcode::PackSint2x16);
+ case IR::Opcode::PackHalf2x16:
+ return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16);
+ case IR::Opcode::UnpackHalf2x16:
+ return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16);
+ // 4x8
+ case IR::Opcode::PackUnorm4x8:
+ return FoldInverseFunc(inst, IR::Opcode::UnpackUnorm4x8);
+ case IR::Opcode::UnpackUnorm4x8:
+ return FoldInverseFunc(inst, IR::Opcode::PackUnorm4x8);
+ case IR::Opcode::PackSnorm4x8:
+ return FoldInverseFunc(inst, IR::Opcode::UnpackSnorm4x8);
+ case IR::Opcode::UnpackSnorm4x8:
+ return FoldInverseFunc(inst, IR::Opcode::PackSnorm4x8);
+ case IR::Opcode::PackUint4x8:
+ return FoldInverseFunc(inst, IR::Opcode::UnpackUint4x8);
+ case IR::Opcode::UnpackUint4x8:
+ return FoldInverseFunc(inst, IR::Opcode::PackUint4x8);
+ case IR::Opcode::PackSint4x8:
+ return FoldInverseFunc(inst, IR::Opcode::UnpackSint4x8);
+ case IR::Opcode::UnpackSint4x8:
+ return FoldInverseFunc(inst, IR::Opcode::PackSint4x8);
+ // 10_11_11
+ case IR::Opcode::PackUfloat10_11_11:
+ return FoldInverseFunc(inst, IR::Opcode::UnpackUfloat10_11_11);
+ case IR::Opcode::UnpackUfloat10_11_11:
+ return FoldInverseFunc(inst, IR::Opcode::PackUfloat10_11_11);
+ // 2_10_10_10
+ case IR::Opcode::PackUnorm2_10_10_10:
+ return FoldInverseFunc(inst, IR::Opcode::UnpackUnorm2_10_10_10);
+ case IR::Opcode::UnpackUnorm2_10_10_10:
+ return FoldInverseFunc(inst, IR::Opcode::PackUnorm2_10_10_10);
+ case IR::Opcode::PackSnorm2_10_10_10:
+ return FoldInverseFunc(inst, IR::Opcode::UnpackSnorm2_10_10_10);
+ case IR::Opcode::UnpackSnorm2_10_10_10:
+ return FoldInverseFunc(inst, IR::Opcode::PackSnorm2_10_10_10);
+ case IR::Opcode::PackUint2_10_10_10:
+ return FoldInverseFunc(inst, IR::Opcode::UnpackUint2_10_10_10);
+ case IR::Opcode::UnpackUint2_10_10_10:
+ return FoldInverseFunc(inst, IR::Opcode::PackUint2_10_10_10);
+ case IR::Opcode::PackSint2_10_10_10:
+ return FoldInverseFunc(inst, IR::Opcode::UnpackSint2_10_10_10);
+ case IR::Opcode::UnpackSint2_10_10_10:
+ return FoldInverseFunc(inst, IR::Opcode::PackSint2_10_10_10);
case IR::Opcode::SelectU1:
case IR::Opcode::SelectU8:
case IR::Opcode::SelectU16:
diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h
index 8a71d9e1f..0d6816ae0 100644
--- a/src/shader_recompiler/ir/passes/ir_passes.h
+++ b/src/shader_recompiler/ir/passes/ir_passes.h
@@ -19,6 +19,7 @@ void ConstantPropagationPass(IR::BlockList& program);
void FlattenExtendedUserdataPass(IR::Program& program);
void ResourceTrackingPass(IR::Program& program);
void CollectShaderInfoPass(IR::Program& program);
+void LowerBufferFormatToRaw(IR::Program& program);
void LowerSharedMemToRegisters(IR::Program& program);
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
Stage stage);
diff --git a/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp
new file mode 100644
index 000000000..b30b022f8
--- /dev/null
+++ b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp
@@ -0,0 +1,211 @@
+// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/info.h"
+#include "shader_recompiler/ir/basic_block.h"
+#include "shader_recompiler/ir/ir_emitter.h"
+#include "shader_recompiler/ir/program.h"
+#include "shader_recompiler/ir/reinterpret.h"
+#include "video_core/amdgpu/resource.h"
+
+namespace Shader::Optimization {
+
+static bool IsBufferFormatLoad(const IR::Inst& inst) {
+ return inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32;
+}
+
+static bool IsBufferFormatStore(const IR::Inst& inst) {
+ return inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32;
+}
+
+static IR::Value LoadBufferFormat(IR::IREmitter& ir, const AmdGpu::Buffer& buffer,
+ const IR::Value handle, const IR::U32 address,
+ const IR::BufferInstInfo info) {
+ const auto data_fmt = buffer.GetDataFmt();
+ const auto num_fmt = buffer.GetNumberFmt();
+ const auto num_conv = buffer.GetNumberConversion();
+ const auto num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
+
+ IR::Value interpreted;
+ switch (data_fmt) {
+ case AmdGpu::DataFormat::FormatInvalid:
+ interpreted = ir.Imm32(0.f);
+ break;
+ case AmdGpu::DataFormat::Format8: {
+ const auto unpacked = ir.Unpack4x8(num_fmt, ir.LoadBufferU8(handle, address, info));
+ interpreted = ir.CompositeExtract(unpacked, 0);
+ break;
+ }
+ case AmdGpu::DataFormat::Format8_8: {
+ const auto raw = ir.LoadBufferU16(handle, address, info);
+ const auto unpacked = ir.Unpack4x8(num_fmt, raw);
+ interpreted = ir.CompositeConstruct(ir.CompositeExtract(unpacked, 0),
+ ir.CompositeExtract(unpacked, 1));
+ break;
+ }
+ case AmdGpu::DataFormat::Format8_8_8_8:
+ interpreted = ir.Unpack4x8(num_fmt, IR::U32{ir.LoadBufferU32(1, handle, address, info)});
+ break;
+ case AmdGpu::DataFormat::Format16: {
+ const auto unpacked = ir.Unpack2x16(num_fmt, ir.LoadBufferU16(handle, address, info));
+ interpreted = ir.CompositeExtract(unpacked, 0);
+ break;
+ }
+ case AmdGpu::DataFormat::Format16_16:
+ interpreted = ir.Unpack2x16(num_fmt, IR::U32{ir.LoadBufferU32(1, handle, address, info)});
+ break;
+ case AmdGpu::DataFormat::Format10_11_11:
+ interpreted =
+ ir.Unpack10_11_11(num_fmt, IR::U32{ir.LoadBufferU32(1, handle, address, info)});
+ break;
+ case AmdGpu::DataFormat::Format2_10_10_10:
+ interpreted =
+ ir.Unpack2_10_10_10(num_fmt, IR::U32{ir.LoadBufferU32(1, handle, address, info)});
+ break;
+ case AmdGpu::DataFormat::Format16_16_16_16: {
+ const auto raw = ir.LoadBufferU32(2, handle, address, info);
+ interpreted =
+ ir.CompositeConstruct(ir.Unpack2x16(num_fmt, IR::U32{ir.CompositeExtract(raw, 0)}),
+ ir.Unpack2x16(num_fmt, IR::U32{ir.CompositeExtract(raw, 1)}));
+ break;
+ }
+ case AmdGpu::DataFormat::Format32:
+ case AmdGpu::DataFormat::Format32_32:
+ case AmdGpu::DataFormat::Format32_32_32:
+ case AmdGpu::DataFormat::Format32_32_32_32: {
+ ASSERT(num_fmt == AmdGpu::NumberFormat::Uint || num_fmt == AmdGpu::NumberFormat::Sint ||
+ num_fmt == AmdGpu::NumberFormat::Float);
+ interpreted = ir.LoadBufferF32(num_components, handle, address, info);
+ break;
+ }
+ default:
+ UNREACHABLE_MSG("Unsupported buffer data format: {}", data_fmt);
+ }
+
+ // Pad to 4 components and apply additional modifications.
+ boost::container::static_vector<IR::Value, 4> components;
+ for (u32 i = 0; i < 4; i++) {
+ if (i < num_components) {
+ const auto component =
+ IR::F32{num_components == 1 ? interpreted : ir.CompositeExtract(interpreted, i)};
+ components.push_back(ApplyReadNumberConversion(ir, component, num_conv));
+ } else {
+ components.push_back(ir.Imm32(0.f));
+ }
+ }
+ const auto swizzled = ApplySwizzle(ir, ir.CompositeConstruct(components), buffer.DstSelect());
+ return swizzled;
+}
+
+static void StoreBufferFormat(IR::IREmitter& ir, const AmdGpu::Buffer& buffer,
+ const IR::Value handle, const IR::U32 address, const IR::Value& value,
+ const IR::BufferInstInfo info) {
+ const auto data_fmt = buffer.GetDataFmt();
+ const auto num_fmt = buffer.GetNumberFmt();
+ const auto num_conv = buffer.GetNumberConversion();
+ const auto num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
+
+ // Extract actual number of components and apply additional modifications.
+ const auto swizzled = ApplySwizzle(ir, value, buffer.DstSelect().Inverse());
+ boost::container::static_vector<IR::Value, 4> components;
+ for (u32 i = 0; i < num_components; i++) {
+ const auto component = IR::F32{ir.CompositeExtract(swizzled, i)};
+ components.push_back(ApplyWriteNumberConversion(ir, component, num_conv));
+ }
+ const auto real_value =
+ components.size() == 1 ? components[0] : ir.CompositeConstruct(components);
+
+ switch (data_fmt) {
+ case AmdGpu::DataFormat::FormatInvalid:
+ break;
+ case AmdGpu::DataFormat::Format8: {
+ const auto packed =
+ ir.Pack4x8(num_fmt, ir.CompositeConstruct(real_value, ir.Imm32(0.f), ir.Imm32(0.f),
+ ir.Imm32(0.f)));
+ ir.StoreBufferU8(handle, address, packed, info);
+ break;
+ }
+ case AmdGpu::DataFormat::Format8_8: {
+ const auto packed =
+ ir.Pack4x8(num_fmt, ir.CompositeConstruct(ir.CompositeExtract(real_value, 0),
+ ir.CompositeExtract(real_value, 1),
+ ir.Imm32(0.f), ir.Imm32(0.f)));
+ ir.StoreBufferU16(handle, address, packed, info);
+ break;
+ }
+ case AmdGpu::DataFormat::Format8_8_8_8: {
+ auto packed = ir.Pack4x8(num_fmt, real_value);
+ ir.StoreBufferU32(1, handle, address, packed, info);
+ break;
+ }
+ case AmdGpu::DataFormat::Format16: {
+ const auto packed = ir.Pack2x16(num_fmt, ir.CompositeConstruct(real_value, ir.Imm32(0.f)));
+ ir.StoreBufferU16(handle, address, packed, info);
+ break;
+ }
+ case AmdGpu::DataFormat::Format16_16: {
+ const auto packed = ir.Pack2x16(num_fmt, real_value);
+ ir.StoreBufferU32(1, handle, address, packed, info);
+ break;
+ }
+ case AmdGpu::DataFormat::Format10_11_11: {
+ const auto packed = ir.Pack10_11_11(num_fmt, real_value);
+ ir.StoreBufferU32(1, handle, address, packed, info);
+ break;
+ }
+ case AmdGpu::DataFormat::Format2_10_10_10: {
+ const auto packed = ir.Pack2_10_10_10(num_fmt, real_value);
+ ir.StoreBufferU32(1, handle, address, packed, info);
+ break;
+ }
+ case AmdGpu::DataFormat::Format16_16_16_16: {
+ const auto packed = ir.CompositeConstruct(
+ ir.Pack2x16(num_fmt, ir.CompositeConstruct(ir.CompositeExtract(real_value, 0),
+ ir.CompositeExtract(real_value, 1))),
+ ir.Pack2x16(num_fmt, ir.CompositeConstruct(ir.CompositeExtract(real_value, 2),
+ ir.CompositeExtract(real_value, 3))));
+ ir.StoreBufferU32(2, handle, address, packed, info);
+ break;
+ }
+ case AmdGpu::DataFormat::Format32:
+ case AmdGpu::DataFormat::Format32_32:
+ case AmdGpu::DataFormat::Format32_32_32:
+ case AmdGpu::DataFormat::Format32_32_32_32: {
+ ASSERT(num_fmt == AmdGpu::NumberFormat::Uint || num_fmt == AmdGpu::NumberFormat::Sint ||
+ num_fmt == AmdGpu::NumberFormat::Float);
+ ir.StoreBufferF32(num_components, handle, address, real_value, info);
+ break;
+ }
+ default:
+ UNREACHABLE_MSG("Unsupported buffer data format: {}", data_fmt);
+ }
+}
+
+static void LowerBufferFormatInst(IR::Block& block, IR::Inst& inst, Info& info) {
+ IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+ const auto desc{info.buffers[inst.Arg(0).U32()]};
+ const auto buffer{desc.GetSharp(info)};
+
+ if (IsBufferFormatLoad(inst)) {
+ const auto interpreted = LoadBufferFormat(ir, buffer, inst.Arg(0), IR::U32{inst.Arg(1)},
+ inst.Flags<IR::BufferInstInfo>());
+ inst.ReplaceUsesWithAndRemove(interpreted);
+ } else if (IsBufferFormatStore(inst)) {
+ StoreBufferFormat(ir, buffer, inst.Arg(0), IR::U32{inst.Arg(1)}, inst.Arg(2),
+ inst.Flags<IR::BufferInstInfo>());
+ inst.Invalidate();
+ }
+}
+
+void LowerBufferFormatToRaw(IR::Program& program) {
+ auto& info = program.info;
+ for (IR::Block* const block : program.blocks) {
+ for (IR::Inst& inst : block->Instructions()) {
+ if (IsBufferFormatLoad(inst) || IsBufferFormatStore(inst)) {
+ LowerBufferFormatInst(*block, inst, info);
+ }
+ }
+ }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index c5f98e5b9..029558d9e 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -1,8 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
-#include <algorithm>
-#include <boost/container/small_vector.hpp>
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/breadth_first_search.h"
@@ -37,10 +35,17 @@ bool IsBufferAtomic(const IR::Inst& inst) {
bool IsBufferStore(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
+ case IR::Opcode::StoreBufferU8:
+ case IR::Opcode::StoreBufferU16:
case IR::Opcode::StoreBufferU32:
case IR::Opcode::StoreBufferU32x2:
case IR::Opcode::StoreBufferU32x3:
case IR::Opcode::StoreBufferU32x4:
+ case IR::Opcode::StoreBufferF32:
+ case IR::Opcode::StoreBufferF32x2:
+ case IR::Opcode::StoreBufferF32x3:
+ case IR::Opcode::StoreBufferF32x4:
+ case IR::Opcode::StoreBufferFormatF32:
return true;
default:
return IsBufferAtomic(inst);
@@ -49,10 +54,17 @@ bool IsBufferStore(const IR::Inst& inst) {
bool IsBufferInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
+ case IR::Opcode::LoadBufferU8:
+ case IR::Opcode::LoadBufferU16:
case IR::Opcode::LoadBufferU32:
case IR::Opcode::LoadBufferU32x2:
case IR::Opcode::LoadBufferU32x3:
case IR::Opcode::LoadBufferU32x4:
+ case IR::Opcode::LoadBufferF32:
+ case IR::Opcode::LoadBufferF32x2:
+ case IR::Opcode::LoadBufferF32x3:
+ case IR::Opcode::LoadBufferF32x4:
+ case IR::Opcode::LoadBufferFormatF32:
case IR::Opcode::ReadConstBuffer:
return true;
default:
@@ -65,34 +77,6 @@ bool IsDataRingInstruction(const IR::Inst& inst) {
inst.GetOpcode() == IR::Opcode::DataConsume;
}
-bool IsTextureBufferInstruction(const IR::Inst& inst) {
- return inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 ||
- inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32;
-}
-
-bool UseFP16(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) {
- switch (num_format) {
- case AmdGpu::NumberFormat::Float:
- switch (data_format) {
- case AmdGpu::DataFormat::Format16:
- case AmdGpu::DataFormat::Format16_16:
- case AmdGpu::DataFormat::Format16_16_16_16:
- return true;
- default:
- return false;
- }
- case AmdGpu::NumberFormat::Unorm:
- case AmdGpu::NumberFormat::Snorm:
- case AmdGpu::NumberFormat::Uscaled:
- case AmdGpu::NumberFormat::Sscaled:
- case AmdGpu::NumberFormat::Uint:
- case AmdGpu::NumberFormat::Sint:
- case AmdGpu::NumberFormat::SnormNz:
- default:
- return false;
- }
-}
-
IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
return IR::Type::U32;
}
@@ -132,8 +116,7 @@ bool IsImageInstruction(const IR::Inst& inst) {
class Descriptors {
public:
explicit Descriptors(Info& info_)
- : info{info_}, buffer_resources{info_.buffers},
- texture_buffer_resources{info_.texture_buffers}, image_resources{info_.images},
+ : info{info_}, buffer_resources{info_.buffers}, image_resources{info_.images},
sampler_resources{info_.samplers}, fmask_resources(info_.fmasks) {}
u32 Add(const BufferResource& desc) {
@@ -147,15 +130,7 @@ public:
auto& buffer = buffer_resources[index];
buffer.used_types |= desc.used_types;
buffer.is_written |= desc.is_written;
- return index;
- }
-
- u32 Add(const TextureBufferResource& desc) {
- const u32 index{Add(texture_buffer_resources, desc, [&desc](const auto& existing) {
- return desc.sharp_idx == existing.sharp_idx;
- })};
- auto& buffer = texture_buffer_resources[index];
- buffer.is_written |= desc.is_written;
+ buffer.is_formatted |= desc.is_formatted;
return index;
}
@@ -196,7 +171,6 @@ private:
const Info& info;
BufferResourceList& buffer_resources;
- TextureBufferResourceList& texture_buffer_resources;
ImageResourceList& image_resources;
SamplerResourceList& sampler_resources;
FMaskResourceList& fmask_resources;
@@ -313,6 +287,8 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
.sharp_idx = sharp,
.used_types = BufferDataType(inst, buffer.GetNumberFmt()),
.is_written = IsBufferStore(inst),
+ .is_formatted = inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32 ||
+ inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
});
}
@@ -321,21 +297,6 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
inst.SetArg(0, ir.Imm32(binding));
}
-void PatchTextureBufferSharp(IR::Block& block, IR::Inst& inst, Info& info,
- Descriptors& descriptors) {
- const IR::Inst* handle = inst.Arg(0).InstRecursive();
- const IR::Inst* producer = handle->Arg(0).InstRecursive();
- const auto sharp = TrackSharp(producer, info);
- const s32 binding = descriptors.Add(TextureBufferResource{
- .sharp_idx = sharp,
- .is_written = inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32,
- });
-
- // Replace handle with binding index in texture buffer resource list.
- IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
- inst.SetArg(0, ir.Imm32(binding));
-}
-
void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
const auto opcode = inst->GetOpcode();
@@ -553,36 +514,6 @@ void PatchBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
inst.SetArg(1, CalculateBufferAddress(ir, inst, info, buffer, buffer.stride));
}
-void PatchTextureBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
- const auto handle = inst.Arg(0);
- const auto buffer_res = info.texture_buffers[handle.U32()];
- const auto buffer = buffer_res.GetSharp(info);
-
- // Only linear addressing with index is supported currently, since we cannot yet
- // address with sub-texel granularity.
- const auto inst_info = inst.Flags();
- ASSERT_MSG(!buffer.swizzle_enable && !inst_info.offset_enable && inst_info.inst_offset == 0,
- "Unsupported texture buffer address mode.");
-
- IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
- // Stride of 1 to get an index into formatted data. See above addressing limitations.
- inst.SetArg(1, CalculateBufferAddress(ir, inst, info, buffer, 1U));
-
- if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
- const auto swizzled = ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect().Inverse());
- const auto converted =
- ApplyWriteNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
- inst.SetArg(2, converted);
- } else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) {
- const auto inst_info = inst.Flags();
- const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
- const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect());
- const auto converted =
- ApplyReadNumberConversionVec4(ir, swizzled, buffer.GetNumberConversion());
- inst.ReplaceUsesWith(converted);
- }
-}
-
IR::Value FixCubeCoords(IR::IREmitter& ir, const AmdGpu::Image& image, const IR::Value& x,
const IR::Value& y, const IR::Value& face) {
if (!image.IsCube()) {
@@ -861,8 +792,6 @@ void ResourceTrackingPass(IR::Program& program) {
for (IR::Inst& inst : block->Instructions()) {
if (IsBufferInstruction(inst)) {
PatchBufferSharp(*block, inst, info, descriptors);
- } else if (IsTextureBufferInstruction(inst)) {
- PatchTextureBufferSharp(*block, inst, info, descriptors);
} else if (IsImageInstruction(inst)) {
PatchImageSharp(*block, inst, info, descriptors);
} else if (IsDataRingInstruction(inst)) {
@@ -876,8 +805,6 @@ void ResourceTrackingPass(IR::Program& program) {
for (IR::Inst& inst : block->Instructions()) {
if (IsBufferInstruction(inst)) {
PatchBufferArgs(*block, inst, info);
- } else if (IsTextureBufferInstruction(inst)) {
- PatchTextureBufferArgs(*block, inst, info);
} else if (IsImageInstruction(inst)) {
PatchImageArgs(*block, inst, info);
}
diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
index 7fd5b75ff..f3a1fc9a8 100644
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@@ -50,12 +50,6 @@ void Visit(Info& info, const IR::Inst& inst) {
case IR::Opcode::ImageWrite:
info.has_storage_images = true;
break;
- case IR::Opcode::LoadBufferFormatF32:
- info.has_texel_buffers = true;
- break;
- case IR::Opcode::StoreBufferFormatF32:
- info.has_image_buffers = true;
- break;
case IR::Opcode::QuadShuffle:
info.uses_group_quad = true;
break;
@@ -82,6 +76,12 @@ void Visit(Info& info, const IR::Inst& inst) {
case IR::Opcode::ReadConst:
info.has_readconst = true;
break;
+ case IR::Opcode::PackUfloat10_11_11:
+ info.uses_pack_10_11_11 = true;
+ break;
+ case IR::Opcode::UnpackUfloat10_11_11:
+ info.uses_unpack_10_11_11 = true;
+ break;
default:
break;
}
diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp
index 01518ab8f..a9f7aeb40 100644
--- a/src/shader_recompiler/recompiler.cpp
+++ b/src/shader_recompiler/recompiler.cpp
@@ -88,6 +88,7 @@ IR::Program TranslateProgram(std::span code, Pools& pools, Info& info
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::FlattenExtendedUserdataPass(program);
Shader::Optimization::ResourceTrackingPass(program);
+ Shader::Optimization::LowerBufferFormatToRaw(program);
Shader::Optimization::IdentityRemovalPass(program.blocks);
Shader::Optimization::DeadCodeEliminationPass(program);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h
index 2083d11a9..4328193b5 100644
--- a/src/shader_recompiler/specialization.h
+++ b/src/shader_recompiler/specialization.h
@@ -19,30 +19,30 @@ struct VsAttribSpecialization {
};
struct BufferSpecialization {
- u16 stride : 14;
- u16 is_storage : 1;
- u16 swizzle_enable : 1;
- u8 index_stride : 2 = 0;
- u8 element_size : 2 = 0;
+ u32 stride : 14;
+ u32 is_storage : 1;
+ u32 is_formatted : 1;
+ u32 swizzle_enable : 1;
+ u32 data_format : 6;
+ u32 num_format : 4;
+ u32 index_stride : 2;
+ u32 element_size : 2;
u32 size = 0;
+ AmdGpu::CompMapping dst_select{};
+ AmdGpu::NumberConversion num_conversion{};
bool operator==(const BufferSpecialization& other) const {
return stride == other.stride && is_storage == other.is_storage &&
- swizzle_enable == other.swizzle_enable &&
+ is_formatted == other.is_formatted && swizzle_enable == other.swizzle_enable &&
+ (!is_formatted ||
+ (data_format == other.data_format && num_format == other.num_format &&
+ dst_select == other.dst_select && num_conversion == other.num_conversion)) &&
(!swizzle_enable ||
(index_stride == other.index_stride && element_size == other.element_size)) &&
(size >= other.is_storage || is_storage);
}
};
-struct TextureBufferSpecialization {
- bool is_integer = false;
- AmdGpu::CompMapping dst_select{};
- AmdGpu::NumberConversion num_conversion{};
-
- auto operator<=>(const TextureBufferSpecialization&) const = default;
-};
-
struct ImageSpecialization {
AmdGpu::ImageType type = AmdGpu::ImageType::Color2D;
bool is_integer = false;
@@ -82,7 +82,6 @@ struct StageSpecialization {
boost::container::small_vector vs_attribs;
std::bitset bitset{};
boost::container::small_vector buffers;
- boost::container::small_vector tex_buffers;
boost::container::small_vector images;
boost::container::small_vector fmasks;
boost::container::small_vector samplers;
@@ -111,7 +110,14 @@ struct StageSpecialization {
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.stride = sharp.GetStride();
spec.is_storage = desc.IsStorage(sharp);
+ spec.is_formatted = desc.is_formatted;
spec.swizzle_enable = sharp.swizzle_enable;
+ if (spec.is_formatted) {
+ spec.data_format = static_cast(sharp.GetDataFmt());
+ spec.num_format = static_cast(sharp.GetNumberFmt());
+ spec.dst_select = sharp.DstSelect();
+ spec.num_conversion = sharp.GetNumberConversion();
+ }
if (spec.swizzle_enable) {
spec.index_stride = sharp.index_stride;
spec.element_size = sharp.element_size;
@@ -120,12 +126,6 @@ struct StageSpecialization {
spec.size = sharp.GetSize();
}
});
- ForEachSharp(binding, tex_buffers, info->texture_buffers,
- [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
- spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
- spec.dst_select = sharp.DstSelect();
- spec.num_conversion = sharp.GetNumberConversion();
- });
ForEachSharp(binding, images, info->images,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.type = sharp.GetViewType(desc.is_array);
@@ -217,11 +217,6 @@ struct StageSpecialization {
return false;
}
}
- for (u32 i = 0; i < tex_buffers.size(); i++) {
- if (other.bitset[binding++] && tex_buffers[i] != other.tex_buffers[i]) {
- return false;
- }
- }
for (u32 i = 0; i < images.size(); i++) {
if (other.bitset[binding++] && images[i] != other.images[i]) {
return false;
diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h
index b442b2f1e..ee2dda494 100644
--- a/src/video_core/amdgpu/types.h
+++ b/src/video_core/amdgpu/types.h
@@ -298,6 +298,7 @@ inline NumberFormat RemapNumberFormat(const NumberFormat format, const DataForma
inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizzle) {
switch (format) {
+ case DataFormat::Format1_5_5_5:
case DataFormat::Format11_11_10: {
CompMapping result;
result.r = swizzle.b;
@@ -314,12 +315,13 @@ inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizz
result.a = swizzle.r;
return result;
}
- case DataFormat::Format1_5_5_5: {
+ case DataFormat::Format4_4_4_4: {
+ // Remap to a more supported component order.
CompMapping result;
- result.r = swizzle.b;
- result.g = swizzle.g;
- result.b = swizzle.r;
- result.a = swizzle.a;
+ result.r = swizzle.g;
+ result.g = swizzle.b;
+ result.b = swizzle.a;
+ result.a = swizzle.r;
return result;
}
default:
diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp
index a8d1271c6..15ef746cd 100644
--- a/src/video_core/buffer_cache/buffer.cpp
+++ b/src/video_core/buffer_cache/buffer.cpp
@@ -95,8 +95,7 @@ Buffer::Buffer(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
// Create buffer object.
const vk::BufferCreateInfo buffer_ci = {
.size = size_bytes,
- // When maintenance5 is not supported, use all flags since we can't add flags to views.
- .usage = instance->IsMaintenance5Supported() ? flags : AllFlags,
+ .usage = flags,
};
VmaAllocationInfo alloc_info{};
buffer.Create(buffer_ci, usage, &alloc_info);
@@ -113,29 +112,6 @@ Buffer::Buffer(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
-vk::BufferView Buffer::View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt,
- AmdGpu::NumberFormat nfmt) {
- const vk::BufferUsageFlags2CreateInfoKHR usage_flags = {
- .usage = is_written ? vk::BufferUsageFlagBits2KHR::eStorageTexelBuffer
- : vk::BufferUsageFlagBits2KHR::eUniformTexelBuffer,
- };
- const vk::BufferViewCreateInfo view_ci = {
- .pNext = instance->IsMaintenance5Supported() ? &usage_flags : nullptr,
- .buffer = buffer.buffer,
- .format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt),
- .offset = offset,
- .range = size,
- };
- const auto [view_result, view] = instance->GetDevice().createBufferView(view_ci);
- ASSERT_MSG(view_result == vk::Result::eSuccess, "Failed to create buffer view: {}",
- vk::to_string(view_result));
- scheduler->DeferOperation(
- [view, device = instance->GetDevice()] { device.destroyBufferView(view); });
- Vulkan::SetObjectName(instance->GetDevice(), view, "BufferView {:#x}:{:#x}", cpu_addr + offset,
- size);
- return view;
-}
-
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h
index 63391a180..ec92a0ebf 100644
--- a/src/video_core/buffer_cache/buffer.h
+++ b/src/video_core/buffer_cache/buffer.h
@@ -32,13 +32,12 @@ enum class MemoryUsage {
};
constexpr vk::BufferUsageFlags ReadFlags =
- vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eUniformTexelBuffer |
- vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eIndexBuffer |
- vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndirectBuffer;
+ vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eUniformBuffer |
+ vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer |
+ vk::BufferUsageFlagBits::eIndirectBuffer;
-constexpr vk::BufferUsageFlags AllFlags = ReadFlags | vk::BufferUsageFlagBits::eTransferDst |
- vk::BufferUsageFlagBits::eStorageTexelBuffer |
- vk::BufferUsageFlagBits::eStorageBuffer;
+constexpr vk::BufferUsageFlags AllFlags =
+ ReadFlags | vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eStorageBuffer;
struct UniqueBuffer {
explicit UniqueBuffer(vk::Device device, VmaAllocator allocator);
@@ -83,9 +82,6 @@ public:
Buffer& operator=(Buffer&&) = default;
Buffer(Buffer&&) = default;
- vk::BufferView View(u32 offset, u32 size, bool is_written, AmdGpu::DataFormat dfmt,
- AmdGpu::NumberFormat nfmt);
-
/// Increases the likeliness of this being a stream buffer
void IncreaseStreamScore(int score) noexcept {
stream_score += score;
diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
index f2fbc6530..5c02ef39f 100644
--- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
@@ -352,12 +352,9 @@ vk::ComponentMapping ComponentMapping(AmdGpu::CompMapping comp_mapping) {
};
}
-static constexpr vk::FormatFeatureFlags2 BufferRead =
- vk::FormatFeatureFlagBits2::eUniformTexelBuffer | vk::FormatFeatureFlagBits2::eVertexBuffer;
-static constexpr vk::FormatFeatureFlags2 BufferWrite =
- vk::FormatFeatureFlagBits2::eStorageTexelBuffer |
- vk::FormatFeatureFlagBits2::eStorageReadWithoutFormat |
- vk::FormatFeatureFlagBits2::eStorageWriteWithoutFormat;
+// Texel buffer feature flags are not needed as format is interpreted in-shader.
+static constexpr vk::FormatFeatureFlags2 BufferRead = vk::FormatFeatureFlagBits2::eVertexBuffer;
+static constexpr vk::FormatFeatureFlags2 BufferWrite = static_cast(0);
static constexpr vk::FormatFeatureFlags2 ImageRead = vk::FormatFeatureFlagBits2::eTransferSrc |
vk::FormatFeatureFlagBits2::eTransferDst |
vk::FormatFeatureFlagBits2::eSampledImage;
@@ -618,7 +615,7 @@ std::span SurfaceFormats() {
vk::Format::eR5G5B5A1UnormPack16),
// 4_4_4_4
CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format4_4_4_4, AmdGpu::NumberFormat::Unorm,
- vk::Format::eA4B4G4R4UnormPack16),
+ vk::Format::eB4G4R4A4UnormPack16),
// 8_24
// 24_8
// X24_8_32
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index afa598fca..0832f65a2 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -55,15 +55,6 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
.stageFlags = vk::ShaderStageFlagBits::eCompute,
});
}
- for (const auto& tex_buffer : info->texture_buffers) {
- bindings.push_back({
- .binding = binding++,
- .descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer
- : vk::DescriptorType::eUniformTexelBuffer,
- .descriptorCount = 1,
- .stageFlags = vk::ShaderStageFlagBits::eCompute,
- });
- }
for (const auto& image : info->images) {
bindings.push_back({
.binding = binding++,
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 4ca3a7f27..588754c00 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -131,8 +131,7 @@ GraphicsPipeline::GraphicsPipeline(
vk::DynamicState::eStencilOpEXT,
};
- if (instance.IsColorWriteEnableSupported()) {
- dynamic_states.push_back(vk::DynamicState::eColorWriteEnableEXT);
+ if (instance.IsDynamicColorWriteMaskSupported()) {
dynamic_states.push_back(vk::DynamicState::eColorWriteMaskEXT);
}
if (instance.IsVertexInputDynamicState()) {
@@ -241,7 +240,7 @@ GraphicsPipeline::GraphicsPipeline(
? LiverpoolToVK::BlendOp(control.alpha_func)
: color_blend,
.colorWriteMask =
- instance.IsColorWriteEnableSupported()
+ instance.IsDynamicColorWriteMaskSupported()
? vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA
: key.write_masks[i],
@@ -376,15 +375,6 @@ void GraphicsPipeline::BuildDescSetLayout() {
.stageFlags = gp_stage_flags,
});
}
- for (const auto& tex_buffer : stage->texture_buffers) {
- bindings.push_back({
- .binding = binding++,
- .descriptorType = tex_buffer.is_written ? vk::DescriptorType::eStorageTexelBuffer
- : vk::DescriptorType::eUniformTexelBuffer,
- .descriptorCount = 1,
- .stageFlags = gp_stage_flags,
- });
- }
for (const auto& image : stage->images) {
bindings.push_back({
.binding = binding++,
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index a722b5322..e64cae87d 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -206,27 +206,23 @@ std::string Instance::GetDriverVersionName() {
}
bool Instance::CreateDevice() {
- const vk::StructureChain feature_chain = physical_device.getFeatures2<
- vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT,
- vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
- vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT,
- vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
- vk::PhysicalDeviceCustomBorderColorFeaturesEXT,
- vk::PhysicalDeviceColorWriteEnableFeaturesEXT, vk::PhysicalDeviceVulkan12Features,
- vk::PhysicalDeviceVulkan13Features,
- vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR,
- vk::PhysicalDeviceDepthClipControlFeaturesEXT, vk::PhysicalDeviceRobustness2FeaturesEXT,
- vk::PhysicalDevicePortabilitySubsetFeaturesKHR>();
- const vk::StructureChain properties_chain = physical_device.getProperties2<
- vk::PhysicalDeviceProperties2, vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
- vk::PhysicalDeviceExternalMemoryHostPropertiesEXT, vk::PhysicalDeviceVulkan11Properties,
- vk::PhysicalDevicePushDescriptorPropertiesKHR, vk::PhysicalDeviceVulkan12Properties>();
- subgroup_size = properties_chain.get().subgroupSize;
- push_descriptor_props = properties_chain.get();
- vk12_props = properties_chain.get();
- LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", subgroup_size);
-
+ const vk::StructureChain feature_chain =
+ physical_device
+ .getFeatures2();
features = feature_chain.get().features;
+
+ const vk::StructureChain properties_chain = physical_device.getProperties2<
+ vk::PhysicalDeviceProperties2, vk::PhysicalDeviceVulkan11Properties,
+ vk::PhysicalDeviceVulkan12Properties, vk::PhysicalDevicePushDescriptorPropertiesKHR>();
+ vk11_props = properties_chain.get();
+ vk12_props = properties_chain.get();
+ push_descriptor_props = properties_chain.get();
+ LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", vk11_props.subgroupSize);
+
if (available_extensions.empty()) {
LOG_CRITICAL(Render_Vulkan, "No extensions supported by device.");
return false;
@@ -248,42 +244,41 @@ bool Instance::CreateDevice() {
return false;
};
- add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
- shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
- external_memory_host = add_extension(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME);
- custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
- add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
- depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
- add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
- workgroup_memory_explicit_layout =
- add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
- vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
- fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME);
-
- // The next two extensions are required to be available together in order to support write masks
- color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
- color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
- const bool calibrated_timestamps =
- TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
- const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
- list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
- maintenance5 = add_extension(VK_KHR_MAINTENANCE_5_EXTENSION_NAME);
- legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
- image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
- amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
-
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
// with extensions.
- if (Config::vkValidationEnabled() || Config::isRdocEnabled()) {
- tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME);
- }
- const bool maintenance4 = add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
add_extension(VK_KHR_FORMAT_FEATURE_FLAGS_2_EXTENSION_NAME);
add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME);
add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
- add_extension(VK_EXT_4444_FORMATS_EXTENSION_NAME);
+ tooling_info = add_extension(VK_EXT_TOOLING_INFO_EXTENSION_NAME);
+ const bool maintenance4 = add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME);
+
+ add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+ add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
+ add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
+ dynamic_color_write_mask = add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
+ if (dynamic_color_write_mask) {
+ dynamic_color_write_mask =
+ feature_chain.get()
+ .extendedDynamicState3ColorWriteMask;
+ }
+ null_descriptor = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
+ if (null_descriptor) {
+ null_descriptor =
+ feature_chain.get().nullDescriptor;
+ }
+ custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
+ depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
+ vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
+ list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
+ fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME);
+ legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME);
+ shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
+ image_load_store_lod = add_extension(VK_AMD_SHADER_IMAGE_LOAD_STORE_LOD_EXTENSION_NAME);
+ amd_gcn_shader = add_extension(VK_AMD_GCN_SHADER_EXTENSION_NAME);
+ const bool calibrated_timestamps =
+ TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
#ifdef __APPLE__
// Required by Vulkan spec if supported.
@@ -310,8 +305,7 @@ bool Instance::CreateDevice() {
return false;
}
- static constexpr std::array queue_priorities = {1.0f};
-
+ static constexpr std::array queue_priorities = {1.0f};
const vk::DeviceQueueCreateInfo queue_info = {
.queueFamilyIndex = queue_family_index,
.queueCount = static_cast(queue_priorities.size()),
@@ -320,7 +314,6 @@ bool Instance::CreateDevice() {
const auto topology_list_restart_features =
feature_chain.get();
-
const auto vk12_features = feature_chain.get();
vk::StructureChain device_chain = {
vk::DeviceCreateInfo{
@@ -365,46 +358,36 @@ bool Instance::CreateDevice() {
.hostQueryReset = vk12_features.hostQueryReset,
.timelineSemaphore = vk12_features.timelineSemaphore,
},
- vk::PhysicalDeviceMaintenance4FeaturesKHR{
- .maintenance4 = true,
- },
- vk::PhysicalDeviceMaintenance5FeaturesKHR{
- .maintenance5 = true,
- },
+ // Vulkan 1.3 promoted extensions
vk::PhysicalDeviceDynamicRenderingFeaturesKHR{
.dynamicRendering = true,
},
vk::PhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT{
.shaderDemoteToHelperInvocation = true,
},
- vk::PhysicalDeviceCustomBorderColorFeaturesEXT{
- .customBorderColors = true,
- .customBorderColorWithoutFormat = true,
- },
- vk::PhysicalDeviceColorWriteEnableFeaturesEXT{
- .colorWriteEnable = true,
+ vk::PhysicalDeviceSynchronization2Features{
+ .synchronization2 = true,
},
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT{
.extendedDynamicState = true,
},
+ vk::PhysicalDeviceMaintenance4FeaturesKHR{
+ .maintenance4 = true,
+ },
+ // Other extensions
+ vk::PhysicalDeviceCustomBorderColorFeaturesEXT{
+ .customBorderColors = true,
+ .customBorderColorWithoutFormat = true,
+ },
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{
.extendedDynamicState3ColorWriteMask = true,
},
vk::PhysicalDeviceDepthClipControlFeaturesEXT{
.depthClipControl = true,
},
- vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR{
- .workgroupMemoryExplicitLayout = true,
- .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
- .workgroupMemoryExplicitLayout8BitAccess = true,
- .workgroupMemoryExplicitLayout16BitAccess = true,
- },
vk::PhysicalDeviceRobustness2FeaturesEXT{
.nullDescriptor = true,
},
- vk::PhysicalDeviceSynchronization2Features{
- .synchronization2 = true,
- },
vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT{
.vertexInputDynamicState = true,
},
@@ -427,37 +410,24 @@ bool Instance::CreateDevice() {
if (!maintenance4) {
device_chain.unlink();
}
- if (!maintenance5) {
- device_chain.unlink();
- }
if (!custom_border_color) {
device_chain.unlink();
}
- if (!color_write_en) {
- device_chain.unlink();
+ if (!dynamic_color_write_mask) {
device_chain.unlink();
}
if (!depth_clip_control) {
device_chain.unlink();
}
- if (!workgroup_memory_explicit_layout) {
- device_chain.unlink();
- }
- if (!list_restart) {
- device_chain.unlink();
- }
- if (robustness) {
- null_descriptor =
- feature_chain.get().nullDescriptor;
- device_chain.get().nullDescriptor =
- null_descriptor;
- } else {
- null_descriptor = false;
+ if (!null_descriptor) {
device_chain.unlink();
}
if (!vertex_input_dynamic_state) {
device_chain.unlink();
}
+ if (!list_restart) {
+ device_chain.unlink();
+ }
if (!fragment_shader_barycentric) {
device_chain.unlink();
}
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h
index 8c4752c3f..1748fcd59 100644
--- a/src/video_core/renderer_vulkan/vk_instance.h
+++ b/src/video_core/renderer_vulkan/vk_instance.h
@@ -89,34 +89,19 @@ public:
return custom_border_color;
}
- /// Returns true when VK_EXT_fragment_shader_interlock is supported
- bool IsFragmentShaderInterlockSupported() const {
- return fragment_shader_interlock;
- }
-
- /// Returns true when VK_EXT_pipeline_creation_cache_control is supported
- bool IsPipelineCreationCacheControlSupported() const {
- return pipeline_creation_cache_control;
- }
-
/// Returns true when VK_EXT_shader_stencil_export is supported
bool IsShaderStencilExportSupported() const {
return shader_stencil_export;
}
- /// Returns true when VK_EXT_external_memory_host is supported
- bool IsExternalMemoryHostSupported() const {
- return external_memory_host;
- }
-
/// Returns true when VK_EXT_depth_clip_control is supported
bool IsDepthClipControlSupported() const {
return depth_clip_control;
}
- /// Returns true when VK_EXT_color_write_enable is supported
- bool IsColorWriteEnableSupported() const {
- return color_write_en;
+ /// Returns true when dynamic color write mask state is supported
+ bool IsDynamicColorWriteMaskSupported() const {
+ return dynamic_color_write_mask;
}
/// Returns true when VK_EXT_vertex_input_dynamic_state is supported.
@@ -129,11 +114,6 @@ public:
return null_descriptor;
}
- /// Returns true when VK_KHR_maintenance5 is supported.
- bool IsMaintenance5Supported() const {
- return maintenance5;
- }
-
/// Returns true when VK_KHR_fragment_shader_barycentric is supported.
bool IsFragmentShaderBarycentricSupported() const {
return fragment_shader_barycentric;
@@ -224,11 +204,6 @@ public:
return properties.limits.minStorageBufferOffsetAlignment;
}
- /// Returns the minimum required alignment for texel buffers
- vk::DeviceSize TexelBufferMinAlignment() const {
- return properties.limits.minTexelBufferOffsetAlignment;
- }
-
/// Returns the minimum alignemt required for accessing host-mapped device memory
vk::DeviceSize NonCoherentAtomSize() const {
return properties.limits.nonCoherentAtomSize;
@@ -236,7 +211,7 @@ public:
/// Returns the subgroup size of the selected physical device.
u32 SubgroupSize() const {
- return subgroup_size;
+ return vk11_props.subgroupSize;
}
/// Returns the maximum size of compute shared memory.
@@ -244,11 +219,6 @@ public:
return properties.limits.maxComputeSharedMemorySize;
}
- /// Returns the maximum supported elements in a texel buffer
- u32 MaxTexelBufferElements() const {
- return properties.limits.maxTexelBufferElements;
- }
-
/// Returns the maximum sampler LOD bias.
float MaxSamplerLodBias() const {
return properties.limits.maxSamplerLodBias;
@@ -274,11 +244,6 @@ public:
return features.shaderClipDistance;
}
- /// Returns the minimum imported host pointer alignment
- u64 GetMinImportedHostPointerAlignment() const {
- return min_imported_host_pointer_alignment;
- }
-
u32 GetMaxViewportWidth() const {
return properties.limits.maxViewportDimensions[0];
}
@@ -316,8 +281,9 @@ private:
vk::PhysicalDevice physical_device;
vk::UniqueDevice device;
vk::PhysicalDeviceProperties properties;
- vk::PhysicalDevicePushDescriptorPropertiesKHR push_descriptor_props;
+ vk::PhysicalDeviceVulkan11Properties vk11_props;
vk::PhysicalDeviceVulkan12Properties vk12_props;
+ vk::PhysicalDevicePushDescriptorPropertiesKHR push_descriptor_props;
vk::PhysicalDeviceFeatures features;
vk::DriverIdKHR driver_id;
vk::UniqueDebugUtilsMessengerEXT debug_callback{};
@@ -330,27 +296,18 @@ private:
std::unordered_map format_properties;
TracyVkCtx profiler_context{};
u32 queue_family_index{0};
- bool image_view_reinterpretation{true};
- bool timeline_semaphores{};
bool custom_border_color{};
- bool fragment_shader_interlock{};
- bool pipeline_creation_cache_control{};
bool fragment_shader_barycentric{};
- bool shader_stencil_export{};
- bool external_memory_host{};
bool depth_clip_control{};
- bool workgroup_memory_explicit_layout{};
- bool color_write_en{};
+ bool dynamic_color_write_mask{};
bool vertex_input_dynamic_state{};
bool null_descriptor{};
- bool maintenance5{};
bool list_restart{};
bool legacy_vertex_attributes{};
+ bool shader_stencil_export{};
bool image_load_store_lod{};
bool amd_gcn_shader{};
bool tooling_info{};
- u64 min_imported_host_pointer_alignment{};
- u32 subgroup_size{};
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index d8f6a08d0..16d2187db 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -29,8 +29,6 @@ using Shader::VsOutput;
constexpr static std::array DescriptorHeapSizes = {
vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 8192},
vk::DescriptorPoolSize{vk::DescriptorType::eStorageBuffer, 1024},
- vk::DescriptorPoolSize{vk::DescriptorType::eUniformTexelBuffer, 128},
- vk::DescriptorPoolSize{vk::DescriptorType::eStorageTexelBuffer, 128},
vk::DescriptorPoolSize{vk::DescriptorType::eSampledImage, 8192},
vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 1024},
};
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 7f2db3f8d..6f979a734 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -435,28 +435,6 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
if (pipeline->IsCompute()) {
const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute);
- // Most of the time when a metadata is updated with a shader it gets cleared. It means
- // we can skip the whole dispatch and update the tracked state instead. Also, it is not
- // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
- // will need its full emulation anyways. For cases of metadata read a warning will be
- // logged.
- const auto IsMetaUpdate = [&](const auto& desc) {
- const auto sharp = desc.GetSharp(info);
- const VAddr address = sharp.base_address;
- if (desc.is_written) {
- // Assume all slices were updates
- if (texture_cache.ClearMeta(address)) {
- LOG_TRACE(Render_Vulkan, "Metadata update skipped");
- return true;
- }
- } else {
- if (texture_cache.IsMeta(address)) {
- LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
- }
- }
- return false;
- };
-
// Assume if a shader reads and writes metas at the same time, it is a copy shader.
bool meta_read = false;
for (const auto& desc : info.buffers) {
@@ -469,23 +447,26 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
}
}
- for (const auto& desc : info.texture_buffers) {
- if (!desc.is_written) {
- const VAddr address = desc.GetSharp(info).base_address;
- meta_read = texture_cache.IsMeta(address);
- }
- }
-
+ // Most of the time when a metadata is updated with a shader it gets cleared. It means
+ // we can skip the whole dispatch and update the tracked state instead. Also, it is not
+ // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
+ // will need its full emulation anyways. For cases of metadata read a warning will be
+ // logged.
if (!meta_read) {
for (const auto& desc : info.buffers) {
- if (IsMetaUpdate(desc)) {
- return false;
- }
- }
-
- for (const auto& desc : info.texture_buffers) {
- if (IsMetaUpdate(desc)) {
- return false;
+ const auto sharp = desc.GetSharp(info);
+ const VAddr address = sharp.base_address;
+ if (desc.is_written) {
+ // Assume all slices were updates
+ if (texture_cache.ClearMeta(address)) {
+ LOG_TRACE(Render_Vulkan, "Metadata update skipped");
+ return false;
+ }
+ } else {
+ if (texture_cache.IsMeta(address)) {
+ LOG_WARNING(Render_Vulkan,
+ "Unexpected metadata read by a CS shader (buffer)");
+ }
}
}
}
@@ -541,19 +522,6 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
}
}
- texbuffer_bindings.clear();
-
- for (const auto& desc : stage.texture_buffers) {
- const auto vsharp = desc.GetSharp(stage);
- if (vsharp.base_address != 0 && vsharp.GetSize() > 0 &&
- vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
- const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize());
- texbuffer_bindings.emplace_back(buffer_id, vsharp);
- } else {
- texbuffer_bindings.emplace_back(VideoCore::BufferId{}, vsharp);
- }
- }
-
// Bind a SSBO to act as shared memory in case of not being able to use a workgroup buffer
// (e.g. when the compute shared memory is bigger than the GPU's shared memory)
if (stage.has_emulated_shared_memory) {
@@ -601,8 +569,9 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE);
}
} else {
- const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(
- vsharp.base_address, vsharp.GetSize(), desc.is_written, false, buffer_id);
+ const auto [vk_buffer, offset] =
+ buffer_cache.ObtainBuffer(vsharp.base_address, vsharp.GetSize(), desc.is_written,
+ desc.is_formatted, buffer_id);
const u32 alignment =
is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
const u32 offset_aligned = Common::AlignDown(offset, alignment);
@@ -617,6 +586,9 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
vk::PipelineStageFlagBits2::eAllCommands)) {
buffer_barriers.emplace_back(*barrier);
}
+ if (desc.is_written && desc.is_formatted) {
+ texture_cache.InvalidateMemoryFromGPU(vsharp.base_address, vsharp.GetSize());
+ }
}
set_writes.push_back({
@@ -630,56 +602,6 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
});
++binding.buffer;
}
-
- for (u32 i = 0; i < texbuffer_bindings.size(); i++) {
- const auto& [buffer_id, vsharp] = texbuffer_bindings[i];
- const auto& desc = stage.texture_buffers[i];
- // Fallback format for null buffer view; never used in valid buffer case.
- const auto data_fmt = vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid
- ? vsharp.GetDataFmt()
- : AmdGpu::DataFormat::Format8;
- const u32 fmt_stride = AmdGpu::NumBits(data_fmt) >> 3;
- vk::BufferView buffer_view;
- if (buffer_id) {
- const u32 alignment = instance.TexelBufferMinAlignment();
- const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(
- vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id);
- const u32 buf_stride = vsharp.GetStride();
- ASSERT_MSG(buf_stride % fmt_stride == 0,
- "Texel buffer stride must match format stride");
- const u32 offset_aligned = Common::AlignDown(offset, alignment);
- const u32 adjust = offset - offset_aligned;
- ASSERT(adjust % fmt_stride == 0);
- push_data.AddTexelOffset(binding.buffer, buf_stride / fmt_stride, adjust / fmt_stride);
- buffer_view = vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust,
- desc.is_written, data_fmt, vsharp.GetNumberFmt());
- if (auto barrier =
- vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
- : vk::AccessFlagBits2::eShaderRead,
- vk::PipelineStageFlagBits2::eAllCommands)) {
- buffer_barriers.emplace_back(*barrier);
- }
- if (desc.is_written) {
- texture_cache.InvalidateMemoryFromGPU(vsharp.base_address, vsharp.GetSize());
- }
- } else if (instance.IsNullDescriptorSupported()) {
- buffer_view = VK_NULL_HANDLE;
- } else {
- buffer_view =
- null_buffer.View(0, fmt_stride, desc.is_written, data_fmt, vsharp.GetNumberFmt());
- }
-
- set_writes.push_back({
- .dstSet = VK_NULL_HANDLE,
- .dstBinding = binding.unified++,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
- : vk::DescriptorType::eUniformTexelBuffer,
- .pTexelBufferView = &buffer_views.emplace_back(buffer_view),
- });
- ++binding.buffer;
- }
}
void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding,
@@ -1062,14 +984,8 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) {
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.setBlendConstants(®s.blend_constants.red);
- if (instance.IsColorWriteEnableSupported()) {
- const auto& write_masks = pipeline.GetWriteMasks();
- std::array write_ens{};
- std::transform(write_masks.cbegin(), write_masks.cend(), write_ens.begin(),
- [](auto in) { return in ? vk::True : vk::False; });
-
- cmdbuf.setColorWriteEnableEXT(write_ens);
- cmdbuf.setColorWriteMaskEXT(0, write_masks);
+ if (instance.IsDynamicColorWriteMaskSupported()) {
+ cmdbuf.setColorWriteMaskEXT(0, pipeline.GetWriteMasks());
}
if (regs.depth_control.depth_bounds_enable) {
cmdbuf.setDepthBounds(regs.depth_bounds_min, regs.depth_bounds_max);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 6e1a1d82e..db458662c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -120,8 +120,6 @@ private:
using BufferBindingInfo = std::pair;
boost::container::static_vector buffer_bindings;
- using TexBufferBindingInfo = std::pair;
- boost::container::static_vector texbuffer_bindings;
using ImageBindingInfo = std::pair;
boost::container::static_vector image_bindings;
};
diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.cpp b/src/video_core/renderer_vulkan/vk_shader_hle.cpp
index ff78f5d24..d73fdbeb1 100644
--- a/src/video_core/renderer_vulkan/vk_shader_hle.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_hle.cpp
@@ -19,9 +19,9 @@ static bool ExecuteCopyShaderHLE(const Shader::Info& info,
auto& buffer_cache = rasterizer.GetBufferCache();
// Copy shader defines three formatted buffers as inputs: control, source, and destination.
- const auto ctl_buf_sharp = info.texture_buffers[0].GetSharp(info);
- const auto src_buf_sharp = info.texture_buffers[1].GetSharp(info);
- const auto dst_buf_sharp = info.texture_buffers[2].GetSharp(info);
+ const auto ctl_buf_sharp = info.buffers[0].GetSharp(info);
+ const auto src_buf_sharp = info.buffers[1].GetSharp(info);
+ const auto dst_buf_sharp = info.buffers[2].GetSharp(info);
const auto buf_stride = src_buf_sharp.GetStride();
ASSERT(buf_stride == dst_buf_sharp.GetStride());
@@ -95,12 +95,10 @@ static bool ExecuteCopyShaderHLE(const Shader::Info& info,
}
// Obtain buffers for the total source and destination ranges.
- const auto [src_buf, src_buf_offset] =
- buffer_cache.ObtainBuffer(src_buf_sharp.base_address + src_offset_min,
- src_offset_max - src_offset_min, false, false);
- const auto [dst_buf, dst_buf_offset] =
- buffer_cache.ObtainBuffer(dst_buf_sharp.base_address + dst_offset_min,
- dst_offset_max - dst_offset_min, true, false);
+ const auto [src_buf, src_buf_offset] = buffer_cache.ObtainBuffer(
+ src_buf_sharp.base_address + src_offset_min, src_offset_max - src_offset_min, false);
+ const auto [dst_buf, dst_buf_offset] = buffer_cache.ObtainBuffer(
+ dst_buf_sharp.base_address + dst_offset_min, dst_offset_max - dst_offset_min, true);
// Apply found buffer base.
const auto vk_copies = std::span{copies}.subspan(batch_start, batch_end - batch_start);