From 4d1a1ce9c2aae933a3d4e9a0a07e55eb5e5875e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?=
Date: Thu, 5 Jun 2025 01:55:47 +0200
Subject: [PATCH 01/28] v_rcp_legacy_f32 (#3040)
---
.../frontend/translate/translate.h | 1 +
.../frontend/translate/vector_alu.cpp | 16 ++++++++++++++++
2 files changed, 17 insertions(+)
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index 7b4b03f27..2584d5c5e 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -204,6 +204,7 @@ public:
void V_EXP_F32(const GcnInst& inst);
void V_LOG_F32(const GcnInst& inst);
void V_RCP_F32(const GcnInst& inst);
+ void V_RCP_LEGACY_F32(const GcnInst& inst);
void V_RCP_F64(const GcnInst& inst);
void V_RSQ_F32(const GcnInst& inst);
void V_SQRT_F32(const GcnInst& inst);
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index fb3f52c7f..3b88e4dec 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -158,6 +158,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_LOG_F32(inst);
case Opcode::V_RCP_F32:
return V_RCP_F32(inst);
+ case Opcode::V_RCP_LEGACY_F32:
+ return V_RCP_LEGACY_F32(inst);
case Opcode::V_RCP_F64:
return V_RCP_F64(inst);
case Opcode::V_RCP_IFLAG_F32:
@@ -798,6 +800,20 @@ void Translator::V_RCP_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPRecip(src0));
}
+void Translator::V_RCP_LEGACY_F32(const GcnInst& inst) {
+ // Legacy reciprocal: identical to V_RCP_F32 except that an infinite result is replaced by a
+ // zero carrying the sign of that infinity (+inf -> +0.0, -inf -> -0.0).
+ const IR::F32 src0{GetSrc(inst.src[0])};
+ const auto result = ir.FPRecip(src0);
+ const auto inf = ir.FPIsInf(result);
+ // Take the sign from a float compare. A float-to-uint value conversion does not expose the
+ // IEEE sign bit and is undefined for infinities, the only inputs this path is used for.
+ const auto sign_bit_set = ir.FPLessThan(result, ir.Imm32(0.0f));
+ const IR::F32 inf_result{ir.Select(sign_bit_set, ir.Imm32(-0.0f), ir.Imm32(0.0f))};
+ const IR::F32 val{ir.Select(inf, inf_result, result)};
+ SetDst(inst.dst[0], val);
+}
+
void Translator::V_RCP_F64(const GcnInst& inst) {
const IR::F64 src0{GetSrc64(inst.src[0])};
SetDst64(inst.dst[0], ir.FPRecip(src0));
From d4fbeea085d704a94151e44a5dcf686e2d33a7b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?=
Date: Thu, 5 Jun 2025 02:00:11 +0200
Subject: [PATCH 02/28] Stub PM4 COPY_DATA opcode (#3032)
---
src/video_core/amdgpu/liverpool.cpp | 15 ++++++--
src/video_core/amdgpu/pm4_cmds.h | 55 +++++++++++++++++++++++++++++
2 files changed, 67 insertions(+), 3 deletions(-)
diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp
index 4db7648c6..118c43cef 100644
--- a/src/video_core/amdgpu/liverpool.cpp
+++ b/src/video_core/amdgpu/liverpool.cpp
@@ -394,7 +394,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span dcb, std::span(header);
- LOG_DEBUG(Render_Vulkan,
- "Encountered EventWrite: event_type = {}, event_index = {}",
+ LOG_DEBUG(Render, "Encountered EventWrite: event_type = {}, event_index = {}",
magic_enum::enum_name(event->event_type.Value()),
magic_enum::enum_name(event->event_index.Value()));
if (event->event_type.Value() == EventType::SoVgtStreamoutFlush) {
@@ -673,6 +672,16 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header);
+ LOG_WARNING(Render,
+ "unhandled IT_COPY_DATA src_sel = {}, dst_sel = {}, "
+ "count_sel = {}, wr_confirm = {}, engine_sel = {}",
+ u32(copy_data->src_sel.Value()), u32(copy_data->dst_sel.Value()),
+ copy_data->count_sel.Value(), copy_data->wr_confirm.Value(),
+ u32(copy_data->engine_sel.Value()));
+ break;
+ }
case PM4ItOpcode::MemSemaphore: {
const auto* mem_semaphore = reinterpret_cast(header);
if (mem_semaphore->IsSignaling()) {
diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h
index 58ecda93e..011e47bf0 100644
--- a/src/video_core/amdgpu/pm4_cmds.h
+++ b/src/video_core/amdgpu/pm4_cmds.h
@@ -554,6 +554,61 @@ struct PM4DmaData {
}
};
+enum class CopyDataSrc : u32 { // Values of the 4-bit PM4CmdCopyData::src_sel field
+ MappedRegister = 0,
+ Memory = 1,
+ TCL2 = 2,
+ Gds = 3,
+ // Reserved = 4,
+ Immediate = 5,
+ Atomic = 6,
+ GdsAtomic0 = 7,
+ GdsAtomic1 = 8,
+ GpuClock = 9,
+};
+
+enum class CopyDataDst : u32 { // Values of the 4-bit PM4CmdCopyData::dst_sel field
+ MappedRegister = 0,
+ MemorySync = 1,
+ TCL2 = 2,
+ Gds = 3,
+ // Reserved = 4,
+ MemoryAsync = 5,
+};
+
+enum class CopyDataEngine : u32 { // Values of the 2-bit PM4CmdCopyData::engine_sel field
+ Me = 0,
+ Pfp = 1,
+ Ce = 2,
+ // Reserved = 3
+};
+
+struct PM4CmdCopyData { // Packet layout read by the IT_COPY_DATA handler in liverpool.cpp
+ PM4Type3Header header;
+ union {
+ BitField<0, 4, CopyDataSrc> src_sel; ///< bits 0-3: source selector
+ BitField<8, 4, CopyDataDst> dst_sel; ///< bits 8-11: destination selector
+ BitField<16, 1, u32> count_sel; ///< bit 16 (presumably 32- vs 64-bit copy; TODO confirm)
+ BitField<20, 1, u32> wr_confirm; ///< bit 20 (presumably write confirmation; TODO confirm)
+ BitField<30, 2, CopyDataEngine> engine_sel; ///< bits 30-31: engine selector
+ u32 control; ///< raw view of the whole control dword
+ };
+ u32 src_addr_lo; ///< low 32 bits of the source address
+ u32 src_addr_hi; ///< high 32 bits of the source address
+ u32 dst_addr_lo; ///< low 32 bits of the destination address
+ u32 dst_addr_hi; ///< high 32 bits of the destination address
+
+ template
+ T SrcAddress() const { // Joins src_addr_hi:src_addr_lo into 64 bits and reinterprets as T
+ return std::bit_cast(src_addr_lo | u64(src_addr_hi) << 32);
+ }
+
+ template
+ T DstAddress() const { // Joins dst_addr_hi:dst_addr_lo into 64 bits and reinterprets as T
+ return std::bit_cast(dst_addr_lo | u64(dst_addr_hi) << 32);
+ }
+};
+
struct PM4CmdRewind {
PM4Type3Header header;
union {
From 285df1b5befcedb1287007ed992e1805b148025f Mon Sep 17 00:00:00 2001
From: DanielSvoboda
Date: Thu, 5 Jun 2025 02:48:47 -0300
Subject: [PATCH 03/28] QT: AutoUpdate - Fix Changelog Error (#3042)
---
.github/workflows/build.yml | 6 +++---
src/qt_gui/check_update.cpp | 6 +++---
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index ceb915f6a..bb3d157b7 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -494,7 +494,7 @@ jobs:
with:
token: ${{ secrets.SHADPS4_TOKEN_REPO }}
name: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}"
- tag: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}"
+ tag: "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.fullhash }}"
draft: false
prerelease: true
body: "Full Changelog: [${{ env.last_release_tag }}...${{ needs.get-info.outputs.shorthash }}](https://github.com/shadps4-emu/shadPS4/compare/${{ env.last_release_tag }}...${{ needs.get-info.outputs.fullhash }})"
@@ -530,14 +530,14 @@ jobs:
# Check if release already exists and get ID
release_id=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \
- "https://api.github.com/repos/$REPO/releases/tags/Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}" | jq -r '.id')
+ "https://api.github.com/repos/$REPO/releases/tags/Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.fullhash }}" | jq -r '.id')
if [[ "$release_id" == "null" ]]; then
echo "Creating release in $REPO for $filename"
release_id=$(curl -s -X POST -H "Authorization: token $GITHUB_TOKEN" \
-H "Accept: application/vnd.github.v3+json" \
-d '{
- "tag_name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}",
+ "tag_name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.fullhash }}",
"name": "Pre-release-shadPS4-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}",
"draft": false,
"prerelease": true,
diff --git a/src/qt_gui/check_update.cpp b/src/qt_gui/check_update.cpp
index 550fdddb5..b0858840a 100644
--- a/src/qt_gui/check_update.cpp
+++ b/src/qt_gui/check_update.cpp
@@ -137,7 +137,7 @@ tr("The Auto Updater allows up to 60 update checks per hour.\\nYou have reached
}
}
- latestRev = latestVersion.right(7);
+ latestRev = latestVersion.right(40);
latestDate = jsonObj["published_at"].toString();
QJsonArray assets = jsonObj["assets"].toArray();
@@ -167,7 +167,7 @@ tr("The Auto Updater allows up to 60 update checks per hour.\\nYou have reached
QDateTime dateTime = QDateTime::fromString(latestDate, Qt::ISODate);
latestDate = dateTime.isValid() ? dateTime.toString("yyyy-MM-dd HH:mm:ss") : "Unknown date";
- if (latestRev == currentRev.left(7)) {
+ if (latestRev == currentRev) {
if (showMessage) {
QMessageBox::information(this, tr("Auto Updater"),
tr("Your version is already up to date!"));
@@ -215,7 +215,7 @@ void CheckUpdate::setupUI(const QString& downloadUrl, const QString& latestDate,
"%3 | "
"(%4) | "
"
")
- .arg(currentRev.left(7), currentDate, latestRev, latestDate);
+ .arg(currentRev.left(7), currentDate, latestRev.left(7), latestDate);
QLabel* updateLabel = new QLabel(updateText, this);
layout->addWidget(updateLabel);
From 93222c6f9f01c15855b3cee23c7856b963b6b1e2 Mon Sep 17 00:00:00 2001
From: georgemoralis
Date: Thu, 5 Jun 2025 08:49:32 +0300
Subject: [PATCH 04/28] New Crowdin updates (#3038)
* New translations en_us.ts (Portuguese, Brazilian)
* New translations en_us.ts (Turkish)
---
src/qt_gui/translations/pt_BR.ts | 2 +-
src/qt_gui/translations/tr_TR.ts | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/qt_gui/translations/pt_BR.ts b/src/qt_gui/translations/pt_BR.ts
index 34d31f240..9f254e272 100644
--- a/src/qt_gui/translations/pt_BR.ts
+++ b/src/qt_gui/translations/pt_BR.ts
@@ -2048,7 +2048,7 @@
* Unsupported Vulkan Version
- * Unsupported Vulkan Version
+ * Versão do Vulkan não suportada
diff --git a/src/qt_gui/translations/tr_TR.ts b/src/qt_gui/translations/tr_TR.ts
index e61985e90..c6d641470 100644
--- a/src/qt_gui/translations/tr_TR.ts
+++ b/src/qt_gui/translations/tr_TR.ts
@@ -138,7 +138,7 @@
File Exists
- Dosya mevcut
+ Dosya Mevcut
File already exists. Do you want to replace it?
@@ -1221,7 +1221,7 @@
Exit shadPS4
- shadPS4'ten Çık
+ shadPS4 Çıkış
Exit the application.
@@ -1381,7 +1381,7 @@
Game Boot
- Oyun Başlatma
+ Oyun Başlat
Only one file can be selected!
From 0e9420a7b228f3e560ba154ad33e037358679638 Mon Sep 17 00:00:00 2001
From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com>
Date: Thu, 5 Jun 2025 08:43:39 -0500
Subject: [PATCH 05/28] Fix request queues in libSceZlib (#3041)
Queues are a FIFO data structure, so pop() removes the front, not the end.
---
src/core/libraries/zlib/zlib.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/core/libraries/zlib/zlib.cpp b/src/core/libraries/zlib/zlib.cpp
index 899cb5bf6..b304992ad 100644
--- a/src/core/libraries/zlib/zlib.cpp
+++ b/src/core/libraries/zlib/zlib.cpp
@@ -51,7 +51,7 @@ void ZlibTaskThread(const std::stop_token& stop) {
if (!task_queue_cv.wait(lock, stop, [&] { return !task_queue.empty(); })) {
break;
}
- task = task_queue.back();
+ task = task_queue.front();
task_queue.pop();
}
@@ -136,7 +136,7 @@ s32 PS4_SYSV_ABI sceZlibWaitForDone(u64* request_id, const u32* timeout) {
} else {
done_queue_cv.wait(lock, pred);
}
- *request_id = done_queue.back();
+ *request_id = done_queue.front();
done_queue.pop();
}
return ORBIS_OK;
From 3b3026ff1c98137e4f3051ec44c0eb4e1fa2f8ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C2=A5IGA?= <164882787+Xphalnos@users.noreply.github.com>
Date: Thu, 5 Jun 2025 15:44:02 +0200
Subject: [PATCH 06/28] [CI] Update Qt to 6.9.1 (#3037)
---
.github/workflows/build.yml | 30 ++++++++++--------------------
1 file changed, 10 insertions(+), 20 deletions(-)
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index bb3d157b7..588236b14 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -76,18 +76,13 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-build
with:
append-timestamp: false
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
- - name: Setup VS Environment
- uses: ilammy/msvc-dev-cmd@v1.13.0
- with:
- arch: amd64
-
- name: Configure CMake
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
@@ -111,7 +106,7 @@ jobs:
- name: Setup Qt
uses: jurplel/install-qt-action@v4
with:
- version: 6.9.0
+ version: 6.9.1
host: windows
target: desktop
arch: win64_msvc2022_64
@@ -130,18 +125,13 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{ runner.os }}-qt-cache-cmake-build
with:
append-timestamp: false
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
- - name: Setup VS Environment
- uses: ilammy/msvc-dev-cmd@v1.13.0
- with:
- arch: amd64
-
- name: Configure CMake
run: cmake --fresh -G Ninja -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_QT_GUI=ON -DENABLE_UPDATER=ON -DCMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE=ON -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
@@ -186,7 +176,7 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{runner.os}}-sdl-cache-cmake-build
with:
@@ -228,7 +218,7 @@ jobs:
- name: Setup Qt
uses: jurplel/install-qt-action@v4
with:
- version: 6.9.0
+ version: 6.9.1
host: mac
target: desktop
arch: clang_64
@@ -247,7 +237,7 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{runner.os}}-qt-cache-cmake-build
with:
@@ -301,7 +291,7 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-build
with:
@@ -362,7 +352,7 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{ runner.os }}-qt-cache-cmake-build
with:
@@ -409,7 +399,7 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{ runner.os }}-sdl-gcc-cache-cmake-build
with:
@@ -445,7 +435,7 @@ jobs:
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.17
+ uses: hendrikmuhs/ccache-action@v1.2.18
env:
cache-name: ${{ runner.os }}-qt-gcc-cache-cmake-build
with:
From 43bf4ed1bca1cd6442e97b04d9afb8383219bb86 Mon Sep 17 00:00:00 2001
From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com>
Date: Thu, 5 Jun 2025 14:14:34 -0500
Subject: [PATCH 07/28] sceVideoOutGetResolutionStatus error behavior (#3044)
---
src/core/libraries/videoout/video_out.cpp | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp
index c5208b6dd..da715b3bf 100644
--- a/src/core/libraries/videoout/video_out.cpp
+++ b/src/core/libraries/videoout/video_out.cpp
@@ -282,7 +282,12 @@ s32 PS4_SYSV_ABI sceVideoOutGetVblankStatus(int handle, SceVideoOutVblankStatus*
s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutionStatus* status) {
LOG_INFO(Lib_VideoOut, "called");
- *status = driver->GetPort(handle)->resolution;
+ auto* port = driver->GetPort(handle);
+ if (!port || !port->is_open) {
+ return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
+ }
+
+ *status = port->resolution;
return ORBIS_OK;
}
From fff3bf9917faef7a185cae896efce87bea7b5b50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?=
Date: Thu, 5 Jun 2025 23:33:25 +0200
Subject: [PATCH 08/28] s_flbit_i32_b64 (#3033)
* s_flbit_i32_b64
* Split FindUMsb64 into two 32bit ops
---
.../backend/spirv/emit_spirv_instructions.h | 1 +
.../backend/spirv/emit_spirv_integer.cpp | 14 ++++++++++++++
.../frontend/translate/scalar_alu.cpp | 13 +++++++++++++
.../frontend/translate/translate.h | 1 +
src/shader_recompiler/ir/ir_emitter.cpp | 11 +++++++++--
src/shader_recompiler/ir/ir_emitter.h | 2 +-
src/shader_recompiler/ir/opcodes.inc | 1 +
7 files changed, 40 insertions(+), 3 deletions(-)
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 09f9732bf..172358866 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -372,6 +372,7 @@ Id EmitBitCount64(EmitContext& ctx, Id value);
Id EmitBitwiseNot32(EmitContext& ctx, Id value);
Id EmitFindSMsb32(EmitContext& ctx, Id value);
Id EmitFindUMsb32(EmitContext& ctx, Id value);
+Id EmitFindUMsb64(EmitContext& ctx, Id value);
Id EmitFindILsb32(EmitContext& ctx, Id value);
Id EmitFindILsb64(EmitContext& ctx, Id value);
Id EmitSMin32(EmitContext& ctx, Id a, Id b);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index 10bfbb2ab..1a995354d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -229,6 +229,20 @@ Id EmitFindUMsb32(EmitContext& ctx, Id value) {
return ctx.OpFindUMsb(ctx.U32[1], value);
}
+Id EmitFindUMsb64(EmitContext& ctx, Id value) {
+ // Built from two 32-bit FindUMsb ops because Vulkan restricts some bitwise operations to
+ // 32 bits: view the value as a pair of dwords and prefer the answer from the upper one.
+ const Id dwords = ctx.OpBitcast(ctx.U32[2], value);
+ const Id upper = ctx.OpCompositeExtract(ctx.U32[1], dwords, 1U);
+ const Id lower = ctx.OpCompositeExtract(ctx.U32[1], dwords, 0U);
+ const Id upper_msb = ctx.OpFindUMsb(ctx.U32[1], upper);
+ const Id lower_msb = ctx.OpFindUMsb(ctx.U32[1], lower);
+ const Id upper_has_bit = ctx.OpINotEqual(ctx.U1[1], upper_msb, ctx.ConstU32(u32(-1)));
+ const Id upper_msb_64 = ctx.OpIAdd(ctx.U32[1], upper_msb, ctx.ConstU32(32u));
+ // value == 0 is not special-cased here; the translator selects the result for that case
+ return ctx.OpSelect(ctx.U32[1], upper_has_bit, upper_msb_64, lower_msb);
+}
+
Id EmitFindILsb32(EmitContext& ctx, Id value) {
return ctx.OpFindILsb(ctx.U32[1], value);
}
diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp
index 3a8e894ae..7beb594c3 100644
--- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp
@@ -114,6 +114,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
return S_FF1_I32_B64(inst);
case Opcode::S_FLBIT_I32_B32:
return S_FLBIT_I32_B32(inst);
+ case Opcode::S_FLBIT_I32_B64:
+ return S_FLBIT_I32_B64(inst);
case Opcode::S_BITSET0_B32:
return S_BITSET_B32(inst, 0);
case Opcode::S_BITSET1_B32:
@@ -686,6 +688,17 @@ void Translator::S_FLBIT_I32_B32(const GcnInst& inst) {
SetDst(inst.dst[0], IR::U32{ir.Select(cond, pos_from_left, ir.Imm32(~0U))});
}
+void Translator::S_FLBIT_I32_B64(const GcnInst& inst) {
+ // Position of the highest set bit of a 64-bit source, counted from the left (MSB) side.
+ const IR::U64 value{GetSrc64(inst.src[0])};
+ // FindUMsb numbers bits from the LSB while GCN counts from the MSB, so flip against bit 63.
+ const IR::U32 msb_from_right = ir.FindUMsb(value);
+ const IR::U32 msb_from_left = ir.ISub(ir.Imm32(63), msb_from_right);
+ const IR::U1 is_nonzero = ir.INotEqual(value, ir.Imm64(u64(0u)));
+ const IR::U32 all_ones = ir.Imm32(~0U); // result when the source is zero (0xFFFFFFFF)
+ SetDst(inst.dst[0], IR::U32{ir.Select(is_nonzero, msb_from_left, all_ones)});
+}
+
void Translator::S_BITSET_B32(const GcnInst& inst, u32 bit_value) {
const IR::U32 old_value{GetSrc(inst.dst[0])};
const IR::U32 offset{ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0U), ir.Imm32(5U))};
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index 2584d5c5e..15ba8c8d7 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -121,6 +121,7 @@ public:
void S_FF1_I32_B32(const GcnInst& inst);
void S_FF1_I32_B64(const GcnInst& inst);
void S_FLBIT_I32_B32(const GcnInst& inst);
+ void S_FLBIT_I32_B64(const GcnInst& inst);
void S_BITSET_B32(const GcnInst& inst, u32 bit_value);
void S_GETPC_B64(u32 pc, const GcnInst& inst);
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index 01d945178..dcb734d01 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -1546,8 +1546,15 @@ U32 IREmitter::FindSMsb(const U32& value) {
return Inst(Opcode::FindSMsb32, value);
}
-U32 IREmitter::FindUMsb(const U32& value) {
- return Inst(Opcode::FindUMsb32, value);
+U32 IREmitter::FindUMsb(const U32U64& value) {
+ switch (value.Type()) {
+ case Type::U32:
+ return Inst(Opcode::FindUMsb32, value);
+ case Type::U64:
+ return Inst(Opcode::FindUMsb64, value);
+ default:
+ ThrowInvalidType(value.Type());
+ }
}
U32 IREmitter::FindILsb(const U32U64& value) {
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index 8f8a12736..da7adf42b 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -266,7 +266,7 @@ public:
[[nodiscard]] U32 BitwiseNot(const U32& value);
[[nodiscard]] U32 FindSMsb(const U32& value);
- [[nodiscard]] U32 FindUMsb(const U32& value);
+ [[nodiscard]] U32 FindUMsb(const U32U64& value);
[[nodiscard]] U32 FindILsb(const U32U64& value);
[[nodiscard]] U32 SMin(const U32& a, const U32& b);
[[nodiscard]] U32 UMin(const U32& a, const U32& b);
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index ab6dbfde9..647432bcf 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -349,6 +349,7 @@ OPCODE(BitwiseNot32, U32, U32,
OPCODE(FindSMsb32, U32, U32, )
OPCODE(FindUMsb32, U32, U32, )
+OPCODE(FindUMsb64, U32, U64, )
OPCODE(FindILsb32, U32, U32, )
OPCODE(FindILsb64, U32, U64, )
OPCODE(SMin32, U32, U32, U32, )
From 91d29459fb55cb0d28006639e7a38134c5a368ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?=
Date: Fri, 6 Jun 2025 05:19:05 +0200
Subject: [PATCH 09/28] Implement PM4CondExec (#3046)
---
src/video_core/amdgpu/liverpool.cpp | 13 +++++++++++++
src/video_core/amdgpu/pm4_cmds.h | 21 +++++++++++++++++++++
2 files changed, 34 insertions(+)
diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp
index 118c43cef..e031d0ebc 100644
--- a/src/video_core/amdgpu/liverpool.cpp
+++ b/src/video_core/amdgpu/liverpool.cpp
@@ -765,6 +765,19 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header);
+ if (cond_exec->command.Value() != 0) {
+ LOG_WARNING(Render, "IT_COND_EXEC used a reserved command");
+ }
+ const auto skip = *cond_exec->Address() == false;
+ if (skip) {
+ dcb = NextPacket(dcb,
+ header->type3.NumWords() + 1 + cond_exec->exec_count.Value());
+ continue;
+ }
+ break;
+ }
default:
UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
static_cast(opcode), count);
diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h
index 011e47bf0..23c1b8f21 100644
--- a/src/video_core/amdgpu/pm4_cmds.h
+++ b/src/video_core/amdgpu/pm4_cmds.h
@@ -1159,4 +1159,25 @@ struct PM4CmdMemSemaphore {
}
};
+struct PM4CmdCondExec { // Packet layout read by the IT_COND_EXEC handler in liverpool.cpp
+ PM4Type3Header header;
+ union {
+ BitField<2, 30, u32> bool_addr_lo; ///< bits [31:2] of the address of the block in memory
+ ///< from where the CP will fetch the condition
+ };
+ union {
+ BitField<0, 16, u32> bool_addr_hi; ///< high address bits for the condition
+ BitField<28, 4, u32> command; ///< the handler warns when non-zero (reserved)
+ };
+ union {
+ BitField<0, 14, u32> exec_count; ///< Number of DWords that the CP will skip
+ ///< if bool pointed to is zero
+ };
+
+ bool* Address() const { // Rebuilds the condition pointer from bool_addr_hi / bool_addr_lo
+ return std::bit_cast(u64(bool_addr_hi.Value()) << 32 | u64(bool_addr_lo.Value())
+ << 2); // the lo field stores address bits [31:2], hence the shift
+ }
+};
+
} // namespace AmdGpu
From 5edd9ff54b6baca5025dff0f5491ddf0d0746a19 Mon Sep 17 00:00:00 2001
From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com>
Date: Sat, 7 Jun 2025 16:17:45 -0500
Subject: [PATCH 10/28] Improved sceKernelMapNamedFlexibleMemory logging
(#3050)
* More descriptive sceKernelMapNamedFlexibleMemory logging
* Misc exports
These functions are used by Overwatch: Origins Edition
* Clang
* Function parameter cleanup
Changes the parameters on our sceKernelMapNamedFlexibleMemory and sceKernelMapFlexibleMemory functions to better align with our current standards.
---
src/core/libraries/kernel/kernel.cpp | 4 ++++
src/core/libraries/kernel/memory.cpp | 21 +++++++++------------
src/core/libraries/kernel/memory.h | 7 +++----
src/core/libraries/kernel/threads/mutex.cpp | 1 +
4 files changed, 17 insertions(+), 16 deletions(-)
diff --git a/src/core/libraries/kernel/kernel.cpp b/src/core/libraries/kernel/kernel.cpp
index 180850217..930640d0e 100644
--- a/src/core/libraries/kernel/kernel.cpp
+++ b/src/core/libraries/kernel/kernel.cpp
@@ -273,6 +273,10 @@ void RegisterKernel(Core::Loader::SymbolsResolver* sym) {
Libraries::Net::sceNetInetNtop); // TODO fix it to sys_ ...
LIB_FUNCTION("4n51s0zEf0c", "libScePosix", 1, "libkernel", 1, 1,
Libraries::Net::sceNetInetPton); // TODO fix it to sys_ ...
+ LIB_FUNCTION("XVL8So3QJUk", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_connect);
+ LIB_FUNCTION("3e+4Iv7IJ8U", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_accept);
+ LIB_FUNCTION("aNeavPDNKzA", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_sendmsg);
+ LIB_FUNCTION("pxnCmagrtao", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_listen);
}
} // namespace Libraries::Kernel
diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp
index 18676cbdf..5e94199e1 100644
--- a/src/core/libraries/kernel/memory.cpp
+++ b/src/core/libraries/kernel/memory.cpp
@@ -222,9 +222,10 @@ s32 PS4_SYSV_ABI sceKernelMapDirectMemory2(void** addr, u64 len, s32 type, s32 p
return ret;
}
-s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
- int flags, const char* name) {
-
+s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags,
+ const char* name) {
+ LOG_INFO(Kernel_Vmm, "in_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}, name = '{}'",
+ fmt::ptr(*addr_in_out), len, prot, flags, name);
if (len == 0 || !Common::Is16KBAligned(len)) {
LOG_ERROR(Kernel_Vmm, "len is 0 or not 16kb multiple");
return ORBIS_KERNEL_ERROR_EINVAL;
@@ -243,18 +244,14 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t
const VAddr in_addr = reinterpret_cast(*addr_in_out);
const auto mem_prot = static_cast(prot);
const auto map_flags = static_cast(flags);
- SCOPE_EXIT {
- LOG_INFO(Kernel_Vmm,
- "in_addr = {:#x}, out_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}",
- in_addr, fmt::ptr(*addr_in_out), len, prot, flags);
- };
auto* memory = Core::Memory::Instance();
- return memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
- Core::VMAType::Flexible, name);
+ const auto ret = memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
+ Core::VMAType::Flexible, name);
+ LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr_in_out));
+ return ret;
}
-s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
- int flags) {
+s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags) {
return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "anon");
}
diff --git a/src/core/libraries/kernel/memory.h b/src/core/libraries/kernel/memory.h
index 6cefe0d07..ea42e7546 100644
--- a/src/core/libraries/kernel/memory.h
+++ b/src/core/libraries/kernel/memory.h
@@ -141,10 +141,9 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE
s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info,
size_t infoSize);
s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u64 alignment);
-s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addrInOut, std::size_t len, int prot,
- int flags, const char* name);
-s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
- int flags);
+s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags,
+ const char* name);
+s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags);
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot);
s32 PS4_SYSV_ABI sceKernelMprotect(const void* addr, u64 size, s32 prot);
diff --git a/src/core/libraries/kernel/threads/mutex.cpp b/src/core/libraries/kernel/threads/mutex.cpp
index 956e5ef65..3dbade96a 100644
--- a/src/core/libraries/kernel/threads/mutex.cpp
+++ b/src/core/libraries/kernel/threads/mutex.cpp
@@ -426,6 +426,7 @@ void RegisterMutex(Core::Loader::SymbolsResolver* sym) {
// Posix
LIB_FUNCTION("ttHNfU+qDBU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_init);
LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
+ LIB_FUNCTION("Io9+nTKXZtA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_timedlock);
LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init);
From 2857ef34f037ac8e96730faabdf7dd3511c7d6af Mon Sep 17 00:00:00 2001
From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com>
Date: Sun, 8 Jun 2025 13:04:43 -0500
Subject: [PATCH 11/28] Don't coalesce dmem pages (#3059)
Looks like this change is what broke P.T.
I'll need to look closer at this when I have a chance, clearly we're doing something wrong here.
---
src/core/memory.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index ba3640877..54cae910b 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -182,7 +182,6 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
auto& area = CarveDmemArea(mapping_start, size)->second;
area.memory_type = memory_type;
area.is_free = false;
- MergeAdjacent(dmem_map, dmem_area);
return mapping_start;
}
From 5d064dd89f17b73be3cae84bd0bf98a7b02d6997 Mon Sep 17 00:00:00 2001
From: Fire Cube
Date: Sun, 8 Jun 2025 20:04:55 +0200
Subject: [PATCH 12/28] Dev Tools: Fix Module Viewer HLE detection (#3058)
* fix
* clang
---
src/core/linker.cpp | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/src/core/linker.cpp b/src/core/linker.cpp
index c50b03a8f..1f45caf12 100644
--- a/src/core/linker.cpp
+++ b/src/core/linker.cpp
@@ -332,21 +332,22 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul
sr.type = sym_type;
const auto* record = m_hle_symbols.FindSymbol(sr);
- if (!record) {
- // Check if it an export function
- const auto* p = FindExportedModule(*module, *library);
- if (p && p->export_sym.GetSize() > 0) {
- record = p->export_sym.FindSymbol(sr);
- }
- }
if (record) {
*return_info = *record;
-
Core::Devtools::Widget::ModuleList::AddModule(sr.library);
-
return true;
}
+ // Check if it is an exported function
+ const auto* p = FindExportedModule(*module, *library);
+ if (p && p->export_sym.GetSize() > 0) {
+ record = p->export_sym.FindSymbol(sr);
+ if (record) {
+ *return_info = *record;
+ return true;
+ }
+ }
+
const auto aeronid = AeroLib::FindByNid(sr.name.c_str());
if (aeronid) {
return_info->name = aeronid->name;
From 2bc199a41be994eaad6abd0db978808aac3cb2c0 Mon Sep 17 00:00:00 2001
From: Mahmoud Adel <94652220+AboMedoz@users.noreply.github.com>
Date: Sun, 8 Jun 2025 21:33:08 +0300
Subject: [PATCH 13/28] black image error fix (#3051)
---
src/video_core/texture_cache/texture_cache.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp
index 63cfc4431..4b173c313 100644
--- a/src/video_core/texture_cache/texture_cache.cpp
+++ b/src/video_core/texture_cache/texture_cache.cpp
@@ -299,6 +299,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
auto& new_image = slot_images[new_image_id];
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
+ RefreshImage(new_image);
new_image.CopyImage(src_image);
if (src_image.binding.is_bound || src_image.binding.is_target) {
From 952cef5a154c231d0b8879a6caca519cfaebb670 Mon Sep 17 00:00:00 2001
From: TheTurtle
Date: Sun, 8 Jun 2025 21:38:58 +0300
Subject: [PATCH 14/28] shader_recompiler: Implement dual source blending
(#3054)
---
.../backend/spirv/spirv_emit_context.cpp | 15 +++++++++++++--
.../frontend/translate/export.cpp | 10 ++++++++--
src/shader_recompiler/runtime_info.h | 2 ++
.../renderer_vulkan/liverpool_to_vk.cpp | 13 +++++++++++++
src/video_core/renderer_vulkan/liverpool_to_vk.h | 2 ++
.../renderer_vulkan/vk_pipeline_cache.cpp | 9 +++++++++
6 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 68bfcc0d0..bd10fd3df 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -634,7 +634,8 @@ void EmitContext::DefineOutputs() {
}
break;
}
- case LogicalStage::Fragment:
+ case LogicalStage::Fragment: {
+ u32 num_render_targets = 0;
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
if (!info.stores.GetAny(mrt)) {
@@ -643,11 +644,21 @@ void EmitContext::DefineOutputs() {
const u32 num_components = info.stores.NumComponents(mrt);
const AmdGpu::NumberFormat num_format{runtime_info.fs_info.color_buffers[i].num_format};
const Id type{GetAttributeType(*this, num_format)[num_components]};
- const Id id{DefineOutput(type, i)};
+ Id id;
+ if (runtime_info.fs_info.dual_source_blending) {
+ id = DefineOutput(type, 0);
+ Decorate(id, spv::Decoration::Index, i);
+ } else {
+ id = DefineOutput(type, i);
+ }
Name(id, fmt::format("frag_color{}", i));
frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
+ ++num_render_targets;
}
+ ASSERT_MSG(!runtime_info.fs_info.dual_source_blending || num_render_targets == 2,
+ "Dual source blending enabled, there must be exactly two MRT exports");
break;
+ }
case LogicalStage::Geometry: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp
index 0abef2e81..8a99f38a9 100644
--- a/src/shader_recompiler/frontend/translate/export.cpp
+++ b/src/shader_recompiler/frontend/translate/export.cpp
@@ -26,8 +26,11 @@ void Translator::ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32
}
void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
- const u32 color_buffer_idx =
+ u32 color_buffer_idx =
static_cast(attribute) - static_cast(IR::Attribute::RenderTarget0);
+ if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
+ color_buffer_idx = 0;
+ }
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
AmdGpu::NumberFormat num_format;
@@ -68,8 +71,11 @@ void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR:
}
void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
- const u32 color_buffer_idx =
+ u32 color_buffer_idx =
static_cast(attribute) - static_cast(IR::Attribute::RenderTarget0);
+ if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
+ color_buffer_idx = 0;
+ }
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
const auto swizzled_comp = SwizzleMrtComponent(color_buffer, comp);
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index b8ed42f5b..53d2d5303 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -196,11 +196,13 @@ struct FragmentRuntimeInfo {
u32 num_inputs;
std::array inputs;
std::array color_buffers;
+ bool dual_source_blending;
bool operator==(const FragmentRuntimeInfo& other) const noexcept {
return std::ranges::equal(color_buffers, other.color_buffers) &&
en_flags.raw == other.en_flags.raw && addr_flags.raw == other.addr_flags.raw &&
num_inputs == other.num_inputs &&
+ dual_source_blending == other.dual_source_blending &&
std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(),
other.inputs.begin() + num_inputs);
}
diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
index a6ae0c304..5972296c0 100644
--- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
@@ -214,6 +214,19 @@ vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor) {
}
}
+bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor) {
+ using BlendFactor = Liverpool::BlendControl::BlendFactor;
+ switch (factor) {
+ case BlendFactor::Src1Color:
+ case BlendFactor::Src1Alpha:
+ case BlendFactor::InvSrc1Color:
+ case BlendFactor::InvSrc1Alpha:
+ return true;
+ default:
+ return false;
+ }
+}
+
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) {
using BlendFunc = Liverpool::BlendControl::BlendFunc;
switch (func) {
diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h
index fca0a8378..61fd4a8c1 100644
--- a/src/video_core/renderer_vulkan/liverpool_to_vk.h
+++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h
@@ -30,6 +30,8 @@ vk::FrontFace FrontFace(Liverpool::FrontFace mode);
vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor);
+bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor);
+
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func);
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode);
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index d7ad47a3c..b72f77e55 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -158,6 +158,15 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
info.fs_info.addr_flags = regs.ps_input_addr;
const auto& ps_inputs = regs.ps_inputs;
info.fs_info.num_inputs = regs.num_interp;
+ const auto& cb0_blend = regs.blend_control[0];
+ info.fs_info.dual_source_blending =
+ LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.color_dst_factor) ||
+ LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.color_src_factor);
+ if (cb0_blend.separate_alpha_blend) {
+ info.fs_info.dual_source_blending |=
+ LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.alpha_dst_factor) ||
+ LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.alpha_src_factor);
+ }
for (u32 i = 0; i < regs.num_interp; i++) {
info.fs_info.inputs[i] = {
.param_index = u8(ps_inputs[i].input_offset.Value()),
From ce84e80f65745c9f00981e4dbcfe79ef1a11cfe6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?=
Date: Sun, 8 Jun 2025 20:43:58 +0200
Subject: [PATCH 15/28] BUFFER_ATOMIC_CMPSWAP (#3045)
---
.../backend/spirv/emit_spirv_atomic.cpp | 22 +++++++++++++++++++
.../backend/spirv/emit_spirv_instructions.h | 2 ++
.../frontend/translate/vector_memory.cpp | 4 ++++
src/shader_recompiler/ir/ir_emitter.cpp | 5 +++++
src/shader_recompiler/ir/ir_emitter.h | 3 +++
src/shader_recompiler/ir/opcodes.inc | 1 +
6 files changed, 37 insertions(+)
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index d7c73ca8f..a342b47b6 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -68,6 +68,22 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
});
}
+Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
+ Id cmp_value,
+ Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) {
+ const auto& buffer = ctx.buffers[handle];
+ if (Sirit::ValidId(buffer.offset)) {
+ address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+ }
+ const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
+ const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
+ const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+ const auto [scope, semantics]{AtomicArgs(ctx)};
+ return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] {
+ return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value);
+ });
+}
+
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& texture = ctx.images[handle & 0xFFFF];
@@ -175,6 +191,12 @@ Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange);
}
+Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
+ Id cmp_value) {
+ return BufferAtomicU32CmpSwap(ctx, inst, handle, address, value, cmp_value,
+ &Sirit::Module::OpAtomicCompareExchange);
+}
+
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd);
}
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 172358866..b9707224c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -96,6 +96,8 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
+Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
+ Id cmp_value);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index 5c972c607..8c035f26c 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -331,6 +331,10 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
switch (op) {
case AtomicOp::Swap:
return ir.BufferAtomicSwap(handle, address, vdata_val, buffer_info);
+ case AtomicOp::CmpSwap: {
+ IR::Value cmp_val = ir.GetVectorReg(vdata + 1);
+ return ir.BufferAtomicCmpSwap(handle, address, vdata_val, cmp_val, buffer_info);
+ }
case AtomicOp::Add:
return ir.BufferAtomicIAdd(handle, address, vdata_val, buffer_info);
case AtomicOp::Smin:
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index dcb734d01..07249edfe 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -513,6 +513,11 @@ Value IREmitter::BufferAtomicSwap(const Value& handle, const Value& address, con
return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value);
}
+Value IREmitter::BufferAtomicCmpSwap(const Value& handle, const Value& address, const Value& vdata,
+ const Value& cmp_value, BufferInstInfo info) {
+ return Inst(Opcode::BufferAtomicCmpSwap32, Flags{info}, handle, address, vdata, cmp_value);
+}
+
U32 IREmitter::DataAppend(const U32& counter) {
return Inst(Opcode::DataAppend, counter, Imm32(0));
}
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index da7adf42b..7b9b81093 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -150,6 +150,9 @@ public:
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
+ [[nodiscard]] Value BufferAtomicCmpSwap(const Value& handle, const Value& address,
+ const Value& value, const Value& cmp_value,
+ BufferInstInfo info);
[[nodiscard]] U32 DataAppend(const U32& counter);
[[nodiscard]] U32 DataConsume(const U32& counter);
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 647432bcf..5b3216be6 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -126,6 +126,7 @@ OPCODE(BufferAtomicAnd32, U32, Opaq
OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, )
+OPCODE(BufferAtomicCmpSwap32, U32, Opaque, Opaque, U32, U32, )
// Vector utility
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
From f2bbb6847dc22037c50a5723acd70195d30245c9 Mon Sep 17 00:00:00 2001
From: squidbus <175574877+squidbus@users.noreply.github.com>
Date: Sun, 8 Jun 2025 11:53:11 -0700
Subject: [PATCH 16/28] fix: Missing switch case for BUFFER_ATOMIC_CMPSWAP
---
src/shader_recompiler/frontend/translate/vector_memory.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index 8c035f26c..5eb2079a4 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -70,6 +70,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
return BUFFER_ATOMIC(AtomicOp::Add, inst);
case Opcode::BUFFER_ATOMIC_SWAP:
return BUFFER_ATOMIC(AtomicOp::Swap, inst);
+ case Opcode::BUFFER_ATOMIC_CMPSWAP:
+ return BUFFER_ATOMIC(AtomicOp::CmpSwap, inst);
case Opcode::BUFFER_ATOMIC_SMIN:
return BUFFER_ATOMIC(AtomicOp::Smin, inst);
case Opcode::BUFFER_ATOMIC_UMIN:
@@ -332,7 +334,7 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
case AtomicOp::Swap:
return ir.BufferAtomicSwap(handle, address, vdata_val, buffer_info);
case AtomicOp::CmpSwap: {
- IR::Value cmp_val = ir.GetVectorReg(vdata + 1);
+ const IR::Value cmp_val = ir.GetVectorReg(vdata + 1);
return ir.BufferAtomicCmpSwap(handle, address, vdata_val, cmp_val, buffer_info);
}
case AtomicOp::Add:
From a07a6bb9d3519fe128538a0c5d537e1e1e5bfbe1 Mon Sep 17 00:00:00 2001
From: TheTurtle
Date: Sun, 8 Jun 2025 22:14:09 +0300
Subject: [PATCH 17/28] buffer_cache: Better image search for buffer validation
(#3057)
---
src/video_core/buffer_cache/buffer_cache.cpp | 51 ++++++++++++++------
1 file changed, 36 insertions(+), 15 deletions(-)
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index 4717a5ff8..8a5283d83 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -798,24 +798,45 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
}
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
- static constexpr FindFlags find_flags =
- FindFlags::NoCreate | FindFlags::RelaxDim | FindFlags::RelaxFmt | FindFlags::RelaxSize;
- TextureCache::BaseDesc desc{};
- desc.info.guest_address = device_addr;
- desc.info.guest_size = size;
- const ImageId image_id = texture_cache.FindImage(desc, find_flags);
- if (!image_id) {
+ boost::container::small_vector image_ids;
+ texture_cache.ForEachImageInRegion(device_addr, size, [&](ImageId image_id, Image& image) {
+ if (image.info.guest_address != device_addr) {
+ return;
+ }
+ // Only perform sync if image is:
+ // - GPU modified; otherwise there are no changes to synchronize.
+ // - Not CPU dirty; otherwise we could overwrite CPU changes with stale GPU changes.
+ // - Not GPU dirty; otherwise we could overwrite GPU changes with stale image data.
+ if (False(image.flags & ImageFlagBits::GpuModified) ||
+ True(image.flags & ImageFlagBits::Dirty)) {
+ return;
+ }
+ image_ids.push_back(image_id);
+ });
+ if (image_ids.empty()) {
return false;
}
+ ImageId image_id{};
+ if (image_ids.size() == 1) {
+ // Sometimes the image size might not exactly match the requested buffer size.
+ // If we only found 1 candidate image, use it without too many questions.
+ image_id = image_ids[0];
+ } else {
+ for (s32 i = 0; i < image_ids.size(); ++i) {
+ Image& image = texture_cache.GetImage(image_ids[i]);
+ if (image.info.guest_size == size) {
+ image_id = image_ids[i];
+ break;
+ }
+ }
+ if (!image_id) {
+ LOG_WARNING(Render_Vulkan,
+ "Failed to find exact image match for copy addr={:#x}, size={:#x}",
+ device_addr, size);
+ return false;
+ }
+ }
Image& image = texture_cache.GetImage(image_id);
- // Only perform sync if image is:
- // - GPU modified; otherwise there are no changes to synchronize.
- // - Not CPU dirty; otherwise we could overwrite CPU changes with stale GPU changes.
- // - Not GPU dirty; otherwise we could overwrite GPU changes with stale image data.
- if (False(image.flags & ImageFlagBits::GpuModified) ||
- True(image.flags & ImageFlagBits::Dirty)) {
- return false;
- }
ASSERT_MSG(device_addr == image.info.guest_address,
"Texel buffer aliases image subresources {:x} : {:x}", device_addr,
image.info.guest_address);
From 5004e41100d6749dec1775ce335d7de3d93b46a8 Mon Sep 17 00:00:00 2001
From: Paris Oplopoios
Date: Sun, 8 Jun 2025 22:29:33 +0300
Subject: [PATCH 18/28] Patch movntss and movntsd (#3049)
* Patch movntss and movntsd
* clang-format
* Deduplication
* Allow rep to be in other places
---
src/core/cpu_patches.cpp | 53 +++++++++++++++++++++++++++++++++++-----
1 file changed, 47 insertions(+), 6 deletions(-)
diff --git a/src/core/cpu_patches.cpp b/src/core/cpu_patches.cpp
index 8937ef04b..8512858e9 100644
--- a/src/core/cpu_patches.cpp
+++ b/src/core/cpu_patches.cpp
@@ -88,7 +88,8 @@ static bool FilterTcbAccess(const ZydisDecodedOperand* operands) {
dst_op.reg.value <= ZYDIS_REGISTER_R15;
}
-static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
+static void GenerateTcbAccess(void* /* address */, const ZydisDecodedOperand* operands,
+ Xbyak::CodeGenerator& c) {
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
#if defined(_WIN32)
@@ -126,7 +127,8 @@ static bool FilterNoSSE4a(const ZydisDecodedOperand*) {
return !cpu.has(Cpu::tSSE4a);
}
-static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
+static void GenerateEXTRQ(void* /* address */, const ZydisDecodedOperand* operands,
+ Xbyak::CodeGenerator& c) {
bool immediateForm = operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
@@ -245,7 +247,8 @@ static void GenerateEXTRQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenera
}
}
-static void GenerateINSERTQ(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
+static void GenerateINSERTQ(void* /* address */, const ZydisDecodedOperand* operands,
+ Xbyak::CodeGenerator& c) {
bool immediateForm = operands[2].type == ZYDIS_OPERAND_TYPE_IMMEDIATE &&
operands[3].type == ZYDIS_OPERAND_TYPE_IMMEDIATE;
@@ -383,8 +386,44 @@ static void GenerateINSERTQ(const ZydisDecodedOperand* operands, Xbyak::CodeGene
}
}
+static void ReplaceMOVNT(void* address, u8 rep_prefix) {
+ // Find the opcode byte
+ // There can be any number of prefixes, but the instruction can't be more than 15 bytes
+ // And we know for sure this is a MOVNTSS/MOVNTSD
+ bool found = false;
+ bool rep_prefix_found = false;
+ int index = 0;
+ u8* ptr = reinterpret_cast(address);
+ for (int i = 0; i < 15; i++) {
+ if (ptr[i] == rep_prefix) {
+ rep_prefix_found = true;
+ } else if (ptr[i] == 0x2B) {
+ index = i;
+ found = true;
+ break;
+ }
+ }
+
+ // Some sanity checks
+ ASSERT(found);
+ ASSERT(index >= 2);
+ ASSERT(ptr[index - 1] == 0x0F);
+ ASSERT(rep_prefix_found);
+
+ // This turns the MOVNTSS/MOVNTSD to a MOVSS/MOVSD m, xmm
+ ptr[index] = 0x11;
+}
+
+static void ReplaceMOVNTSS(void* address, const ZydisDecodedOperand*, Xbyak::CodeGenerator&) {
+ ReplaceMOVNT(address, 0xF3);
+}
+
+static void ReplaceMOVNTSD(void* address, const ZydisDecodedOperand*, Xbyak::CodeGenerator&) {
+ ReplaceMOVNT(address, 0xF2);
+}
+
using PatchFilter = bool (*)(const ZydisDecodedOperand*);
-using InstructionGenerator = void (*)(const ZydisDecodedOperand*, Xbyak::CodeGenerator&);
+using InstructionGenerator = void (*)(void*, const ZydisDecodedOperand*, Xbyak::CodeGenerator&);
struct PatchInfo {
/// Filter for more granular patch conditions past just the instruction mnemonic.
PatchFilter filter;
@@ -400,6 +439,8 @@ static const std::unordered_map Patches = {
// SSE4a
{ZYDIS_MNEMONIC_EXTRQ, {FilterNoSSE4a, GenerateEXTRQ, true}},
{ZYDIS_MNEMONIC_INSERTQ, {FilterNoSSE4a, GenerateINSERTQ, true}},
+ {ZYDIS_MNEMONIC_MOVNTSS, {FilterNoSSE4a, ReplaceMOVNTSS, false}},
+ {ZYDIS_MNEMONIC_MOVNTSD, {FilterNoSSE4a, ReplaceMOVNTSD, false}},
#if defined(_WIN32)
// Windows needs a trampoline.
@@ -477,7 +518,7 @@ static std::pair TryPatch(u8* code, PatchModule* module) {
auto& trampoline_gen = module->trampoline_gen;
const auto trampoline_ptr = trampoline_gen.getCurr();
- patch_info.generator(operands, trampoline_gen);
+ patch_info.generator(code, operands, trampoline_gen);
// Return to the following instruction at the end of the trampoline.
trampoline_gen.jmp(code + instruction.length);
@@ -485,7 +526,7 @@ static std::pair TryPatch(u8* code, PatchModule* module) {
// Replace instruction with near jump to the trampoline.
patch_gen.jmp(trampoline_ptr, Xbyak::CodeGenerator::LabelType::T_NEAR);
} else {
- patch_info.generator(operands, patch_gen);
+ patch_info.generator(code, operands, patch_gen);
}
const auto patch_size = patch_gen.getCurr() - code;
From 8ffcfc87bd9bcd8396cde82eec9daf2a250fd018 Mon Sep 17 00:00:00 2001
From: TheTurtle
Date: Sun, 8 Jun 2025 22:46:34 +0300
Subject: [PATCH 19/28] shader_recompiler: Implement linear interpolation
support (#3055)
---
.../backend/spirv/spirv_emit_context.cpp | 23 +-
.../backend/spirv/spirv_emit_context.h | 4 +-
.../frontend/structured_control_flow.cpp | 12 +-
.../frontend/translate/translate.cpp | 135 ++++----
.../frontend/translate/translate.h | 16 +-
.../translate/vector_interpolation.cpp | 5 +-
src/shader_recompiler/info.h | 4 +
src/shader_recompiler/ir/attribute.h | 19 ++
src/shader_recompiler/ir/ir_emitter.cpp | 1 -
src/shader_recompiler/ir/ir_emitter.h | 2 +-
src/shader_recompiler/ir/reg.h | 2 +-
src/video_core/amdgpu/pixel_format.h | 300 +++++++++++++++++-
src/video_core/amdgpu/resource.h | 1 -
src/video_core/amdgpu/types.h | 276 ----------------
14 files changed, 425 insertions(+), 375 deletions(-)
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index bd10fd3df..9e51f8e60 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -307,7 +307,9 @@ void EmitContext::DefineInterpolatedAttribs() {
const Id p2{OpCompositeExtract(F32[4], p_array, 2U)};
const Id p10{OpFSub(F32[4], p1, p0)};
const Id p20{OpFSub(F32[4], p2, p0)};
- const Id bary_coord{OpLoad(F32[3], gl_bary_coord_id)};
+ const Id bary_coord{OpLoad(F32[3], IsLinear(info.interp_qualifiers[i])
+ ? bary_coord_linear_id
+ : bary_coord_persp_id)};
const Id bary_coord_y{OpCompositeExtract(F32[1], bary_coord, 1)};
const Id bary_coord_z{OpCompositeExtract(F32[1], bary_coord, 2)};
const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)};
@@ -411,8 +413,14 @@ void EmitContext::DefineInputs() {
DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
}
if (profile.needs_manual_interpolation) {
- gl_bary_coord_id =
- DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
+ if (info.has_perspective_interp) {
+ bary_coord_persp_id =
+ DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input);
+ }
+ if (info.has_linear_interp) {
+ bary_coord_linear_id = DefineVariable(F32[3], spv::BuiltIn::BaryCoordNoPerspKHR,
+ spv::StorageClass::Input);
+ }
}
for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
const auto& input = runtime_info.fs_info.inputs[i];
@@ -435,9 +443,12 @@ void EmitContext::DefineInputs() {
} else {
attr_id = DefineInput(type, semantic);
Name(attr_id, fmt::format("fs_in_attr{}", semantic));
- }
- if (input.is_flat) {
- Decorate(attr_id, spv::Decoration::Flat);
+
+ if (input.is_flat) {
+ Decorate(attr_id, spv::Decoration::Flat);
+ } else if (IsLinear(info.interp_qualifiers[i])) {
+ Decorate(attr_id, spv::Decoration::NoPerspective);
+ }
}
input_params[semantic] =
GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index a2e0d2f47..20d936cf0 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -293,8 +293,8 @@ public:
Id shared_memory_u32_type{};
- Id interpolate_func{};
- Id gl_bary_coord_id{};
+ Id bary_coord_persp_id{};
+ Id bary_coord_linear_id{};
struct TextureDefinition {
const VectorIds* data_types;
diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp
index 11b40d07c..1a7a43f4d 100644
--- a/src/shader_recompiler/frontend/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/structured_control_flow.cpp
@@ -605,11 +605,12 @@ public:
Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_},
- runtime_info{runtime_info_}, profile{profile_} {
+ runtime_info{runtime_info_}, profile{profile_},
+ translator{info_, runtime_info_, profile_} {
Visit(root_stmt, nullptr, nullptr);
- IR::Block& first_block{*syntax_list.front().data.block};
- Translator{&first_block, info, runtime_info, profile}.EmitPrologue();
+ IR::Block* first_block = syntax_list.front().data.block;
+ translator.EmitPrologue(first_block);
}
private:
@@ -637,8 +638,8 @@ private:
current_block->has_multiple_predecessors = stmt.block->num_predecessors > 1;
const u32 start = stmt.block->begin_index;
const u32 size = stmt.block->end_index - start + 1;
- Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
- info, runtime_info, profile);
+ translator.Translate(current_block, stmt.block->begin,
+ inst_list.subspan(start, size));
}
break;
}
@@ -820,6 +821,7 @@ private:
Info& info;
const RuntimeInfo& runtime_info;
const Profile& profile;
+ Translator translator;
};
} // Anonymous namespace
diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp
index 5675adf3c..5853f3e72 100644
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -21,16 +21,60 @@
namespace Shader::Gcn {
-static u32 next_vgpr_num;
-static std::unordered_map vgpr_map;
-
-Translator::Translator(IR::Block* block_, Info& info_, const RuntimeInfo& runtime_info_,
- const Profile& profile_)
- : ir{*block_, block_->begin()}, info{info_}, runtime_info{runtime_info_}, profile{profile_} {
- next_vgpr_num = vgpr_map.empty() ? runtime_info.num_allocated_vgprs : next_vgpr_num;
+Translator::Translator(Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
+ : info{info_}, runtime_info{runtime_info_}, profile{profile_},
+ next_vgpr_num{runtime_info.num_allocated_vgprs} {
+ if (info.l_stage == LogicalStage::Fragment) {
+ dst_frag_vreg = GatherInterpQualifiers();
+ }
}
-void Translator::EmitPrologue() {
+IR::VectorReg Translator::GatherInterpQualifiers() {
+ u32 dst_vreg{};
+ if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
+ vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // I
+ vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // J
+ info.has_perspective_interp = true;
+ }
+ if (runtime_info.fs_info.addr_flags.persp_center_ena) {
+ vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // I
+ vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // J
+ info.has_perspective_interp = true;
+ }
+ if (runtime_info.fs_info.addr_flags.persp_centroid_ena) {
+ vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // I
+ vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // J
+ info.has_perspective_interp = true;
+ }
+ if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) {
+ ++dst_vreg; // I/W
+ ++dst_vreg; // J/W
+ ++dst_vreg; // 1/W
+ }
+ if (runtime_info.fs_info.addr_flags.linear_sample_ena) {
+ vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // I
+ vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // J
+ info.has_linear_interp = true;
+ }
+ if (runtime_info.fs_info.addr_flags.linear_center_ena) {
+ vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // I
+ vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // J
+ info.has_linear_interp = true;
+ }
+ if (runtime_info.fs_info.addr_flags.linear_centroid_ena) {
+ vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // I
+ vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // J
+ info.has_linear_interp = true;
+ }
+ if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) {
+ ++dst_vreg;
+ }
+ return IR::VectorReg(dst_vreg);
+}
+
+void Translator::EmitPrologue(IR::Block* first_block) {
+ ir = IR::IREmitter(*first_block, first_block->begin());
+
ir.Prologue();
ir.SetExec(ir.Imm1(true));
@@ -60,39 +104,7 @@ void Translator::EmitPrologue() {
}
break;
case LogicalStage::Fragment:
- dst_vreg = IR::VectorReg::V0;
- if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
- ++dst_vreg; // I
- ++dst_vreg; // J
- }
- if (runtime_info.fs_info.addr_flags.persp_center_ena) {
- ++dst_vreg; // I
- ++dst_vreg; // J
- }
- if (runtime_info.fs_info.addr_flags.persp_centroid_ena) {
- ++dst_vreg; // I
- ++dst_vreg; // J
- }
- if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) {
- ++dst_vreg; // I/W
- ++dst_vreg; // J/W
- ++dst_vreg; // 1/W
- }
- if (runtime_info.fs_info.addr_flags.linear_sample_ena) {
- ++dst_vreg; // I
- ++dst_vreg; // J
- }
- if (runtime_info.fs_info.addr_flags.linear_center_ena) {
- ++dst_vreg; // I
- ++dst_vreg; // J
- }
- if (runtime_info.fs_info.addr_flags.linear_centroid_ena) {
- ++dst_vreg; // I
- ++dst_vreg; // J
- }
- if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) {
- ++dst_vreg;
- }
+ dst_vreg = dst_frag_vreg;
if (runtime_info.fs_info.addr_flags.pos_x_float_ena) {
if (runtime_info.fs_info.en_flags.pos_x_float_ena) {
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0));
@@ -543,6 +555,26 @@ void Translator::LogMissingOpcode(const GcnInst& inst) {
info.translation_failed = true;
}
+void Translator::Translate(IR::Block* block, u32 pc, std::span inst_list) {
+ if (inst_list.empty()) {
+ return;
+ }
+ ir = IR::IREmitter{*block, block->begin()};
+ for (const auto& inst : inst_list) {
+ pc += inst.length;
+
+ // Special case for emitting fetch shader.
+ if (inst.opcode == Opcode::S_SWAPPC_B64) {
+ ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
+ info.stage == Stage::Local);
+ EmitFetch(inst);
+ continue;
+ }
+
+ TranslateInstruction(inst, pc);
+ }
+}
+
void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) {
// Emit instructions for each category.
switch (inst.category) {
@@ -577,25 +609,4 @@ void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) {
}
}
-void Translate(IR::Block* block, u32 pc, std::span inst_list, Info& info,
- const RuntimeInfo& runtime_info, const Profile& profile) {
- if (inst_list.empty()) {
- return;
- }
- Translator translator{block, info, runtime_info, profile};
- for (const auto& inst : inst_list) {
- pc += inst.length;
-
- // Special case for emitting fetch shader.
- if (inst.opcode == Opcode::S_SWAPPC_B64) {
- ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
- info.stage == Stage::Local);
- translator.EmitFetch(inst);
- continue;
- }
-
- translator.TranslateInstruction(inst, pc);
- }
-}
-
} // namespace Shader::Gcn
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index 15ba8c8d7..f8ffb9638 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -53,15 +53,17 @@ enum class NegateMode : u32 {
Result,
};
+static constexpr size_t MaxInterpVgpr = 16;
+
class Translator {
public:
- explicit Translator(IR::Block* block_, Info& info, const RuntimeInfo& runtime_info,
- const Profile& profile);
+ explicit Translator(Info& info, const RuntimeInfo& runtime_info, const Profile& profile);
+ void Translate(IR::Block* block, u32 pc, std::span inst_list);
void TranslateInstruction(const GcnInst& inst, u32 pc);
// Instruction categories
- void EmitPrologue();
+ void EmitPrologue(IR::Block* first_block);
void EmitFetch(const GcnInst& inst);
void EmitExport(const GcnInst& inst);
void EmitFlowControl(u32 pc, const GcnInst& inst);
@@ -326,16 +328,18 @@ private:
void LogMissingOpcode(const GcnInst& inst);
IR::VectorReg GetScratchVgpr(u32 offset);
+ IR::VectorReg GatherInterpQualifiers();
private:
IR::IREmitter ir;
Info& info;
const RuntimeInfo& runtime_info;
const Profile& profile;
+ u32 next_vgpr_num;
+ std::unordered_map vgpr_map;
+ std::array vgpr_to_interp{};
+ IR::VectorReg dst_frag_vreg{};
bool opcode_missing = false;
};
-void Translate(IR::Block* block, u32 block_base, std::span inst_list, Info& info,
- const RuntimeInfo& runtime_info, const Profile& profile);
-
} // namespace Shader::Gcn
diff --git a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp
index 431cb2f04..2d7297c12 100644
--- a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp
@@ -22,13 +22,14 @@ void Translator::EmitVectorInterpolation(const GcnInst& inst) {
// VINTRP
void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
- auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
+ const auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
+ info.interp_qualifiers[attr.param_index] = vgpr_to_interp[inst.src[0].code]; // Stores the vgpr_to_interp entry of the source register for this param. NOTE(review): both lookups use unchecked operator[] (unlike .at() above) — confirm src[0].code always stays within vgpr_to_interp.
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
}
void Translator::V_INTERP_MOV_F32(const GcnInst& inst) {
- auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
+ const auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr); // Read-only lookup; unlike V_INTERP_P2_F32 no interpolation qualifier is recorded here.
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
}
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index 24e0741c1..e14c7988d 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -193,6 +193,8 @@ struct Info {
PersistentSrtInfo srt_info;
std::vector flattened_ud_buf;
+ std::array interp_qualifiers{};
+
IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max;
s32 tess_consts_dword_offset = -1;
@@ -206,6 +208,8 @@ struct Info {
bool has_discard{};
bool has_image_gather{};
bool has_image_query{};
+ bool has_perspective_interp{};
+ bool has_linear_interp{};
bool uses_atomic_float_min_max{};
bool uses_lane_id{};
bool uses_group_quad{};
diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h
index 5117f5650..68472f052 100644
--- a/src/shader_recompiler/ir/attribute.h
+++ b/src/shader_recompiler/ir/attribute.h
@@ -83,6 +83,16 @@ enum class Attribute : u64 {
Max,
};
+enum class Interpolation { // Keep the Perspective* and Linear* groups contiguous and in this order: IsPerspective()/IsLinear() use range comparisons.
+ Invalid = 0, // Also what a value-initialized Interpolation holds.
+ PerspectiveSample = 1, // First value of the perspective range.
+ PerspectiveCenter = 2,
+ PerspectiveCentroid = 3, // Last value of the perspective range.
+ LinearSample = 4, // First value of the linear range.
+ LinearCenter = 5,
+ LinearCentroid = 6, // Last value of the linear range.
+};
+
constexpr size_t NumAttributes = static_cast(Attribute::Max);
constexpr size_t NumRenderTargets = 8;
constexpr size_t NumParams = 32;
@@ -104,6 +114,15 @@ constexpr bool IsMrt(Attribute attribute) noexcept {
return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7;
}
+constexpr bool IsLinear(Interpolation interp) noexcept { // True for LinearSample, LinearCenter and LinearCentroid; false otherwise (including Invalid).
+ return interp >= Interpolation::LinearSample && interp <= Interpolation::LinearCentroid; // Inclusive range test; depends on the enumerator order.
+}
+
+constexpr bool IsPerspective(Interpolation interp) noexcept { // True for PerspectiveSample, PerspectiveCenter and PerspectiveCentroid; false otherwise (including Invalid).
+ return interp >= Interpolation::PerspectiveSample && // Inclusive range test; depends on the enumerator order.
+ interp <= Interpolation::PerspectiveCentroid;
+}
+
[[nodiscard]] std::string NameOf(Attribute attribute);
[[nodiscard]] constexpr Attribute operator+(Attribute attr, int num) {
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index 07249edfe..e6cc32829 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include
-#include
#include
#include
#include "common/assert.h"
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index 7b9b81093..0e41f4b2d 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -6,7 +6,6 @@
#include
#include
-#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/condition.h"
@@ -17,6 +16,7 @@ namespace Shader::IR {
class IREmitter {
public:
+ explicit IREmitter() = default; // Lets an emitter exist before it is bound to a block. NOTE(review): the default state of block/insertion_point is not visible here — confirm it is never used before being reassigned.
explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
explicit IREmitter(Block& block_, Block::iterator insertion_point_)
: block{&block_}, insertion_point{insertion_point_} {}
diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h
index 82aa436a7..c534eecd8 100644
--- a/src/shader_recompiler/ir/reg.h
+++ b/src/shader_recompiler/ir/reg.h
@@ -7,7 +7,7 @@
#include "common/bit_field.h"
#include "common/enum.h"
#include "common/types.h"
-#include "video_core/amdgpu/types.h"
+#include "video_core/amdgpu/pixel_format.h"
namespace Shader::IR {
diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h
index 38c81ba5f..faba8e285 100644
--- a/src/video_core/amdgpu/pixel_format.h
+++ b/src/video_core/amdgpu/pixel_format.h
@@ -5,34 +5,310 @@
#include
#include
+#include "common/assert.h"
#include "common/types.h"
-#include "video_core/amdgpu/types.h"
namespace AmdGpu {
-enum NumberClass {
+// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture]
+enum class DataFormat : u32 { // Values are explicit and not contiguous; see the gaps noted below.
+ FormatInvalid = 0,
+ Format8 = 1,
+ Format16 = 2,
+ Format8_8 = 3,
+ Format32 = 4,
+ Format16_16 = 5,
+ Format10_11_11 = 6,
+ Format11_11_10 = 7,
+ Format10_10_10_2 = 8,
+ Format2_10_10_10 = 9,
+ Format8_8_8_8 = 10,
+ Format32_32 = 11,
+ Format16_16_16_16 = 12,
+ Format32_32_32 = 13,
+ Format32_32_32_32 = 14,
+ Format5_6_5 = 16, // 15 has no enumerator.
+ Format1_5_5_5 = 17,
+ Format5_5_5_1 = 18,
+ Format4_4_4_4 = 19,
+ Format8_24 = 20,
+ Format24_8 = 21,
+ FormatX24_8_32 = 22,
+ FormatGB_GR = 32, // 23-31 have no enumerators.
+ FormatBG_RG = 33,
+ Format5_9_9_9 = 34,
+ FormatBc1 = 35,
+ FormatBc2 = 36,
+ FormatBc3 = 37,
+ FormatBc4 = 38,
+ FormatBc5 = 39,
+ FormatBc6 = 40,
+ FormatBc7 = 41,
+ FormatFmask8_1 = 47, // 42-46 have no enumerators.
+ FormatFmask8_2 = 48,
+ FormatFmask8_4 = 49,
+ FormatFmask16_1 = 50,
+ FormatFmask16_2 = 51,
+ FormatFmask32_2 = 52,
+ FormatFmask32_4 = 53,
+ FormatFmask32_8 = 54,
+ FormatFmask64_4 = 55,
+ FormatFmask64_8 = 56,
+ Format4_4 = 57,
+ Format6_5_5 = 58,
+ Format1 = 59,
+ Format1_Reversed = 60,
+ Format32_As_8 = 61,
+ Format32_As_8_8 = 62,
+ Format32_As_32_32_32_32 = 63,
+};
+
+enum class NumberFormat : u32 { // Explicit values; RemapNumberFormat/MapNumberConversion switch on these.
+ Unorm = 0,
+ Snorm = 1,
+ Uscaled = 2,
+ Sscaled = 3,
+ Uint = 4,
+ Sint = 5,
+ SnormNz = 6,
+ Float = 7,
+ Srgb = 9, // 8 has no enumerator.
+ Ubnorm = 10,
+ UbnormNz = 11,
+ Ubint = 12,
+ Ubscaled = 13,
+};
+
+enum class NumberClass { // Now a scoped enum: enumerators must be written as NumberClass::Float etc.
Float,
Sint,
Uint,
};
-[[nodiscard]] constexpr NumberClass GetNumberClass(const NumberFormat nfmt) {
- switch (nfmt) {
- case NumberFormat::Sint:
- return Sint;
- case NumberFormat::Uint:
- return Uint;
+enum class CompSwizzle : u8 { // Source selector for one output channel: a constant (Zero/One) or an input channel.
+ Zero = 0,
+ One = 1,
+ Red = 4, // 2 and 3 have no enumerators.
+ Green = 5,
+ Blue = 6,
+ Alpha = 7,
+};
+
+enum class NumberConversion : u32 { // Result type of MapNumberConversion.
+ None = 0, // Returned for every number format without a dedicated case in MapNumberConversion.
+ UintToUscaled = 1,
+ SintToSscaled = 2,
+ UnormToUbnorm = 3,
+ Sint8ToSnormNz = 4,
+ Sint16ToSnormNz = 5,
+ Uint32ToUnorm = 6,
+};
+
+struct CompMapping { // One CompSwizzle selector per output channel (r, g, b, a).
+ CompSwizzle r;
+ CompSwizzle g;
+ CompSwizzle b;
+ CompSwizzle a;
+
+ auto operator<=>(const CompMapping& other) const = default; // Member-wise comparison in declaration order.
+
+ template
+ [[nodiscard]] std::array Apply(const std::array& data) const { // Resolves each output channel through its selector and returns the four results in r, g, b, a order.
+ return {
+ ApplySingle(data, r),
+ ApplySingle(data, g),
+ ApplySingle(data, b),
+ ApplySingle(data, a),
+ };
+ }
+
+ [[nodiscard]] CompMapping Inverse() const { // For each source channel, reports which output channel selects it.
+ CompMapping result{};
+ InverseSingle(result.r, CompSwizzle::Red);
+ InverseSingle(result.g, CompSwizzle::Green);
+ InverseSingle(result.b, CompSwizzle::Blue);
+ InverseSingle(result.a, CompSwizzle::Alpha);
+ return result;
+ }
+
+private:
+ template
+ T ApplySingle(const std::array& data, const CompSwizzle swizzle) const { // Zero/One yield T(0)/T(1); Red..Alpha yield data[0]..data[3].
+ switch (swizzle) {
+ case CompSwizzle::Zero:
+ return T(0);
+ case CompSwizzle::One:
+ return T(1);
+ case CompSwizzle::Red:
+ return data[0];
+ case CompSwizzle::Green:
+ return data[1];
+ case CompSwizzle::Blue:
+ return data[2];
+ case CompSwizzle::Alpha:
+ return data[3];
+ default:
+ UNREACHABLE(); // Any selector value outside the six enumerators.
+ }
+ }
+
+ void InverseSingle(CompSwizzle& dst, const CompSwizzle target) const { // Writes to dst the first output channel (checked in r, g, b, a order) whose selector equals target.
+ if (r == target) {
+ dst = CompSwizzle::Red;
+ } else if (g == target) {
+ dst = CompSwizzle::Green;
+ } else if (b == target) {
+ dst = CompSwizzle::Blue;
+ } else if (a == target) {
+ dst = CompSwizzle::Alpha;
+ } else {
+ dst = CompSwizzle::Zero; // No output channel selects target.
+ }
+ }
+};
+
+static constexpr CompMapping IdentityMapping = { // Every output channel selects the input channel of the same name.
+ .r = CompSwizzle::Red,
+ .g = CompSwizzle::Green,
+ .b = CompSwizzle::Blue,
+ .a = CompSwizzle::Alpha,
+};
+
+constexpr DataFormat RemapDataFormat(const DataFormat format) { // Replaces three packed formats with the format whose name lists the same bit widths in reverse order.
+ switch (format) {
+ case DataFormat::Format11_11_10:
+ return DataFormat::Format10_11_11;
+ case DataFormat::Format10_10_10_2:
+ return DataFormat::Format2_10_10_10;
+ case DataFormat::Format5_5_5_1:
+ return DataFormat::Format1_5_5_5;
default:
- return Float;
+ return format; // Every other format is returned unchanged.
}
}
-[[nodiscard]] constexpr bool IsInteger(const NumberFormat nfmt) {
+constexpr NumberFormat RemapNumberFormat(const NumberFormat format, const DataFormat data_format) { // Maps a number format (given its data format) to the number format actually used; pairs with MapNumberConversion.
+ switch (format) {
+ case NumberFormat::Unorm: {
+ switch (data_format) {
+ case DataFormat::Format32:
+ case DataFormat::Format32_32:
+ case DataFormat::Format32_32_32:
+ case DataFormat::Format32_32_32_32:
+ return NumberFormat::Uint; // Same four formats for which MapNumberConversion returns Uint32ToUnorm.
+ default:
+ return format;
+ }
+ }
+ case NumberFormat::Uscaled:
+ return NumberFormat::Uint;
+ case NumberFormat::Sscaled:
+ case NumberFormat::SnormNz:
+ return NumberFormat::Sint;
+ case NumberFormat::Ubnorm:
+ return NumberFormat::Unorm;
+ case NumberFormat::Float:
+ if (data_format == DataFormat::Format8) {
+ // Games may ask for 8-bit float when they want to access the stencil component
+ // of a depth-stencil image. Change to unsigned int to match the stencil format.
+ // This is also the closest approximation to pass the bits through unconverted.
+ return NumberFormat::Uint;
+ }
+ [[fallthrough]]; // Float with any other data format is returned unchanged by the default case.
+ default:
+ return format;
+ }
+}
+
+constexpr CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizzle) { // Permutes the swizzle's channels for four packed formats; other formats get the swizzle back unchanged.
+ switch (format) {
+ case DataFormat::Format1_5_5_5:
+ case DataFormat::Format11_11_10: {
+ CompMapping result; // All four members are assigned below before the value is returned.
+ result.r = swizzle.b; // r and b are exchanged; g and a stay in place.
+ result.g = swizzle.g;
+ result.b = swizzle.r;
+ result.a = swizzle.a;
+ return result;
+ }
+ case DataFormat::Format10_10_10_2: {
+ CompMapping result;
+ result.r = swizzle.a; // Full reversal: r<-a, g<-b, b<-g, a<-r.
+ result.g = swizzle.b;
+ result.b = swizzle.g;
+ result.a = swizzle.r;
+ return result;
+ }
+ case DataFormat::Format4_4_4_4: {
+ // Remap to a more supported component order.
+ CompMapping result;
+ result.r = swizzle.g; // Rotation by one: r<-g, g<-b, b<-a, a<-r.
+ result.g = swizzle.b;
+ result.b = swizzle.a;
+ result.a = swizzle.r;
+ return result;
+ }
+ default:
+ return swizzle;
+ }
+}
+
+constexpr NumberConversion MapNumberConversion(const NumberFormat num_fmt, // Selects the NumberConversion for a number/data format pair.
+ const DataFormat data_fmt) {
+ switch (num_fmt) {
+ case NumberFormat::Unorm: {
+ switch (data_fmt) {
+ case DataFormat::Format32:
+ case DataFormat::Format32_32:
+ case DataFormat::Format32_32_32:
+ case DataFormat::Format32_32_32_32:
+ return NumberConversion::Uint32ToUnorm; // Same four formats that RemapNumberFormat turns into Uint.
+ default:
+ return NumberConversion::None;
+ }
+ }
+ case NumberFormat::Uscaled:
+ return NumberConversion::UintToUscaled;
+ case NumberFormat::Sscaled:
+ return NumberConversion::SintToSscaled;
+ case NumberFormat::Ubnorm:
+ return NumberConversion::UnormToUbnorm;
+ case NumberFormat::SnormNz: {
+ switch (data_fmt) {
+ case DataFormat::Format8:
+ case DataFormat::Format8_8:
+ case DataFormat::Format8_8_8_8:
+ return NumberConversion::Sint8ToSnormNz;
+ case DataFormat::Format16:
+ case DataFormat::Format16_16:
+ case DataFormat::Format16_16_16_16:
+ return NumberConversion::Sint16ToSnormNz;
+ default:
+ UNREACHABLE_MSG("data_fmt = {}", u32(data_fmt)); // SnormNz is only handled for the 8- and 16-bit-per-component formats listed above.
+ }
+ }
+ default:
+ return NumberConversion::None;
+ }
+}
+
+constexpr NumberClass GetNumberClass(const NumberFormat nfmt) { // Classifies a number format as Sint, Uint or (for everything else) Float.
+ switch (nfmt) {
+ case NumberFormat::Sint:
+ return NumberClass::Sint;
+ case NumberFormat::Uint:
+ return NumberClass::Uint;
+ default:
+ return NumberClass::Float; // All formats other than Sint/Uint, not just NumberFormat::Float.
+ }
+}
+
+constexpr bool IsInteger(const NumberFormat nfmt) { // True only for NumberFormat::Sint and NumberFormat::Uint.
return nfmt == AmdGpu::NumberFormat::Sint || nfmt == AmdGpu::NumberFormat::Uint;
}
-[[nodiscard]] std::string_view NameOf(DataFormat fmt);
-[[nodiscard]] std::string_view NameOf(NumberFormat fmt);
+std::string_view NameOf(DataFormat fmt);
+std::string_view NameOf(NumberFormat fmt);
int NumComponents(DataFormat format);
int NumBits(DataFormat format);
diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h
index 89ac04f9a..5ede90200 100644
--- a/src/video_core/amdgpu/resource.h
+++ b/src/video_core/amdgpu/resource.h
@@ -6,7 +6,6 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/bit_field.h"
-#include "common/types.h"
#include "video_core/amdgpu/pixel_format.h"
namespace AmdGpu {
diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h
index f7536f7e2..009fbbbb2 100644
--- a/src/video_core/amdgpu/types.h
+++ b/src/video_core/amdgpu/types.h
@@ -5,7 +5,6 @@
#include
#include
-#include "common/assert.h"
#include "common/types.h"
namespace AmdGpu {
@@ -114,281 +113,6 @@ enum class GsOutputPrimitiveType : u32 {
TriangleStrip = 2,
};
-// Table 8.13 Data and Image Formats [Sea Islands Series Instruction Set Architecture]
-enum class DataFormat : u32 {
- FormatInvalid = 0,
- Format8 = 1,
- Format16 = 2,
- Format8_8 = 3,
- Format32 = 4,
- Format16_16 = 5,
- Format10_11_11 = 6,
- Format11_11_10 = 7,
- Format10_10_10_2 = 8,
- Format2_10_10_10 = 9,
- Format8_8_8_8 = 10,
- Format32_32 = 11,
- Format16_16_16_16 = 12,
- Format32_32_32 = 13,
- Format32_32_32_32 = 14,
- Format5_6_5 = 16,
- Format1_5_5_5 = 17,
- Format5_5_5_1 = 18,
- Format4_4_4_4 = 19,
- Format8_24 = 20,
- Format24_8 = 21,
- FormatX24_8_32 = 22,
- FormatGB_GR = 32,
- FormatBG_RG = 33,
- Format5_9_9_9 = 34,
- FormatBc1 = 35,
- FormatBc2 = 36,
- FormatBc3 = 37,
- FormatBc4 = 38,
- FormatBc5 = 39,
- FormatBc6 = 40,
- FormatBc7 = 41,
- FormatFmask8_1 = 47,
- FormatFmask8_2 = 48,
- FormatFmask8_4 = 49,
- FormatFmask16_1 = 50,
- FormatFmask16_2 = 51,
- FormatFmask32_2 = 52,
- FormatFmask32_4 = 53,
- FormatFmask32_8 = 54,
- FormatFmask64_4 = 55,
- FormatFmask64_8 = 56,
- Format4_4 = 57,
- Format6_5_5 = 58,
- Format1 = 59,
- Format1_Reversed = 60,
- Format32_As_8 = 61,
- Format32_As_8_8 = 62,
- Format32_As_32_32_32_32 = 63,
-};
-
-enum class NumberFormat : u32 {
- Unorm = 0,
- Snorm = 1,
- Uscaled = 2,
- Sscaled = 3,
- Uint = 4,
- Sint = 5,
- SnormNz = 6,
- Float = 7,
- Srgb = 9,
- Ubnorm = 10,
- UbnormNz = 11,
- Ubint = 12,
- Ubscaled = 13,
-};
-
-enum class CompSwizzle : u8 {
- Zero = 0,
- One = 1,
- Red = 4,
- Green = 5,
- Blue = 6,
- Alpha = 7,
-};
-
-enum class NumberConversion : u32 {
- None = 0,
- UintToUscaled = 1,
- SintToSscaled = 2,
- UnormToUbnorm = 3,
- Sint8ToSnormNz = 4,
- Sint16ToSnormNz = 5,
- Uint32ToUnorm = 6,
-};
-
-struct CompMapping {
- CompSwizzle r;
- CompSwizzle g;
- CompSwizzle b;
- CompSwizzle a;
-
- auto operator<=>(const CompMapping& other) const = default;
-
- template
- [[nodiscard]] std::array Apply(const std::array& data) const {
- return {
- ApplySingle(data, r),
- ApplySingle(data, g),
- ApplySingle(data, b),
- ApplySingle(data, a),
- };
- }
-
- [[nodiscard]] CompMapping Inverse() const {
- CompMapping result{};
- InverseSingle(result.r, CompSwizzle::Red);
- InverseSingle(result.g, CompSwizzle::Green);
- InverseSingle(result.b, CompSwizzle::Blue);
- InverseSingle(result.a, CompSwizzle::Alpha);
- return result;
- }
-
-private:
- template
- T ApplySingle(const std::array& data, const CompSwizzle swizzle) const {
- switch (swizzle) {
- case CompSwizzle::Zero:
- return T(0);
- case CompSwizzle::One:
- return T(1);
- case CompSwizzle::Red:
- return data[0];
- case CompSwizzle::Green:
- return data[1];
- case CompSwizzle::Blue:
- return data[2];
- case CompSwizzle::Alpha:
- return data[3];
- default:
- UNREACHABLE();
- }
- }
-
- void InverseSingle(CompSwizzle& dst, const CompSwizzle target) const {
- if (r == target) {
- dst = CompSwizzle::Red;
- } else if (g == target) {
- dst = CompSwizzle::Green;
- } else if (b == target) {
- dst = CompSwizzle::Blue;
- } else if (a == target) {
- dst = CompSwizzle::Alpha;
- } else {
- dst = CompSwizzle::Zero;
- }
- }
-};
-
-static constexpr CompMapping IdentityMapping = {
- .r = CompSwizzle::Red,
- .g = CompSwizzle::Green,
- .b = CompSwizzle::Blue,
- .a = CompSwizzle::Alpha,
-};
-
-inline DataFormat RemapDataFormat(const DataFormat format) {
- switch (format) {
- case DataFormat::Format11_11_10:
- return DataFormat::Format10_11_11;
- case DataFormat::Format10_10_10_2:
- return DataFormat::Format2_10_10_10;
- case DataFormat::Format5_5_5_1:
- return DataFormat::Format1_5_5_5;
- default:
- return format;
- }
-}
-
-inline NumberFormat RemapNumberFormat(const NumberFormat format, const DataFormat data_format) {
- switch (format) {
- case NumberFormat::Unorm: {
- switch (data_format) {
- case DataFormat::Format32:
- case DataFormat::Format32_32:
- case DataFormat::Format32_32_32:
- case DataFormat::Format32_32_32_32:
- return NumberFormat::Uint;
- default:
- return format;
- }
- }
- case NumberFormat::Uscaled:
- return NumberFormat::Uint;
- case NumberFormat::Sscaled:
- case NumberFormat::SnormNz:
- return NumberFormat::Sint;
- case NumberFormat::Ubnorm:
- return NumberFormat::Unorm;
- case NumberFormat::Float:
- if (data_format == DataFormat::Format8) {
- // Games may ask for 8-bit float when they want to access the stencil component
- // of a depth-stencil image. Change to unsigned int to match the stencil format.
- // This is also the closest approximation to pass the bits through unconverted.
- return NumberFormat::Uint;
- }
- [[fallthrough]];
- default:
- return format;
- }
-}
-
-inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizzle) {
- switch (format) {
- case DataFormat::Format1_5_5_5:
- case DataFormat::Format11_11_10: {
- CompMapping result;
- result.r = swizzle.b;
- result.g = swizzle.g;
- result.b = swizzle.r;
- result.a = swizzle.a;
- return result;
- }
- case DataFormat::Format10_10_10_2: {
- CompMapping result;
- result.r = swizzle.a;
- result.g = swizzle.b;
- result.b = swizzle.g;
- result.a = swizzle.r;
- return result;
- }
- case DataFormat::Format4_4_4_4: {
- // Remap to a more supported component order.
- CompMapping result;
- result.r = swizzle.g;
- result.g = swizzle.b;
- result.b = swizzle.a;
- result.a = swizzle.r;
- return result;
- }
- default:
- return swizzle;
- }
-}
-
-inline NumberConversion MapNumberConversion(const NumberFormat num_fmt, const DataFormat data_fmt) {
- switch (num_fmt) {
- case NumberFormat::Unorm: {
- switch (data_fmt) {
- case DataFormat::Format32:
- case DataFormat::Format32_32:
- case DataFormat::Format32_32_32:
- case DataFormat::Format32_32_32_32:
- return NumberConversion::Uint32ToUnorm;
- default:
- return NumberConversion::None;
- }
- }
- case NumberFormat::Uscaled:
- return NumberConversion::UintToUscaled;
- case NumberFormat::Sscaled:
- return NumberConversion::SintToSscaled;
- case NumberFormat::Ubnorm:
- return NumberConversion::UnormToUbnorm;
- case NumberFormat::SnormNz: {
- switch (data_fmt) {
- case DataFormat::Format8:
- case DataFormat::Format8_8:
- case DataFormat::Format8_8_8_8:
- return NumberConversion::Sint8ToSnormNz;
- case DataFormat::Format16:
- case DataFormat::Format16_16:
- case DataFormat::Format16_16_16_16:
- return NumberConversion::Sint16ToSnormNz;
- default:
- UNREACHABLE_MSG("data_fmt = {}", u32(data_fmt));
- }
- }
- default:
- return NumberConversion::None;
- }
-}
-
} // namespace AmdGpu
template <>
From ce42eccc9d9b629b2e25296d42f7d686d53cfb25 Mon Sep 17 00:00:00 2001
From: TheTurtle
Date: Sun, 8 Jun 2025 23:09:08 +0300
Subject: [PATCH 20/28] texture_cache: Handle compressed views of uncompressed
images (#3056)
* pixel_format: Remove unused tables, refactor
* host_compatibility: Cleanup and support uncompressed views of compressed formats
* texture_cache: Handle compressed views of uncompressed images
* tile_manager: Bump max supported mips to 16
Fixes a crash during start
* oops
* texture_cache: Fix order of format compat check
---
CMakeLists.txt | 1 +
.../ir/passes/lower_buffer_format_to_raw.cpp | 2 +-
src/video_core/amdgpu/liverpool.h | 2 +-
src/video_core/amdgpu/pixel_format.cpp | 214 +++++-----
src/video_core/amdgpu/pixel_format.h | 6 +-
.../host_shaders/detilers/micro_128bpp.comp | 2 +-
.../host_shaders/detilers/micro_16bpp.comp | 2 +-
.../host_shaders/detilers/micro_32bpp.comp | 2 +-
.../host_shaders/detilers/micro_64bpp.comp | 2 +-
.../host_shaders/detilers/micro_8bpp.comp | 2 +-
.../texture_cache/host_compatibility.cpp | 220 ++++++++++
.../texture_cache/host_compatibility.h | 380 +-----------------
src/video_core/texture_cache/image.cpp | 65 +--
src/video_core/texture_cache/image_info.cpp | 79 +++-
src/video_core/texture_cache/image_info.h | 25 +-
.../texture_cache/texture_cache.cpp | 25 +-
src/video_core/texture_cache/tile_manager.cpp | 14 +-
17 files changed, 434 insertions(+), 609 deletions(-)
create mode 100644 src/video_core/texture_cache/host_compatibility.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 20d33ac95..6dfe9348a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -964,6 +964,7 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
src/video_core/texture_cache/tile_manager.cpp
src/video_core/texture_cache/tile_manager.h
src/video_core/texture_cache/types.h
+ src/video_core/texture_cache/host_compatibility.cpp
src/video_core/texture_cache/host_compatibility.h
src/video_core/page_manager.cpp
src/video_core/page_manager.h
diff --git a/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp
index 65be02541..fcb86e3fb 100644
--- a/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp
+++ b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp
@@ -15,7 +15,7 @@ struct FormatInfo {
AmdGpu::NumberFormat num_format;
AmdGpu::CompMapping swizzle;
AmdGpu::NumberConversion num_conversion;
- int num_components;
+ u32 num_components;
};
static bool IsBufferFormatLoad(const IR::Inst& inst) {
diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h
index 245e34d35..2f33c7302 100644
--- a/src/video_core/amdgpu/liverpool.h
+++ b/src/video_core/amdgpu/liverpool.h
@@ -914,7 +914,7 @@ struct Liverpool {
}
size_t GetColorSliceSize() const {
- const auto num_bytes_per_element = NumBits(info.format) / 8u;
+ const auto num_bytes_per_element = NumBitsPerBlock(info.format) / 8u;
const auto slice_size =
num_bytes_per_element * (slice.tile_max + 1) * 64u * NumSamples();
return slice_size;
diff --git a/src/video_core/amdgpu/pixel_format.cpp b/src/video_core/amdgpu/pixel_format.cpp
index 881c33e44..682cdf357 100644
--- a/src/video_core/amdgpu/pixel_format.cpp
+++ b/src/video_core/amdgpu/pixel_format.cpp
@@ -111,136 +111,106 @@ std::string_view NameOf(NumberFormat fmt) {
}
}
-int NumComponents(DataFormat format) {
- constexpr std::array num_components_per_element = {
- 0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2,
- 2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 3, 4, 4, 4, 1, 2, 3, 4,
- -1, -1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1, 1};
-
- const u32 index = static_cast(format);
- if (index >= num_components_per_element.size()) {
- return 0;
- }
- return num_components_per_element[index];
-}
-
-int NumBits(DataFormat format) {
- const std::array num_bits_per_element = {
- 0, 8, 16, 16, 32, 32, 32, 32, 32, 32, 32, 64, 64, 96, 128, -1, 16, 16, 16, 16, 32,
- 32, 64, -1, -1, -1, -1, -1, -1, -1, -1, -1, 16, 16, 32, 4, 8, 8, 4, 8, 8, 8,
- -1, -1, 8, 8, 8, 8, 8, 8, 16, 16, 32, 32, 32, 64, 64, 8, 16, 1, 1};
-
- const u32 index = static_cast(format);
- if (index >= num_bits_per_element.size()) {
- return 0;
- }
- return num_bits_per_element[index];
-}
-
-static constexpr std::array component_bits = {
- std::array{0, 0, 0, 0}, // 0 FormatInvalid
- std::array{8, 0, 0, 0}, // 1 Format8
- std::array{16, 0, 0, 0}, // 2 Format16
- std::array{8, 8, 0, 0}, // 3 Format8_8
- std::array{32, 0, 0, 0}, // 4 Format32
- std::array{16, 16, 0, 0}, // 5 Format16_16
- std::array{11, 11, 10, 0}, // 6 Format10_11_11
- std::array{10, 11, 11, 0}, // 7 Format11_11_10
- std::array{2, 10, 10, 10}, // 8 Format10_10_10_2
- std::array{10, 10, 10, 2}, // 9 Format2_10_10_10
- std::array{8, 8, 8, 8}, // 10 Format8_8_8_8
- std::array{32, 32, 0, 0}, // 11 Format32_32
- std::array{16, 16, 16, 16}, // 12 Format16_16_16_16
- std::array{32, 32, 32, 0}, // 13 Format32_32_32
- std::array{32, 32, 32, 32}, // 14 Format32_32_32_32
- std::array{0, 0, 0, 0}, // 15
- std::array{5, 6, 5, 0}, // 16 Format5_6_5
- std::array{5, 5, 5, 1}, // 17 Format1_5_5_5
- std::array{1, 5, 5, 5}, // 18 Format5_5_5_1
- std::array{4, 4, 4, 4}, // 19 Format4_4_4_4
- std::array{24, 8, 0, 0}, // 20 Format8_24
- std::array{8, 24, 0, 0}, // 21 Format24_8
- std::array{8, 24, 0, 0}, // 22 FormatX24_8_32
- std::array{0, 0, 0, 0}, // 23
- std::array{0, 0, 0, 0}, // 24
- std::array{0, 0, 0, 0}, // 25
- std::array{0, 0, 0, 0}, // 26
- std::array{0, 0, 0, 0}, // 27
- std::array{0, 0, 0, 0}, // 28
- std::array{0, 0, 0, 0}, // 29
- std::array{0, 0, 0, 0}, // 30
- std::array{0, 0, 0, 0}, // 31
- std::array{0, 0, 0, 0}, // 32 FormatGB_GR
- std::array{0, 0, 0, 0}, // 33 FormatBG_RG
- std::array{0, 0, 0, 0}, // 34 Format5_9_9_9
- std::array{0, 0, 0, 0}, // 35 FormatBc1
- std::array{0, 0, 0, 0}, // 36 FormatBc2
- std::array{0, 0, 0, 0}, // 37 FormatBc3
- std::array{0, 0, 0, 0}, // 38 FormatBc4
- std::array{0, 0, 0, 0}, // 39 FormatBc5
- std::array{0, 0, 0, 0}, // 40 FormatBc6
- std::array{0, 0, 0, 0}, // 41 FormatBc7
+static constexpr std::array NUM_COMPONENTS = { // Indexed by the numeric DataFormat value; covers values 0..41 only.
+ 0, // 0 FormatInvalid
+ 1, // 1 Format8
+ 1, // 2 Format16
+ 2, // 3 Format8_8
+ 1, // 4 Format32
+ 2, // 5 Format16_16
+ 3, // 6 Format10_11_11
+ 3, // 7 Format11_11_10
+ 4, // 8 Format10_10_10_2
+ 4, // 9 Format2_10_10_10
+ 4, // 10 Format8_8_8_8
+ 2, // 11 Format32_32
+ 4, // 12 Format16_16_16_16
+ 3, // 13 Format32_32_32
+ 4, // 14 Format32_32_32_32
+ 0, // 15 (no enumerator; the removed table used -1 for such slots)
+ 3, // 16 Format5_6_5
+ 4, // 17 Format1_5_5_5
+ 4, // 18 Format5_5_5_1
+ 4, // 19 Format4_4_4_4
+ 2, // 20 Format8_24
+ 2, // 21 Format24_8
+ 2, // 22 FormatX24_8_32
+ 0, // 23
+ 0, // 24
+ 0, // 25
+ 0, // 26
+ 0, // 27
+ 0, // 28
+ 0, // 29
+ 0, // 30
+ 0, // 31
+ 3, // 32 FormatGB_GR
+ 3, // 33 FormatBG_RG
+ 4, // 34 Format5_9_9_9 NOTE(review): the removed num_components_per_element table had 3 here — confirm 4 is intended.
+ 4, // 35 FormatBc1
+ 4, // 36 FormatBc2
+ 4, // 37 FormatBc3
+ 1, // 38 FormatBc4
+ 2, // 39 FormatBc5
+ 3, // 40 FormatBc6
+ 4, // 41 FormatBc7
+};
-u32 ComponentBits(DataFormat format, u32 comp) {
+u32 NumComponents(DataFormat format) { // Returns the NUM_COMPONENTS entry for format.
const u32 index = static_cast(format);
- if (index >= component_bits.size() || comp >= 4) {
- return 0;
- }
- return component_bits[index][comp];
+ ASSERT_MSG(index < NUM_COMPONENTS.size(), "Invalid data format = {}", format); // Formats past the end of the table now assert; the removed NumComponents returned 0 for them.
+ return NUM_COMPONENTS[index];
}
-static constexpr std::array component_offset = {
- std::array{-1, -1, -1, -1}, // 0 FormatInvalid
- std::array{0, -1, -1, -1}, // 1 Format8
- std::array{0, -1, -1, -1}, // 2 Format16
- std::array{0, 8, -1, -1}, // 3 Format8_8
- std::array{0, -1, -1, -1}, // 4 Format32
- std::array{0, 16, -1, -1}, // 5 Format16_16
- std::array{0, 11, 22, -1}, // 6 Format10_11_11
- std::array{0, 10, 21, -1}, // 7 Format11_11_10
- std::array{0, 2, 12, 22}, // 8 Format10_10_10_2
- std::array{0, 10, 20, 30}, // 9 Format2_10_10_10
- std::array{0, 8, 16, 24}, // 10 Format8_8_8_8
- std::array{0, 32, -1, -1}, // 11 Format32_32
- std::array{0, 16, 32, 48}, // 12 Format16_16_16_16
- std::array{0, 32, 64, -1}, // 13 Format32_32_32
- std::array{0, 32, 64, 96}, // 14 Format32_32_32_32
- std::array{-1, -1, -1, -1}, // 15
- std::array{0, 5, 11, -1}, // 16 Format5_6_5
- std::array{0, 5, 10, 15}, // 17 Format1_5_5_5
- std::array{0, 1, 6, 11}, // 18 Format5_5_5_1
- std::array{0, 4, 8, 12}, // 19 Format4_4_4_4
- std::array{0, 24, -1, -1}, // 20 Format8_24
- std::array{0, 8, -1, -1}, // 21 Format24_8
- std::array{0, 8, -1, -1}, // 22 FormatX24_8_32
- std::array{-1, -1, -1, -1}, // 23
- std::array{-1, -1, -1, -1}, // 24
- std::array{-1, -1, -1, -1}, // 25
- std::array{-1, -1, -1, -1}, // 26
- std::array{-1, -1, -1, -1}, // 27
- std::array{-1, -1, -1, -1}, // 28
- std::array{-1, -1, -1, -1}, // 29
- std::array{-1, -1, -1, -1}, // 30
- std::array{-1, -1, -1, -1}, // 31
- std::array{-1, -1, -1, -1}, // 32 FormatGB_GR
- std::array{-1, -1, -1, -1}, // 33 FormatBG_RG
- std::array{-1, -1, -1, -1}, // 34 Format5_9_9_9
- std::array{-1, -1, -1, -1}, // 35 FormatBc1
- std::array{-1, -1, -1, -1}, // 36 FormatBc2
- std::array{-1, -1, -1, -1}, // 37 FormatBc3
- std::array{-1, -1, -1, -1}, // 38 FormatBc4
- std::array{-1, -1, -1, -1}, // 39 FormatBc5
- std::array{-1, -1, -1, -1}, // 40 FormatBc6
- std::array{-1, -1, -1, -1}, // 41 FormatBc7
+static constexpr std::array BITS_PER_BLOCK = { // Indexed by the numeric DataFormat value; covers values 0..41 only.
+ 0, // 0 FormatInvalid
+ 8, // 1 Format8
+ 16, // 2 Format16
+ 16, // 3 Format8_8
+ 32, // 4 Format32
+ 32, // 5 Format16_16
+ 32, // 6 Format10_11_11
+ 32, // 7 Format11_11_10
+ 32, // 8 Format10_10_10_2
+ 32, // 9 Format2_10_10_10
+ 32, // 10 Format8_8_8_8
+ 64, // 11 Format32_32
+ 64, // 12 Format16_16_16_16
+ 96, // 13 Format32_32_32
+ 128, // 14 Format32_32_32_32
+ 0, // 15 (no enumerator; the removed table used -1 for such slots)
+ 16, // 16 Format5_6_5
+ 16, // 17 Format1_5_5_5
+ 16, // 18 Format5_5_5_1
+ 16, // 19 Format4_4_4_4
+ 32, // 20 Format8_24
+ 32, // 21 Format24_8
+ 64, // 22 FormatX24_8_32
+ 0, // 23
+ 0, // 24
+ 0, // 25
+ 0, // 26
+ 0, // 27
+ 0, // 28
+ 0, // 29
+ 0, // 30
+ 0, // 31
+ 16, // 32 FormatGB_GR
+ 16, // 33 FormatBG_RG
+ 32, // 34 Format5_9_9_9
+ 64, // 35 FormatBc1 (Bc entries are 16x the values in the removed num_bits_per_element table)
+ 128, // 36 FormatBc2
+ 128, // 37 FormatBc3
+ 64, // 38 FormatBc4
+ 128, // 39 FormatBc5
+ 128, // 40 FormatBc6
+ 128, // 41 FormatBc7
+};
-s32 ComponentOffset(DataFormat format, u32 comp) {
+u32 NumBitsPerBlock(DataFormat format) { // Returns the BITS_PER_BLOCK entry for format.
const u32 index = static_cast(format);
- if (index >= component_offset.size() || comp >= 4) {
- return -1;
- }
- return component_offset[index][comp];
+ ASSERT_MSG(index < BITS_PER_BLOCK.size(), "Invalid data format = {}", format); // Formats past the end of the table assert; the removed NumBits returned 0 for them.
+ return BITS_PER_BLOCK[index];
}
} // namespace AmdGpu
diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h
index faba8e285..bd0f778f4 100644
--- a/src/video_core/amdgpu/pixel_format.h
+++ b/src/video_core/amdgpu/pixel_format.h
@@ -310,10 +310,8 @@ constexpr bool IsInteger(const NumberFormat nfmt) {
std::string_view NameOf(DataFormat fmt);
std::string_view NameOf(NumberFormat fmt);
-int NumComponents(DataFormat format);
-int NumBits(DataFormat format);
-u32 ComponentBits(DataFormat format, u32 comp);
-s32 ComponentOffset(DataFormat format, u32 comp);
+u32 NumComponents(DataFormat format);
+u32 NumBitsPerBlock(DataFormat format);
} // namespace AmdGpu
diff --git a/src/video_core/host_shaders/detilers/micro_128bpp.comp b/src/video_core/host_shaders/detilers/micro_128bpp.comp
index a09a0b4c4..a43073a8b 100644
--- a/src/video_core/host_shaders/detilers/micro_128bpp.comp
+++ b/src/video_core/host_shaders/detilers/micro_128bpp.comp
@@ -16,7 +16,7 @@ layout(push_constant) uniform image_info {
uint num_levels;
uint pitch;
uint height;
- uint sizes[14];
+ uint sizes[16];
} info;
// Inverse morton LUT, small enough to fit into K$
diff --git a/src/video_core/host_shaders/detilers/micro_16bpp.comp b/src/video_core/host_shaders/detilers/micro_16bpp.comp
index 909a14acc..5f1240d64 100644
--- a/src/video_core/host_shaders/detilers/micro_16bpp.comp
+++ b/src/video_core/host_shaders/detilers/micro_16bpp.comp
@@ -18,7 +18,7 @@ layout(push_constant) uniform image_info {
uint num_levels;
uint pitch;
uint height;
- uint sizes[14];
+ uint sizes[16];
} info;
#define MICRO_TILE_DIM 8
diff --git a/src/video_core/host_shaders/detilers/micro_32bpp.comp b/src/video_core/host_shaders/detilers/micro_32bpp.comp
index cdc8d0018..605523e4d 100644
--- a/src/video_core/host_shaders/detilers/micro_32bpp.comp
+++ b/src/video_core/host_shaders/detilers/micro_32bpp.comp
@@ -16,7 +16,7 @@ layout(push_constant) uniform image_info {
uint num_levels;
uint pitch;
uint height;
- uint sizes[14];
+ uint sizes[16];
} info;
// Inverse morton LUT, small enough to fit into K$
diff --git a/src/video_core/host_shaders/detilers/micro_64bpp.comp b/src/video_core/host_shaders/detilers/micro_64bpp.comp
index c128ba5a1..1bca44067 100644
--- a/src/video_core/host_shaders/detilers/micro_64bpp.comp
+++ b/src/video_core/host_shaders/detilers/micro_64bpp.comp
@@ -16,7 +16,7 @@ layout(push_constant) uniform image_info {
uint num_levels;
uint pitch;
uint height;
- uint sizes[14];
+ uint sizes[16];
} info;
// Inverse morton LUT, small enough to fit into K$
diff --git a/src/video_core/host_shaders/detilers/micro_8bpp.comp b/src/video_core/host_shaders/detilers/micro_8bpp.comp
index ecf706450..1d9b48daa 100644
--- a/src/video_core/host_shaders/detilers/micro_8bpp.comp
+++ b/src/video_core/host_shaders/detilers/micro_8bpp.comp
@@ -19,7 +19,7 @@ layout(push_constant) uniform image_info {
uint num_levels;
uint pitch;
uint height;
- uint sizes[14];
+ uint sizes[16];
} info;
#define MICRO_TILE_DIM 8
diff --git a/src/video_core/texture_cache/host_compatibility.cpp b/src/video_core/texture_cache/host_compatibility.cpp
new file mode 100644
index 000000000..327709e64
--- /dev/null
+++ b/src/video_core/texture_cache/host_compatibility.cpp
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright © 2023 Skyline Team and Contributors (https://github.com/skyline-emu/)
+// Copyright © 2015-2023 The Khronos Group Inc.
+// Copyright © 2015-2023 Valve Corporation
+// Copyright © 2015-2023 LunarG, Inc.
+
+#include <unordered_map>
+#include "common/enum.h"
+#include "video_core/texture_cache/host_compatibility.h"
+
+namespace VideoCore {
+
+/**
+ * @brief All classes of format compatibility according to the Vulkan specification, one bit per class so that a single value can hold several classes at once
+ * @url
+ * https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f/layers/generated/vk_format_utils.h#L47-L131
+ */
+enum class CompatibilityClass {
+ NONE = 0, // no class bit set
+ _128BIT = 1 << 0,
+ _16BIT = 1 << 1,
+ _192BIT = 1 << 2,
+ _24BIT = 1 << 3,
+ _256BIT = 1 << 4,
+ _32BIT = 1 << 5,
+ _48BIT = 1 << 6,
+ _64BIT = 1 << 7,
+ _8BIT = 1 << 8,
+ _96BIT = 1 << 9,
+ BC1_RGB = 1 << 10,
+ BC1_RGBA = 1 << 11,
+ BC2 = 1 << 12,
+ BC3 = 1 << 13,
+ BC4 = 1 << 14,
+ BC5 = 1 << 15,
+ BC6H = 1 << 16,
+ BC7 = 1 << 17,
+ D16 = 1 << 18,
+ D16S8 = 1 << 19,
+ D24 = 1 << 20,
+ D24S8 = 1 << 21,
+ D32 = 1 << 22,
+ D32S8 = 1 << 23,
+ S8 = 1 << 24,
+};
+DECLARE_ENUM_FLAG_OPERATORS(CompatibilityClass) // presumably supplies the bitwise operators (| and &) applied to this enum - confirm in common/enum.h
+
+/**
+ * @brief Maps each listed vk::Format to its compatibility class bits according to the Vulkan specification; BC formats additionally carry a _64BIT or _128BIT bit
+ * @url
+ * https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility-classes
+ * @url
+ * https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f/layers/generated/vk_format_utils.cpp#L70-L812
+ */
+static const std::unordered_map<vk::Format, CompatibilityClass> FORMAT_TABLE = {
+ {vk::Format::eA1R5G5B5UnormPack16, CompatibilityClass::_16BIT},
+ {vk::Format::eA2B10G10R10SintPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA2B10G10R10SnormPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA2B10G10R10SscaledPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA2B10G10R10UintPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA2B10G10R10UnormPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA2B10G10R10UscaledPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA2R10G10B10SintPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA2R10G10B10SnormPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA2R10G10B10SscaledPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA2R10G10B10UintPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA2R10G10B10UnormPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA2R10G10B10UscaledPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA4B4G4R4UnormPack16, CompatibilityClass::_16BIT},
+ {vk::Format::eA4R4G4B4UnormPack16, CompatibilityClass::_16BIT},
+ {vk::Format::eA8B8G8R8SintPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA8B8G8R8SnormPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA8B8G8R8SrgbPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA8B8G8R8SscaledPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA8B8G8R8UintPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA8B8G8R8UnormPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eA8B8G8R8UscaledPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eB10G11R11UfloatPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eB4G4R4A4UnormPack16, CompatibilityClass::_16BIT},
+ {vk::Format::eB5G5R5A1UnormPack16, CompatibilityClass::_16BIT},
+ {vk::Format::eB5G6R5UnormPack16, CompatibilityClass::_16BIT},
+ {vk::Format::eB8G8R8A8Sint, CompatibilityClass::_32BIT},
+ {vk::Format::eB8G8R8A8Snorm, CompatibilityClass::_32BIT},
+ {vk::Format::eB8G8R8A8Srgb, CompatibilityClass::_32BIT},
+ {vk::Format::eB8G8R8A8Sscaled, CompatibilityClass::_32BIT},
+ {vk::Format::eB8G8R8A8Uint, CompatibilityClass::_32BIT},
+ {vk::Format::eB8G8R8A8Unorm, CompatibilityClass::_32BIT},
+ {vk::Format::eB8G8R8A8Uscaled, CompatibilityClass::_32BIT},
+ {vk::Format::eB8G8R8Sint, CompatibilityClass::_24BIT},
+ {vk::Format::eB8G8R8Snorm, CompatibilityClass::_24BIT},
+ {vk::Format::eB8G8R8Srgb, CompatibilityClass::_24BIT},
+ {vk::Format::eB8G8R8Sscaled, CompatibilityClass::_24BIT},
+ {vk::Format::eB8G8R8Uint, CompatibilityClass::_24BIT},
+ {vk::Format::eB8G8R8Unorm, CompatibilityClass::_24BIT},
+ {vk::Format::eB8G8R8Uscaled, CompatibilityClass::_24BIT},
+ {vk::Format::eBc1RgbaSrgbBlock, CompatibilityClass::BC1_RGBA | CompatibilityClass::_64BIT},
+ {vk::Format::eBc1RgbaUnormBlock, CompatibilityClass::BC1_RGBA | CompatibilityClass::_64BIT},
+ {vk::Format::eBc1RgbSrgbBlock, CompatibilityClass::BC1_RGB | CompatibilityClass::_64BIT},
+ {vk::Format::eBc1RgbUnormBlock, CompatibilityClass::BC1_RGB | CompatibilityClass::_64BIT},
+ {vk::Format::eBc2SrgbBlock, CompatibilityClass::BC2 | CompatibilityClass::_128BIT},
+ {vk::Format::eBc2UnormBlock, CompatibilityClass::BC2 | CompatibilityClass::_128BIT},
+ {vk::Format::eBc3SrgbBlock, CompatibilityClass::BC3 | CompatibilityClass::_128BIT},
+ {vk::Format::eBc3UnormBlock, CompatibilityClass::BC3 | CompatibilityClass::_128BIT},
+ {vk::Format::eBc4SnormBlock, CompatibilityClass::BC4 | CompatibilityClass::_64BIT},
+ {vk::Format::eBc4UnormBlock, CompatibilityClass::BC4 | CompatibilityClass::_64BIT},
+ {vk::Format::eBc5SnormBlock, CompatibilityClass::BC5 | CompatibilityClass::_128BIT},
+ {vk::Format::eBc5UnormBlock, CompatibilityClass::BC5 | CompatibilityClass::_128BIT},
+ {vk::Format::eBc6HSfloatBlock, CompatibilityClass::BC6H | CompatibilityClass::_128BIT},
+ {vk::Format::eBc6HUfloatBlock, CompatibilityClass::BC6H | CompatibilityClass::_128BIT},
+ {vk::Format::eBc7SrgbBlock, CompatibilityClass::BC7 | CompatibilityClass::_128BIT},
+ {vk::Format::eBc7UnormBlock, CompatibilityClass::BC7 | CompatibilityClass::_128BIT},
+ {vk::Format::eD16Unorm, CompatibilityClass::D16},
+ {vk::Format::eD16UnormS8Uint, CompatibilityClass::D16S8},
+ {vk::Format::eD24UnormS8Uint, CompatibilityClass::D24S8},
+ {vk::Format::eD32Sfloat, CompatibilityClass::D32},
+ {vk::Format::eD32SfloatS8Uint, CompatibilityClass::D32S8},
+ {vk::Format::eE5B9G9R9UfloatPack32, CompatibilityClass::_32BIT},
+ {vk::Format::eR10X6G10X6Unorm2Pack16, CompatibilityClass::_32BIT},
+ {vk::Format::eR10X6UnormPack16, CompatibilityClass::_16BIT},
+ {vk::Format::eR12X4G12X4Unorm2Pack16, CompatibilityClass::_32BIT},
+ {vk::Format::eR12X4UnormPack16, CompatibilityClass::_16BIT},
+ {vk::Format::eR16G16B16A16Sfloat, CompatibilityClass::_64BIT},
+ {vk::Format::eR16G16B16A16Sint, CompatibilityClass::_64BIT},
+ {vk::Format::eR16G16B16A16Snorm, CompatibilityClass::_64BIT},
+ {vk::Format::eR16G16B16A16Sscaled, CompatibilityClass::_64BIT},
+ {vk::Format::eR16G16B16A16Uint, CompatibilityClass::_64BIT},
+ {vk::Format::eR16G16B16A16Unorm, CompatibilityClass::_64BIT},
+ {vk::Format::eR16G16B16A16Uscaled, CompatibilityClass::_64BIT},
+ {vk::Format::eR16G16B16Sfloat, CompatibilityClass::_48BIT},
+ {vk::Format::eR16G16B16Sint, CompatibilityClass::_48BIT},
+ {vk::Format::eR16G16B16Snorm, CompatibilityClass::_48BIT},
+ {vk::Format::eR16G16B16Sscaled, CompatibilityClass::_48BIT},
+ {vk::Format::eR16G16B16Uint, CompatibilityClass::_48BIT},
+ {vk::Format::eR16G16B16Unorm, CompatibilityClass::_48BIT},
+ {vk::Format::eR16G16B16Uscaled, CompatibilityClass::_48BIT},
+ {vk::Format::eR16G16Sfloat, CompatibilityClass::_32BIT},
+ {vk::Format::eR16G16Sint, CompatibilityClass::_32BIT},
+ {vk::Format::eR16G16Snorm, CompatibilityClass::_32BIT},
+ {vk::Format::eR16G16Sscaled, CompatibilityClass::_32BIT},
+ {vk::Format::eR16G16Uint, CompatibilityClass::_32BIT},
+ {vk::Format::eR16G16Unorm, CompatibilityClass::_32BIT},
+ {vk::Format::eR16G16Uscaled, CompatibilityClass::_32BIT},
+ {vk::Format::eR16Sfloat, CompatibilityClass::_16BIT},
+ {vk::Format::eR16Sint, CompatibilityClass::_16BIT},
+ {vk::Format::eR16Snorm, CompatibilityClass::_16BIT},
+ {vk::Format::eR16Sscaled, CompatibilityClass::_16BIT},
+ {vk::Format::eR16Uint, CompatibilityClass::_16BIT},
+ {vk::Format::eR16Unorm, CompatibilityClass::_16BIT},
+ {vk::Format::eR16Uscaled, CompatibilityClass::_16BIT},
+ {vk::Format::eR32G32B32A32Sfloat, CompatibilityClass::_128BIT},
+ {vk::Format::eR32G32B32A32Sint, CompatibilityClass::_128BIT},
+ {vk::Format::eR32G32B32A32Uint, CompatibilityClass::_128BIT},
+ {vk::Format::eR32G32B32Sfloat, CompatibilityClass::_96BIT},
+ {vk::Format::eR32G32B32Sint, CompatibilityClass::_96BIT},
+ {vk::Format::eR32G32B32Uint, CompatibilityClass::_96BIT},
+ {vk::Format::eR32G32Sfloat, CompatibilityClass::_64BIT},
+ {vk::Format::eR32G32Sint, CompatibilityClass::_64BIT},
+ {vk::Format::eR32G32Uint, CompatibilityClass::_64BIT},
+ {vk::Format::eR32Sfloat, CompatibilityClass::_32BIT},
+ {vk::Format::eR32Sint, CompatibilityClass::_32BIT},
+ {vk::Format::eR32Uint, CompatibilityClass::_32BIT},
+ {vk::Format::eR4G4B4A4UnormPack16, CompatibilityClass::_16BIT},
+ {vk::Format::eR4G4UnormPack8, CompatibilityClass::_8BIT},
+ {vk::Format::eR5G5B5A1UnormPack16, CompatibilityClass::_16BIT},
+ {vk::Format::eR5G6B5UnormPack16, CompatibilityClass::_16BIT},
+ {vk::Format::eR64G64B64A64Sfloat, CompatibilityClass::_256BIT},
+ {vk::Format::eR64G64B64A64Sint, CompatibilityClass::_256BIT},
+ {vk::Format::eR64G64B64A64Uint, CompatibilityClass::_256BIT},
+ {vk::Format::eR64G64B64Sfloat, CompatibilityClass::_192BIT},
+ {vk::Format::eR64G64B64Sint, CompatibilityClass::_192BIT},
+ {vk::Format::eR64G64B64Uint, CompatibilityClass::_192BIT},
+ {vk::Format::eR64G64Sfloat, CompatibilityClass::_128BIT},
+ {vk::Format::eR64G64Sint, CompatibilityClass::_128BIT},
+ {vk::Format::eR64G64Uint, CompatibilityClass::_128BIT},
+ {vk::Format::eR64Sfloat, CompatibilityClass::_64BIT},
+ {vk::Format::eR64Sint, CompatibilityClass::_64BIT},
+ {vk::Format::eR64Uint, CompatibilityClass::_64BIT},
+ {vk::Format::eR8G8B8A8Sint, CompatibilityClass::_32BIT},
+ {vk::Format::eR8G8B8A8Snorm, CompatibilityClass::_32BIT},
+ {vk::Format::eR8G8B8A8Srgb, CompatibilityClass::_32BIT},
+ {vk::Format::eR8G8B8A8Sscaled, CompatibilityClass::_32BIT},
+ {vk::Format::eR8G8B8A8Uint, CompatibilityClass::_32BIT},
+ {vk::Format::eR8G8B8A8Unorm, CompatibilityClass::_32BIT},
+ {vk::Format::eR8G8B8A8Uscaled, CompatibilityClass::_32BIT},
+ {vk::Format::eR8G8B8Sint, CompatibilityClass::_24BIT},
+ {vk::Format::eR8G8B8Snorm, CompatibilityClass::_24BIT},
+ {vk::Format::eR8G8B8Srgb, CompatibilityClass::_24BIT},
+ {vk::Format::eR8G8B8Sscaled, CompatibilityClass::_24BIT},
+ {vk::Format::eR8G8B8Uint, CompatibilityClass::_24BIT},
+ {vk::Format::eR8G8B8Unorm, CompatibilityClass::_24BIT},
+ {vk::Format::eR8G8B8Uscaled, CompatibilityClass::_24BIT},
+ {vk::Format::eR8G8Sint, CompatibilityClass::_16BIT},
+ {vk::Format::eR8G8Snorm, CompatibilityClass::_16BIT},
+ {vk::Format::eR8G8Srgb, CompatibilityClass::_16BIT},
+ {vk::Format::eR8G8Sscaled, CompatibilityClass::_16BIT},
+ {vk::Format::eR8G8Uint, CompatibilityClass::_16BIT},
+ {vk::Format::eR8G8Unorm, CompatibilityClass::_16BIT},
+ {vk::Format::eR8G8Uscaled, CompatibilityClass::_16BIT},
+ {vk::Format::eR8Sint, CompatibilityClass::_8BIT},
+ {vk::Format::eR8Snorm, CompatibilityClass::_8BIT},
+ {vk::Format::eR8Srgb, CompatibilityClass::_8BIT},
+ {vk::Format::eR8Sscaled, CompatibilityClass::_8BIT},
+ {vk::Format::eR8Uint, CompatibilityClass::_8BIT},
+ {vk::Format::eR8Unorm, CompatibilityClass::_8BIT},
+ {vk::Format::eR8Uscaled, CompatibilityClass::_8BIT},
+ {vk::Format::eS8Uint, CompatibilityClass::S8},
+ {vk::Format::eX8D24UnormPack32, CompatibilityClass::D24},
+ {vk::Format::eUndefined, CompatibilityClass::NONE},
+};
+
+bool IsVulkanFormatCompatible(vk::Format base, vk::Format view) {
+ if (base == view) { // identical formats are accepted without consulting FORMAT_TABLE
+ return true;
+ }
+ const auto base_comp = FORMAT_TABLE.at(base); // at() throws std::out_of_range for a format missing from the table
+ const auto view_comp = FORMAT_TABLE.at(view);
+ return (base_comp & view_comp) == view_comp; // true when every class bit of view is also set on base; NOTE(review): a view mapped to NONE passes for any base - confirm intended
+}
+
+} // namespace VideoCore
diff --git a/src/video_core/texture_cache/host_compatibility.h b/src/video_core/texture_cache/host_compatibility.h
index a73f7e6be..b0579137b 100644
--- a/src/video_core/texture_cache/host_compatibility.h
+++ b/src/video_core/texture_cache/host_compatibility.h
@@ -6,387 +6,11 @@
#pragma once
-#include <unordered_map>
#include "video_core/renderer_vulkan/vk_common.h"
namespace VideoCore {
-/**
- * @brief All classes of format compatibility according to the Vulkan specification
- * @url
- * https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f75f545a3e5a98d7dfb89864391a1db1e/layers/generated/vk_format_utils.h#L47-L131
- * @note This is copied directly from Vulkan Validation Layers and doesn't follow the Skyline naming
- * conventions
- */
-enum class FORMAT_COMPATIBILITY_CLASS {
- NONE = 0,
- _10BIT_2PLANE_420,
- _10BIT_2PLANE_422,
- _10BIT_2PLANE_444,
- _10BIT_3PLANE_420,
- _10BIT_3PLANE_422,
- _10BIT_3PLANE_444,
- _12BIT_2PLANE_420,
- _12BIT_2PLANE_422,
- _12BIT_2PLANE_444,
- _12BIT_3PLANE_420,
- _12BIT_3PLANE_422,
- _12BIT_3PLANE_444,
- _128BIT,
- _16BIT,
- _16BIT_2PLANE_420,
- _16BIT_2PLANE_422,
- _16BIT_2PLANE_444,
- _16BIT_3PLANE_420,
- _16BIT_3PLANE_422,
- _16BIT_3PLANE_444,
- _192BIT,
- _24BIT,
- _256BIT,
- _32BIT,
- _32BIT_B8G8R8G8,
- _32BIT_G8B8G8R8,
- _48BIT,
- _64BIT,
- _64BIT_B10G10R10G10,
- _64BIT_B12G12R12G12,
- _64BIT_B16G16R16G16,
- _64BIT_G10B10G10R10,
- _64BIT_G12B12G12R12,
- _64BIT_G16B16G16R16,
- _64BIT_R10G10B10A10,
- _64BIT_R12G12B12A12,
- _8BIT,
- _8BIT_2PLANE_420,
- _8BIT_2PLANE_422,
- _8BIT_2PLANE_444,
- _8BIT_3PLANE_420,
- _8BIT_3PLANE_422,
- _8BIT_3PLANE_444,
- _96BIT,
- ASTC_10X10,
- ASTC_10X5,
- ASTC_10X6,
- ASTC_10X8,
- ASTC_12X10,
- ASTC_12X12,
- ASTC_4X4,
- ASTC_5X4,
- ASTC_5X5,
- ASTC_6X5,
- ASTC_6X6,
- ASTC_8X5,
- ASTC_8X6,
- ASTC_8X8,
- BC1_RGB,
- BC1_RGBA,
- BC2,
- BC3,
- BC4,
- BC5,
- BC6H,
- BC7,
- D16,
- D16S8,
- D24,
- D24S8,
- D32,
- D32S8,
- EAC_R,
- EAC_RG,
- ETC2_EAC_RGBA,
- ETC2_RGB,
- ETC2_RGBA,
- PVRTC1_2BPP,
- PVRTC1_4BPP,
- PVRTC2_2BPP,
- PVRTC2_4BPP,
- S8
-};
-/**
- * @brief The format compatibility class according to the Vulkan specification
- * @url
- * https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility-classes
- * @url
- * https://github.com/KhronosGroup/Vulkan-ValidationLayers/blob/d37c676f75f545a3e5a98d7dfb89864391a1db1e/layers/generated/vk_format_utils.cpp#L70-L812
- * @note This is copied directly from Vulkan Validation Layers and doesn't follow the Skyline naming
- * conventions
- */
-static const std::unordered_map vkFormatClassTable{
- {VK_FORMAT_A1R5G5B5_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_A2B10G10R10_SINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A2B10G10R10_SNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A2B10G10R10_SSCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A2B10G10R10_UINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A2B10G10R10_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A2B10G10R10_USCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A2R10G10B10_SINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A2R10G10B10_SNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A2R10G10B10_SSCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A2R10G10B10_UINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A2R10G10B10_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A2R10G10B10_USCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_A8B8G8R8_SINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A8B8G8R8_SNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A8B8G8R8_SRGB_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A8B8G8R8_SSCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A8B8G8R8_UINT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A8B8G8R8_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_A8B8G8R8_USCALED_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X10},
- {VK_FORMAT_ASTC_10x10_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X10},
- {VK_FORMAT_ASTC_10x10_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X10},
- {VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X5},
- {VK_FORMAT_ASTC_10x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X5},
- {VK_FORMAT_ASTC_10x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X5},
- {VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X6},
- {VK_FORMAT_ASTC_10x6_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X6},
- {VK_FORMAT_ASTC_10x6_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X6},
- {VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_10X8},
- {VK_FORMAT_ASTC_10x8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X8},
- {VK_FORMAT_ASTC_10x8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_10X8},
- {VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_12X10},
- {VK_FORMAT_ASTC_12x10_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X10},
- {VK_FORMAT_ASTC_12x10_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X10},
- {VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_12X12},
- {VK_FORMAT_ASTC_12x12_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X12},
- {VK_FORMAT_ASTC_12x12_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_12X12},
- {VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_4X4},
- {VK_FORMAT_ASTC_4x4_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_4X4},
- {VK_FORMAT_ASTC_4x4_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_4X4},
- {VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_5X4},
- {VK_FORMAT_ASTC_5x4_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X4},
- {VK_FORMAT_ASTC_5x4_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X4},
- {VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_5X5},
- {VK_FORMAT_ASTC_5x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X5},
- {VK_FORMAT_ASTC_5x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_5X5},
- {VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_6X5},
- {VK_FORMAT_ASTC_6x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X5},
- {VK_FORMAT_ASTC_6x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X5},
- {VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_6X6},
- {VK_FORMAT_ASTC_6x6_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X6},
- {VK_FORMAT_ASTC_6x6_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_6X6},
- {VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_8X5},
- {VK_FORMAT_ASTC_8x5_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X5},
- {VK_FORMAT_ASTC_8x5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X5},
- {VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_8X6},
- {VK_FORMAT_ASTC_8x6_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X6},
- {VK_FORMAT_ASTC_8x6_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X6},
- {VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK_EXT, FORMAT_COMPATIBILITY_CLASS::ASTC_8X8},
- {VK_FORMAT_ASTC_8x8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X8},
- {VK_FORMAT_ASTC_8x8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ASTC_8X8},
- {VK_FORMAT_B10G11R11_UFLOAT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16,
- FORMAT_COMPATIBILITY_CLASS::_64BIT_B10G10R10G10},
- {VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16,
- FORMAT_COMPATIBILITY_CLASS::_64BIT_B12G12R12G12},
- {VK_FORMAT_B16G16R16G16_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT_B16G16R16G16},
- {VK_FORMAT_B4G4R4A4_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_B5G5R5A1_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_B5G6R5_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_B8G8R8A8_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_B8G8R8A8_SNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_B8G8R8A8_SRGB, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_B8G8R8A8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_B8G8R8A8_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_B8G8R8A8_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_B8G8R8A8_USCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_B8G8R8G8_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT_B8G8R8G8},
- {VK_FORMAT_B8G8R8_SINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
- {VK_FORMAT_B8G8R8_SNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
- {VK_FORMAT_B8G8R8_SRGB, FORMAT_COMPATIBILITY_CLASS::_24BIT},
- {VK_FORMAT_B8G8R8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
- {VK_FORMAT_B8G8R8_UINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
- {VK_FORMAT_B8G8R8_UNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
- {VK_FORMAT_B8G8R8_USCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
- {VK_FORMAT_BC1_RGBA_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGBA},
- {VK_FORMAT_BC1_RGBA_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGBA},
- {VK_FORMAT_BC1_RGB_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGB},
- {VK_FORMAT_BC1_RGB_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC1_RGB},
- {VK_FORMAT_BC2_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC2},
- {VK_FORMAT_BC2_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC2},
- {VK_FORMAT_BC3_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC3},
- {VK_FORMAT_BC3_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC3},
- {VK_FORMAT_BC4_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC4},
- {VK_FORMAT_BC4_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC4},
- {VK_FORMAT_BC5_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC5},
- {VK_FORMAT_BC5_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC5},
- {VK_FORMAT_BC6H_SFLOAT_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC6H},
- {VK_FORMAT_BC6H_UFLOAT_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC6H},
- {VK_FORMAT_BC7_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC7},
- {VK_FORMAT_BC7_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::BC7},
- {VK_FORMAT_D16_UNORM, FORMAT_COMPATIBILITY_CLASS::D16},
- {VK_FORMAT_D16_UNORM_S8_UINT, FORMAT_COMPATIBILITY_CLASS::D16S8},
- {VK_FORMAT_D24_UNORM_S8_UINT, FORMAT_COMPATIBILITY_CLASS::D24S8},
- {VK_FORMAT_D32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::D32},
- {VK_FORMAT_D32_SFLOAT_S8_UINT, FORMAT_COMPATIBILITY_CLASS::D32S8},
- {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_EAC_R11G11_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_RG},
- {VK_FORMAT_EAC_R11G11_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_RG},
- {VK_FORMAT_EAC_R11_SNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_R},
- {VK_FORMAT_EAC_R11_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::EAC_R},
- {VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGBA},
- {VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGBA},
- {VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_EAC_RGBA},
- {VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_EAC_RGBA},
- {VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGB},
- {VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, FORMAT_COMPATIBILITY_CLASS::ETC2_RGB},
- {VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16,
- FORMAT_COMPATIBILITY_CLASS::_64BIT_G10B10G10R10},
- {VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
- FORMAT_COMPATIBILITY_CLASS::_10BIT_2PLANE_420},
- {VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16,
- FORMAT_COMPATIBILITY_CLASS::_10BIT_2PLANE_422},
- {VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT,
- FORMAT_COMPATIBILITY_CLASS::_10BIT_2PLANE_444},
- {VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
- FORMAT_COMPATIBILITY_CLASS::_10BIT_3PLANE_420},
- {VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16,
- FORMAT_COMPATIBILITY_CLASS::_10BIT_3PLANE_422},
- {VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16,
- FORMAT_COMPATIBILITY_CLASS::_10BIT_3PLANE_444},
- {VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16,
- FORMAT_COMPATIBILITY_CLASS::_64BIT_G12B12G12R12},
- {VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16,
- FORMAT_COMPATIBILITY_CLASS::_12BIT_2PLANE_420},
- {VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16,
- FORMAT_COMPATIBILITY_CLASS::_12BIT_2PLANE_422},
- {VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT,
- FORMAT_COMPATIBILITY_CLASS::_12BIT_2PLANE_444},
- {VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16,
- FORMAT_COMPATIBILITY_CLASS::_12BIT_3PLANE_420},
- {VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16,
- FORMAT_COMPATIBILITY_CLASS::_12BIT_3PLANE_422},
- {VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16,
- FORMAT_COMPATIBILITY_CLASS::_12BIT_3PLANE_444},
- {VK_FORMAT_G16B16G16R16_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT_G16B16G16R16},
- {VK_FORMAT_G16_B16R16_2PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_2PLANE_420},
- {VK_FORMAT_G16_B16R16_2PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_2PLANE_422},
- {VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT, FORMAT_COMPATIBILITY_CLASS::_16BIT_2PLANE_444},
- {VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_3PLANE_420},
- {VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_3PLANE_422},
- {VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT_3PLANE_444},
- {VK_FORMAT_G8B8G8R8_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT_G8B8G8R8},
- {VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_2PLANE_420},
- {VK_FORMAT_G8_B8R8_2PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_2PLANE_422},
- {VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT, FORMAT_COMPATIBILITY_CLASS::_8BIT_2PLANE_444},
- {VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_3PLANE_420},
- {VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_3PLANE_422},
- {VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT_3PLANE_444},
- {VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_2BPP},
- {VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_2BPP},
- {VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_4BPP},
- {VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC1_4BPP},
- {VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_2BPP},
- {VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_2BPP},
- {VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_4BPP},
- {VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG, FORMAT_COMPATIBILITY_CLASS::PVRTC2_4BPP},
- {VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16, FORMAT_COMPATIBILITY_CLASS::_64BIT_R10G10B10A10},
- {VK_FORMAT_R10X6G10X6_UNORM_2PACK16, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R10X6_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16, FORMAT_COMPATIBILITY_CLASS::_64BIT_R12G12B12A12},
- {VK_FORMAT_R12X4G12X4_UNORM_2PACK16, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R12X4_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R16G16B16A16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
- {VK_FORMAT_R16G16B16A16_SINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
- {VK_FORMAT_R16G16B16A16_SNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT},
- {VK_FORMAT_R16G16B16A16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_64BIT},
- {VK_FORMAT_R16G16B16A16_UINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
- {VK_FORMAT_R16G16B16A16_UNORM, FORMAT_COMPATIBILITY_CLASS::_64BIT},
- {VK_FORMAT_R16G16B16A16_USCALED, FORMAT_COMPATIBILITY_CLASS::_64BIT},
- {VK_FORMAT_R16G16B16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_48BIT},
- {VK_FORMAT_R16G16B16_SINT, FORMAT_COMPATIBILITY_CLASS::_48BIT},
- {VK_FORMAT_R16G16B16_SNORM, FORMAT_COMPATIBILITY_CLASS::_48BIT},
- {VK_FORMAT_R16G16B16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_48BIT},
- {VK_FORMAT_R16G16B16_UINT, FORMAT_COMPATIBILITY_CLASS::_48BIT},
- {VK_FORMAT_R16G16B16_UNORM, FORMAT_COMPATIBILITY_CLASS::_48BIT},
- {VK_FORMAT_R16G16B16_USCALED, FORMAT_COMPATIBILITY_CLASS::_48BIT},
- {VK_FORMAT_R16G16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R16G16_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R16G16_SNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R16G16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R16G16_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R16G16_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R16G16_USCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R16_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R16_SINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R16_SNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R16_SSCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R16_UINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R16_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R16_USCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R32G32B32A32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
- {VK_FORMAT_R32G32B32A32_SINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
- {VK_FORMAT_R32G32B32A32_UINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
- {VK_FORMAT_R32G32B32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_96BIT},
- {VK_FORMAT_R32G32B32_SINT, FORMAT_COMPATIBILITY_CLASS::_96BIT},
- {VK_FORMAT_R32G32B32_UINT, FORMAT_COMPATIBILITY_CLASS::_96BIT},
- {VK_FORMAT_R32G32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
- {VK_FORMAT_R32G32_SINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
- {VK_FORMAT_R32G32_UINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
- {VK_FORMAT_R32_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R32_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R32_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R4G4B4A4_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R4G4_UNORM_PACK8, FORMAT_COMPATIBILITY_CLASS::_8BIT},
- {VK_FORMAT_R5G5B5A1_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R5G6B5_UNORM_PACK16, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R64G64B64A64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_256BIT},
- {VK_FORMAT_R64G64B64A64_SINT, FORMAT_COMPATIBILITY_CLASS::_256BIT},
- {VK_FORMAT_R64G64B64A64_UINT, FORMAT_COMPATIBILITY_CLASS::_256BIT},
- {VK_FORMAT_R64G64B64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_192BIT},
- {VK_FORMAT_R64G64B64_SINT, FORMAT_COMPATIBILITY_CLASS::_192BIT},
- {VK_FORMAT_R64G64B64_UINT, FORMAT_COMPATIBILITY_CLASS::_192BIT},
- {VK_FORMAT_R64G64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
- {VK_FORMAT_R64G64_SINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
- {VK_FORMAT_R64G64_UINT, FORMAT_COMPATIBILITY_CLASS::_128BIT},
- {VK_FORMAT_R64_SFLOAT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
- {VK_FORMAT_R64_SINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
- {VK_FORMAT_R64_UINT, FORMAT_COMPATIBILITY_CLASS::_64BIT},
- {VK_FORMAT_R8G8B8A8_SINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R8G8B8A8_SNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R8G8B8A8_SRGB, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R8G8B8A8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R8G8B8A8_UINT, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R8G8B8A8_UNORM, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R8G8B8A8_USCALED, FORMAT_COMPATIBILITY_CLASS::_32BIT},
- {VK_FORMAT_R8G8B8_SINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
- {VK_FORMAT_R8G8B8_SNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
- {VK_FORMAT_R8G8B8_SRGB, FORMAT_COMPATIBILITY_CLASS::_24BIT},
- {VK_FORMAT_R8G8B8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
- {VK_FORMAT_R8G8B8_UINT, FORMAT_COMPATIBILITY_CLASS::_24BIT},
- {VK_FORMAT_R8G8B8_UNORM, FORMAT_COMPATIBILITY_CLASS::_24BIT},
- {VK_FORMAT_R8G8B8_USCALED, FORMAT_COMPATIBILITY_CLASS::_24BIT},
- {VK_FORMAT_R8G8_SINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R8G8_SNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R8G8_SRGB, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R8G8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R8G8_UINT, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R8G8_UNORM, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R8G8_USCALED, FORMAT_COMPATIBILITY_CLASS::_16BIT},
- {VK_FORMAT_R8_SINT, FORMAT_COMPATIBILITY_CLASS::_8BIT},
- {VK_FORMAT_R8_SNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT},
- {VK_FORMAT_R8_SRGB, FORMAT_COMPATIBILITY_CLASS::_8BIT},
- {VK_FORMAT_R8_SSCALED, FORMAT_COMPATIBILITY_CLASS::_8BIT},
- {VK_FORMAT_R8_UINT, FORMAT_COMPATIBILITY_CLASS::_8BIT},
- {VK_FORMAT_R8_UNORM, FORMAT_COMPATIBILITY_CLASS::_8BIT},
- {VK_FORMAT_R8_USCALED, FORMAT_COMPATIBILITY_CLASS::_8BIT},
- {VK_FORMAT_S8_UINT, FORMAT_COMPATIBILITY_CLASS::S8},
- {VK_FORMAT_X8_D24_UNORM_PACK32, FORMAT_COMPATIBILITY_CLASS::D24},
- {VK_FORMAT_UNDEFINED, FORMAT_COMPATIBILITY_CLASS::NONE},
-};
+/// Returns true if the two formats are compatible according to Vulkan's format compatibility rules
+bool IsVulkanFormatCompatible(vk::Format base, vk::Format view);
-/**
- * @return If the two formats are compatible according to Vulkan's format compatibility rules
- * @url
- * https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#formats-compatibility
- */
-static bool IsVulkanFormatCompatible(vk::Format lhs, vk::Format rhs) {
- if (lhs == rhs) {
- return true;
- }
- return vkFormatClassTable.at(VkFormat(lhs)) == vkFormatClassTable.at(VkFormat(rhs));
-}
} // namespace VideoCore
diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp
index 522e6fd5b..8522f1307 100644
--- a/src/video_core/texture_cache/image.cpp
+++ b/src/video_core/texture_cache/image.cpp
@@ -14,62 +14,6 @@ namespace VideoCore {
using namespace Vulkan;
-bool ImageInfo::IsBlockCoded() const {
- switch (pixel_format) {
- case vk::Format::eBc1RgbaSrgbBlock:
- case vk::Format::eBc1RgbaUnormBlock:
- case vk::Format::eBc1RgbSrgbBlock:
- case vk::Format::eBc1RgbUnormBlock:
- case vk::Format::eBc2SrgbBlock:
- case vk::Format::eBc2UnormBlock:
- case vk::Format::eBc3SrgbBlock:
- case vk::Format::eBc3UnormBlock:
- case vk::Format::eBc4SnormBlock:
- case vk::Format::eBc4UnormBlock:
- case vk::Format::eBc5SnormBlock:
- case vk::Format::eBc5UnormBlock:
- case vk::Format::eBc6HSfloatBlock:
- case vk::Format::eBc6HUfloatBlock:
- case vk::Format::eBc7SrgbBlock:
- case vk::Format::eBc7UnormBlock:
- return true;
- default:
- return false;
- }
-}
-
-bool ImageInfo::IsPacked() const {
- switch (pixel_format) {
- case vk::Format::eB5G5R5A1UnormPack16:
- [[fallthrough]];
- case vk::Format::eB5G6R5UnormPack16:
- return true;
- default:
- return false;
- }
-}
-
-bool ImageInfo::IsDepthStencil() const {
- switch (pixel_format) {
- case vk::Format::eD16Unorm:
- case vk::Format::eD16UnormS8Uint:
- case vk::Format::eD32Sfloat:
- case vk::Format::eD32SfloatS8Uint:
- return true;
- default:
- return false;
- }
-}
-
-bool ImageInfo::HasStencil() const {
- if (pixel_format == vk::Format::eD32SfloatS8Uint ||
- pixel_format == vk::Format::eD24UnormS8Uint ||
- pixel_format == vk::Format::eD16UnormS8Uint) {
- return true;
- }
- return false;
-}
-
static vk::ImageUsageFlags ImageUsageFlags(const ImageInfo& info) {
vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferSrc |
vk::ImageUsageFlagBits::eTransferDst |
@@ -161,6 +105,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
if (info.props.is_volume) {
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
}
+ if (info.props.is_block) {
+ flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
+ }
usage_flags = ImageUsageFlags(info);
format_features = FormatFeatureFlags(usage_flags);
@@ -372,9 +319,9 @@ void Image::CopyImage(const Image& image) {
boost::container::small_vector image_copy{};
for (u32 m = 0; m < image.info.resources.levels; ++m) {
- const auto mip_w = std::max(info.size.width >> m, 1u);
- const auto mip_h = std::max(info.size.height >> m, 1u);
- const auto mip_d = std::max(info.size.depth >> m, 1u);
+ const auto mip_w = std::max(image.info.size.width >> m, 1u);
+ const auto mip_h = std::max(image.info.size.height >> m, 1u);
+ const auto mip_d = std::max(image.info.size.depth >> m, 1u);
image_copy.emplace_back(vk::ImageCopy{
.srcSubresource{
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp
index 39322f449..769c4211f 100644
--- a/src/video_core/texture_cache/image_info.cpp
+++ b/src/video_core/texture_cache/image_info.cpp
@@ -81,7 +81,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
tiling_mode = buffer.GetTilingMode();
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.GetDataFmt(), buffer.GetNumberFmt());
num_samples = buffer.NumSamples();
- num_bits = NumBits(buffer.GetDataFmt());
+ num_bits = NumBitsPerBlock(buffer.GetDataFmt());
type = vk::ImageType::e2D;
size.width = hint.Valid() ? hint.width : buffer.Pitch();
size.height = hint.Valid() ? hint.height : buffer.Height();
@@ -142,7 +142,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
resources.levels = image.NumLevels();
resources.layers = image.NumLayers();
num_samples = image.NumSamples();
- num_bits = NumBits(image.GetDataFmt());
+ num_bits = NumBitsPerBlock(image.GetDataFmt());
guest_address = image.Address();
@@ -152,6 +152,80 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
UpdateSize();
}
+bool ImageInfo::IsBlockCoded() const {
+ switch (pixel_format) {
+ case vk::Format::eBc1RgbaSrgbBlock:
+ case vk::Format::eBc1RgbaUnormBlock:
+ case vk::Format::eBc1RgbSrgbBlock:
+ case vk::Format::eBc1RgbUnormBlock:
+ case vk::Format::eBc2SrgbBlock:
+ case vk::Format::eBc2UnormBlock:
+ case vk::Format::eBc3SrgbBlock:
+ case vk::Format::eBc3UnormBlock:
+ case vk::Format::eBc4SnormBlock:
+ case vk::Format::eBc4UnormBlock:
+ case vk::Format::eBc5SnormBlock:
+ case vk::Format::eBc5UnormBlock:
+ case vk::Format::eBc6HSfloatBlock:
+ case vk::Format::eBc6HUfloatBlock:
+ case vk::Format::eBc7SrgbBlock:
+ case vk::Format::eBc7UnormBlock:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool ImageInfo::IsPacked() const {
+ switch (pixel_format) {
+ case vk::Format::eB5G5R5A1UnormPack16:
+ [[fallthrough]];
+ case vk::Format::eB5G6R5UnormPack16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool ImageInfo::IsDepthStencil() const {
+ switch (pixel_format) {
+ case vk::Format::eD16Unorm:
+ case vk::Format::eD16UnormS8Uint:
+ case vk::Format::eD32Sfloat:
+ case vk::Format::eD32SfloatS8Uint:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool ImageInfo::HasStencil() const {
+ if (pixel_format == vk::Format::eD32SfloatS8Uint ||
+ pixel_format == vk::Format::eD24UnormS8Uint ||
+ pixel_format == vk::Format::eD16UnormS8Uint) {
+ return true;
+ }
+ return false;
+}
+
+bool ImageInfo::IsCompatible(const ImageInfo& info) const {
+ return (pixel_format == info.pixel_format && num_samples == info.num_samples &&
+ num_bits == info.num_bits);
+}
+
+bool ImageInfo::IsTilingCompatible(u32 lhs, u32 rhs) const {
+ if (lhs == rhs) {
+ return true;
+ }
+ if (lhs == 0x0e && rhs == 0x0d) {
+ return true;
+ }
+ if (lhs == 0x0d && rhs == 0x0e) {
+ return true;
+ }
+ return false;
+}
+
void ImageInfo::UpdateSize() {
mips_layout.clear();
MipInfo mip_info{};
@@ -163,7 +237,6 @@ void ImageInfo::UpdateSize() {
if (props.is_block) {
mip_w = (mip_w + 3) / 4;
mip_h = (mip_h + 3) / 4;
- bpp *= 16;
}
mip_w = std::max(mip_w, 1u);
mip_h = std::max(mip_h, 1u);
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h
index ca4d9f5e9..47718a095 100644
--- a/src/video_core/texture_cache/image_info.h
+++ b/src/video_core/texture_cache/image_info.h
@@ -25,6 +25,11 @@ struct ImageInfo {
bool IsTiled() const {
return tiling_mode != AmdGpu::TilingMode::Display_Linear;
}
+ Extent3D BlockDim() const {
+ const u32 shift = props.is_block ? 2 : 0;
+ return Extent3D{size.width >> shift, size.height >> shift, size.depth};
+ }
+
bool IsBlockCoded() const;
bool IsPacked() const;
bool IsDepthStencil() const;
@@ -33,24 +38,8 @@ struct ImageInfo {
s32 MipOf(const ImageInfo& info) const;
s32 SliceOf(const ImageInfo& info, s32 mip) const;
- /// Verifies if images are compatible for subresource merging.
- bool IsCompatible(const ImageInfo& info) const {
- return (pixel_format == info.pixel_format && num_samples == info.num_samples &&
- num_bits == info.num_bits);
- }
-
- bool IsTilingCompatible(u32 lhs, u32 rhs) const {
- if (lhs == rhs) {
- return true;
- }
- if (lhs == 0x0e && rhs == 0x0d) {
- return true;
- }
- if (lhs == 0x0d && rhs == 0x0e) {
- return true;
- }
- return false;
- }
+ bool IsCompatible(const ImageInfo& info) const;
+ bool IsTilingCompatible(u32 lhs, u32 rhs) const;
void UpdateSize();
diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp
index 4b173c313..f070b9132 100644
--- a/src/video_core/texture_cache/texture_cache.cpp
+++ b/src/video_core/texture_cache/texture_cache.cpp
@@ -199,7 +199,8 @@ std::tuple TextureCache::ResolveOverlap(const ImageInfo& imag
scheduler.CurrentTick() - tex_cache_image.tick_accessed_last > NumFramesBeforeRemoval;
if (image_info.guest_address == tex_cache_image.info.guest_address) { // Equal address
- if (image_info.size != tex_cache_image.info.size) {
+ if (image_info.BlockDim() != tex_cache_image.info.BlockDim() ||
+ image_info.num_bits != tex_cache_image.info.num_bits) {
// Very likely this kind of overlap is caused by allocation from a pool.
if (safe_to_delete) {
FreeImage(cache_image_id);
@@ -211,15 +212,19 @@ std::tuple TextureCache::ResolveOverlap(const ImageInfo& imag
return {depth_image_id, -1, -1};
}
+ if (image_info.IsBlockCoded() && !tex_cache_image.info.IsBlockCoded()) {
+ // Compressed view of uncompressed image with same block size.
+ // We need to recreate the image with compressed format and copy.
+ return {ExpandImage(image_info, cache_image_id), -1, -1};
+ }
+
if (image_info.pixel_format != tex_cache_image.info.pixel_format ||
image_info.guest_size <= tex_cache_image.info.guest_size) {
auto result_id = merged_image_id ? merged_image_id : cache_image_id;
const auto& result_image = slot_images[result_id];
- return {
- IsVulkanFormatCompatible(image_info.pixel_format, result_image.info.pixel_format)
- ? result_id
- : ImageId{},
- -1, -1};
+ const bool is_compatible =
+ IsVulkanFormatCompatible(result_image.info.pixel_format, image_info.pixel_format);
+ return {is_compatible ? result_id : ImageId{}, -1, -1};
}
if (image_info.type == tex_cache_image.info.type &&
@@ -340,7 +345,7 @@ ImageId TextureCache::FindImage(BaseDesc& desc, FindFlags flags) {
continue;
}
if (False(flags & FindFlags::RelaxFmt) &&
- (!IsVulkanFormatCompatible(info.pixel_format, cache_image.info.pixel_format) ||
+ (!IsVulkanFormatCompatible(cache_image.info.pixel_format, info.pixel_format) ||
(cache_image.info.type != info.type && info.size != Extent3D{1, 1, 1}))) {
continue;
}
@@ -512,9 +517,9 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
// So this calculation should be very uncommon and reasonably fast
// For now we'll just check up to 64 first pixels
const auto addr = std::bit_cast(image.info.guest_address);
- const auto w = std::min(image.info.size.width, u32(8));
- const auto h = std::min(image.info.size.height, u32(8));
- const auto size = w * h * image.info.num_bits / 8;
+ const u32 w = std::min(image.info.size.width, u32(8));
+ const u32 h = std::min(image.info.size.height, u32(8));
+ const u32 size = w * h * image.info.num_bits >> (3 + (image.info.props.is_block ? 4 : 0));
const u64 hash = XXH3_64bits(addr, size);
if (image.hash == hash) {
image.flags &= ~ImageFlagBits::MaybeCpuDirty;
diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp
index d7fc54338..683ac08db 100644
--- a/src/video_core/texture_cache/tile_manager.cpp
+++ b/src/video_core/texture_cache/tile_manager.cpp
@@ -25,10 +25,9 @@
namespace VideoCore {
const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
- const auto bpp = info.num_bits * (info.props.is_block ? 16 : 1);
switch (info.tiling_mode) {
case AmdGpu::TilingMode::Texture_MicroTiled:
- switch (bpp) {
+ switch (info.num_bits) {
case 8:
return &detilers[DetilerType::Micro8];
case 16:
@@ -43,7 +42,7 @@ const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
return nullptr;
}
case AmdGpu::TilingMode::Texture_Volume:
- switch (bpp) {
+ switch (info.num_bits) {
case 8:
return &detilers[DetilerType::Macro8];
case 32:
@@ -55,7 +54,7 @@ const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
}
break;
case AmdGpu::TilingMode::Display_MicroTiled:
- switch (bpp) {
+ switch (info.num_bits) {
case 64:
return &detilers[DetilerType::Display_Micro64];
default:
@@ -71,7 +70,7 @@ struct DetilerParams {
u32 num_levels;
u32 pitch0;
u32 height;
- u32 sizes[14];
+ std::array<u32, 14> sizes;
};
TileManager::TileManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler)
@@ -276,7 +275,7 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
params.sizes[0] = tiles_per_row;
params.sizes[1] = tiles_per_slice;
} else {
- ASSERT(info.resources.levels <= 14);
+ ASSERT(info.resources.levels <= params.sizes.size());
std::memset(&params.sizes, 0, sizeof(params.sizes));
for (int m = 0; m < info.resources.levels; ++m) {
params.sizes[m] = info.mips_layout[m].size + (m > 0 ? params.sizes[m - 1] : 0);
@@ -287,8 +286,7 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
&params);
ASSERT((image_size % 64) == 0);
- const auto bpp = info.num_bits * (info.props.is_block ? 16u : 1u);
- const auto num_tiles = image_size / (64 * (bpp / 8));
+ const auto num_tiles = image_size / (64 * (info.num_bits / 8));
cmdbuf.dispatch(num_tiles, 1, 1);
return {out_buffer.first, 0};
}
From 14b082f5ea1ac520a6b1c5ce5c930e5c7ada5bb3 Mon Sep 17 00:00:00 2001
From: TheTurtle
Date: Mon, 9 Jun 2025 01:28:00 +0300
Subject: [PATCH 21/28] buffer_cache: Inline data to cpu unless gpu modified
(#3061)
---
src/video_core/buffer_cache/buffer_cache.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index 8a5283d83..f53c111e9 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -293,7 +293,7 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {
void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
- if (!is_gds && !IsRegionRegistered(address, num_bytes)) {
+ if (!is_gds && !IsRegionGpuModified(address, num_bytes)) {
memcpy(std::bit_cast(address), value, num_bytes);
return;
}
From ae2053c487e67e16a1f33bf5af9280714bc435fd Mon Sep 17 00:00:00 2001
From: squidbus <175574877+squidbus@users.noreply.github.com>
Date: Sun, 8 Jun 2025 15:41:58 -0700
Subject: [PATCH 22/28] fix: Disable eBlockTexelViewCompatible on MoltenVK
---
src/video_core/texture_cache/image.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp
index 8522f1307..d8070da61 100644
--- a/src/video_core/texture_cache/image.cpp
+++ b/src/video_core/texture_cache/image.cpp
@@ -105,7 +105,8 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
if (info.props.is_volume) {
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
}
- if (info.props.is_block) {
+ // Not supported by MoltenVK.
+ if (info.props.is_block && instance->GetDriverID() != vk::DriverId::eMoltenvk) {
flags |= vk::ImageCreateFlagBits::eBlockTexelViewCompatible;
}
From c20d02dd4094dbab2e990fcd27987e8de1bb4522 Mon Sep 17 00:00:00 2001
From: TheTurtle
Date: Mon, 9 Jun 2025 03:31:51 +0300
Subject: [PATCH 23/28] shader_recompiler: Better handling of geometry shader
scenario G (#3064)
---
src/shader_recompiler/frontend/copy_shader.cpp | 3 +++
src/shader_recompiler/frontend/copy_shader.h | 5 +++--
.../frontend/translate/translate.h | 1 +
.../ir/passes/readlane_elimination_pass.cpp | 1 +
.../ir/passes/ring_access_elimination.cpp | 15 ++++++++++++++-
src/shader_recompiler/runtime_info.h | 1 +
src/video_core/amdgpu/liverpool.h | 10 +++++++++-
.../renderer_vulkan/vk_pipeline_cache.cpp | 1 +
8 files changed, 33 insertions(+), 4 deletions(-)
diff --git a/src/shader_recompiler/frontend/copy_shader.cpp b/src/shader_recompiler/frontend/copy_shader.cpp
index 8750e2b18..4b5869e1d 100644
--- a/src/shader_recompiler/frontend/copy_shader.cpp
+++ b/src/shader_recompiler/frontend/copy_shader.cpp
@@ -67,6 +67,9 @@ CopyShaderData ParseCopyShader(std::span code) {
if (last_attr != IR::Attribute::Position0) {
data.num_attrs = static_cast(last_attr) - static_cast(IR::Attribute::Param0) + 1;
+ const auto it = data.attr_map.begin();
+ const u32 comp_stride = std::next(it)->first - it->first;
+ data.output_vertices = comp_stride / 64;
}
return data;
diff --git a/src/shader_recompiler/frontend/copy_shader.h b/src/shader_recompiler/frontend/copy_shader.h
index 55cc31ebd..24c7060ed 100644
--- a/src/shader_recompiler/frontend/copy_shader.h
+++ b/src/shader_recompiler/frontend/copy_shader.h
@@ -3,8 +3,8 @@
#pragma once
+#include