shader_recompiler: Convert cube images into 2D arrays.

2025-08-05 17:02:40 +00:00 · 2025-01-07 04:41:53 -08:00 · 2025-01-07 04:41:53 -08:00 · 501b921e49
commit 501b921e49
parent 120e6ea28f
14 changed files with 101 additions and 128 deletions
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@ -259,14 +259,6 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
            ctx.AddCapability(spv::Capability::ImageReadWriteLodAMD);
        }
    }
-    if (info.has_cube_arrays) {
-        if (info.has_storage_cube_arrays) {
-            // Implies SampledCubeArray
-            ctx.AddCapability(spv::Capability::ImageCubeArray);
-        } else {
-            ctx.AddCapability(spv::Capability::SampledCubeArray);
-        }
-    }
    if (info.has_texel_buffers) {
        ctx.AddCapability(spv::Capability::SampledBuffer);
    }
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@ -190,12 +190,6 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod
    case AmdGpu::ImageType::Color2DArray:
    case AmdGpu::ImageType::Color3D:
        return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[3]), mips());
-    case AmdGpu::ImageType::Cube:
-        // Cube arrays do not have their own type to distinguish by.
-        if (texture.is_array) {
-            return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[3]), mips());
-        }
-        return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[2]), zero, mips());
    default:
        UNREACHABLE_MSG("SPIR-V Instruction");
    }
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@ -788,9 +788,6 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
        return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, true, sampled, format);
    case AmdGpu::ImageType::Color3D:
        return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
-    case AmdGpu::ImageType::Cube:
-        return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, desc.is_array, false, sampled,
-                             format);
    default:
        break;
    }
@ -820,7 +817,6 @@ void EmitContext::DefineImagesAndSamplers() {
            .bound_type = image_desc.GetBoundType(sharp),
            .is_integer = is_integer,
            .is_storage = is_storage,
-            .is_array = image_desc.is_array,
        });
        interfaces.push_back(id);
    }
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@ -225,7 +225,6 @@ public:
        AmdGpu::ImageType bound_type;
        bool is_integer = false;
        bool is_storage = false;
-        bool is_array = false;
    };

    struct BufferDefinition {
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@ -301,6 +301,9 @@ private:
    IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0);
    void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0);

+    IR::F32 SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
+                             const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res);
+
    void LogMissingOpcode(const GcnInst& inst);

 private:
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@ -1042,20 +1042,75 @@ void Translator::V_MAD_U32_U24(const GcnInst& inst) {
    V_MAD_I32_I24(inst, false);
 }

+// Picks the per-face value that corresponds to the major axis of the direction (x, y, z).
+// z_res is returned when |z| >= |x| and |z| >= |y|; otherwise y_res when |y| >= |x|;
+// otherwise x_res.
+IR::F32 Translator::SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
+                                     const IR::F32& x_res, const IR::F32& y_res,
+                                     const IR::F32& z_res) {
+    const auto mag_x = ir.FPAbs(x);
+    const auto mag_y = ir.FPAbs(y);
+    const auto mag_z = ir.FPAbs(z);
+
+    // Both comparisons are inclusive, so ties resolve towards z first, then y.
+    const auto z_over_x = ir.FPGreaterThanEqual(mag_z, mag_x);
+    const auto z_over_y = ir.FPGreaterThanEqual(mag_z, mag_y);
+    const auto z_is_major = ir.LogicalAnd(z_over_x, z_over_y);
+    const auto y_over_x = ir.FPGreaterThanEqual(mag_y, mag_x);
+
+    const auto xy_choice = ir.Select(y_over_x, y_res, x_res);
+    return IR::F32{ir.Select(z_is_major, z_res, xy_choice)};
+}
+
 void Translator::V_CUBEID_F32(const GcnInst& inst) {
-    SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[2]));
+    // V_CUBEID_F32: writes the cube face ID (as a float) for direction (x, y, z).
+    // The face is chosen by the major axis and its sign:
+    //   +X = 0, -X = 1, +Y = 2, -Y = 3, +Z = 4, -Z = 5.
+    const auto x = GetSrc<IR::F32>(inst.src[0]);
+    const auto y = GetSrc<IR::F32>(inst.src[1]);
+    const auto z = GetSrc<IR::F32>(inst.src[2]);
+
+    const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
+    const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
+    const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
+    // Candidate face ID for each possible major axis; SelectCubeResult picks one.
+    const IR::F32 x_face{ir.Select(x_neg_cond, ir.Imm32(1.f), ir.Imm32(0.f))};
+    const IR::F32 y_face{ir.Select(y_neg_cond, ir.Imm32(3.f), ir.Imm32(2.f))};
+    const IR::F32 z_face{ir.Select(z_neg_cond, ir.Imm32(5.f), ir.Imm32(4.f))};
+
+    const auto result{SelectCubeResult(x, y, z, x_face, y_face, z_face)};
+    SetDst(inst.dst[0], result);
 }

 void Translator::V_CUBESC_F32(const GcnInst& inst) {
-    SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[0]));
+    // V_CUBESC_F32: writes the S coordinate on the selected cube face for (x, y, z):
+    //   x-major: x < 0 ? z : -z
+    //   y-major: x
+    //   z-major: z < 0 ? -x : x
+    const auto x = GetSrc<IR::F32>(inst.src[0]);
+    const auto y = GetSrc<IR::F32>(inst.src[1]);
+    const auto z = GetSrc<IR::F32>(inst.src[2]);
+
+    const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
+    const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
+    // Candidate S values for the x-major and z-major faces; the y-major face uses x as-is.
+    const IR::F32 x_sc{ir.Select(x_neg_cond, z, ir.FPNeg(z))};
+    const IR::F32 z_sc{ir.Select(z_neg_cond, ir.FPNeg(x), x)};
+
+    const auto result{SelectCubeResult(x, y, z, x_sc, x, z_sc)};
+    SetDst(inst.dst[0], result);
 }

 void Translator::V_CUBETC_F32(const GcnInst& inst) {
-    SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[1]));
+    // Per-face candidates: the x-major and z-major faces both use -y; the y-major face
+    // uses z, negated when y is negative. SelectCubeResult picks by major axis.
+    const auto src_x = GetSrc<IR::F32>(inst.src[0]);
+    const auto src_y = GetSrc<IR::F32>(inst.src[1]);
+    const auto src_z = GetSrc<IR::F32>(inst.src[2]);
+
+    const auto y_is_negative = ir.FPLessThan(src_y, ir.Imm32(0.f));
+    const IR::F32 negated_y{ir.FPNeg(src_y)};
+    const IR::F32 y_face_tc{ir.Select(y_is_negative, ir.FPNeg(src_z), src_z)};
+
+    SetDst(inst.dst[0],
+           SelectCubeResult(src_x, src_y, src_z, negated_y, y_face_tc, negated_y));
 }

 void Translator::V_CUBEMA_F32(const GcnInst& inst) {
-    SetDst(inst.dst[0], ir.Imm32(1.f));
+    // V_CUBEMA_F32: writes twice the major-axis coordinate of direction (x, y, z).
+    const auto x = GetSrc<IR::F32>(inst.src[0]);
+    const auto y = GetSrc<IR::F32>(inst.src[1]);
+    const auto z = GetSrc<IR::F32>(inst.src[2]);
+
+    // The scale factor is 2.0, matching the constant's name.
+    const auto two{ir.Imm32(2.f)};
+    const IR::F32 x_major_axis{ir.FPMul(x, two)};
+    const IR::F32 y_major_axis{ir.FPMul(y, two)};
+    const IR::F32 z_major_axis{ir.FPMul(z, two)};
+
+    const auto result{SelectCubeResult(x, y, z, x_major_axis, y_major_axis, z_major_axis)};
+    SetDst(inst.dst[0], result);
 }

 void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@ -81,16 +81,36 @@ struct ImageResource {
        if (base_type == AmdGpu::ImageType::Color2DArray && !is_array) {
            return AmdGpu::ImageType::Color2D;
        }
-        if (image.IsPartialCubemap()) {
-            // Partial cube map
-            return AmdGpu::ImageType::Color2DArray;
+        if (base_type == AmdGpu::ImageType::Color2DMsaaArray && !is_array) {
+            return AmdGpu::ImageType::Color2DMsaa;
        }
        return base_type;
    }

+    // Number of mip levels a view of this resource covers: always 1 for multisampled
+    // bound types, otherwise the inclusive [base_level, last_level] range of the sharp.
+    [[nodiscard]] u32 NumViewLevels(const AmdGpu::Image& image) const noexcept {
+        const auto bound_type = GetBoundType(image);
+        const bool is_msaa = bound_type == AmdGpu::ImageType::Color2DMsaa ||
+                             bound_type == AmdGpu::ImageType::Color2DMsaaArray;
+        if (is_msaa) {
+            return 1;
+        }
+        return image.last_level - image.base_level + 1;
+    }
+
+    // Number of array layers a view of this resource covers: 1 for the non-array bound
+    // types (1D, 2D, 2D MSAA, 3D), otherwise the inclusive [base_array, last_array]
+    // range of the sharp. The sharp is taken by const reference, like the other
+    // accessors of this struct, so no copy is made per call.
+    [[nodiscard]] u32 NumViewLayers(const AmdGpu::Image& image) const noexcept {
+        switch (GetBoundType(image)) {
+        case AmdGpu::ImageType::Color1D:
+        case AmdGpu::ImageType::Color2D:
+        case AmdGpu::ImageType::Color2DMsaa:
+        case AmdGpu::ImageType::Color3D:
+            // Non-array bound types always view a single layer, whatever the sharp says.
+            return 1;
+        default:
+            return image.last_array - image.base_array + 1;
+        }
+    }
+
    [[nodiscard]] bool IsStorage(const AmdGpu::Image& image) const noexcept {
-        // Need cube as storage when used with ImageRead.
-        return is_written || (is_read && GetBoundType(image) == AmdGpu::ImageType::Cube);
+        // Storage binding is decided solely by whether the shader writes the image;
+        // the sharp argument is not consulted.
+        return is_written;
    }

    [[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
@ -206,8 +226,6 @@ struct Info {
    u64 pgm_hash{};
    VAddr pgm_base;
    bool has_storage_images{};
-    bool has_cube_arrays{};
-    bool has_storage_cube_arrays{};
    bool has_image_buffers{};
    bool has_texel_buffers{};
    bool has_discard{};
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@ -560,32 +560,6 @@ void PatchTextureBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
    }
 }

-IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
-                         const IR::Value& z, bool is_written, bool is_array) {
-    // When cubemap is written with imageStore it is treated like 2DArray.
-    if (is_written) {
-        return ir.CompositeConstruct(s, t, z);
-    }
-
-    ASSERT(s.Type() == IR::Type::F32); // in case of fetched image need to adjust the code below
-
-    // We need to fix x and y coordinate,
-    // because the s and t coordinate will be scaled and plus 1.5 by v_madak_f32.
-    // We already force the scale value to be 1.0 when handling v_cubema_f32,
-    // here we subtract 1.5 to recover the original value.
-    const IR::Value x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f));
-    const IR::Value y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f));
-    if (is_array) {
-        const IR::U32 array_index = ir.ConvertFToU(32, IR::F32{z});
-        const IR::U32 face_id = ir.BitwiseAnd(array_index, ir.Imm32(7u));
-        const IR::U32 slice_id = ir.ShiftRightLogical(array_index, ir.Imm32(3u));
-        return ir.CompositeConstruct(x, y, ir.ConvertIToF(32, 32, false, face_id),
-                                     ir.ConvertIToF(32, 32, false, slice_id));
-    } else {
-        return ir.CompositeConstruct(x, y, z);
-    }
-}
-
 void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
                          const AmdGpu::Image& image) {
    const auto handle = inst.Arg(0);
@ -649,7 +623,6 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
        case AmdGpu::ImageType::Color2DMsaa:
            return ir.CompositeConstruct(read(0), read(8));
        case AmdGpu::ImageType::Color3D:
-        case AmdGpu::ImageType::Cube:
            return ir.CompositeConstruct(read(0), read(8), read(16));
        default:
            UNREACHABLE();
@ -675,7 +648,6 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
            return {ir.CompositeConstruct(get_addr_reg(addr_reg - 4), get_addr_reg(addr_reg - 3)),
                    ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1))};
        case AmdGpu::ImageType::Color3D:
-        case AmdGpu::ImageType::Cube:
            // (du/dx, dv/dx, dw/dx), (du/dy, dv/dy, dw/dy)
            addr_reg = addr_reg + 6;
            return {ir.CompositeConstruct(get_addr_reg(addr_reg - 6), get_addr_reg(addr_reg - 5),
@ -725,10 +697,6 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
            addr_reg = addr_reg + 3;
            return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
                                         get_coord(addr_reg - 1, 2));
-        case AmdGpu::ImageType::Cube: // x, y, face
-            addr_reg = addr_reg + 3;
-            return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
-                                  get_addr_reg(addr_reg - 1), false, inst_info.is_array);
        default:
            UNREACHABLE();
        }
@ -806,10 +774,6 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
            [[fallthrough]];
        case AmdGpu::ImageType::Color3D: // x, y, z, [lod]
            return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
-        case AmdGpu::ImageType::Cube: // x, y, face, [lod]
-            return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2),
-                                   inst.GetOpcode() == IR::Opcode::ImageWrite, inst_info.is_array),
-                    body->Arg(3)};
        default:
            UNREACHABLE_MSG("Unknown image type {}", image.GetType());
        }
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@ -87,16 +87,6 @@ void Visit(Info& info, const IR::Inst& inst) {
    }
 }

-void VisitImage(Info& info, const ImageResource& image) {
-    const auto sharp = image.GetSharp(info);
-    if (image.GetBoundType(sharp) == AmdGpu::ImageType::Cube && image.is_array) {
-        info.has_cube_arrays = true;
-        if (image.IsStorage(sharp)) {
-            info.has_storage_cube_arrays = true;
-        }
-    }
-}
-
 void CollectShaderInfoPass(IR::Program& program) {
    Info& info{program.info};
    for (IR::Block* const block : program.post_order_blocks) {
@ -104,9 +94,6 @@ void CollectShaderInfoPass(IR::Program& program) {
            Visit(info, inst);
        }
    }
-    for (const auto& image : program.info.images) {
-        VisitImage(info, image);
-    }
 }

 } // namespace Shader::Optimization
--- a/src/video_core/amdgpu/resource.h
+++ b/src/video_core/amdgpu/resource.h
@ -226,15 +226,13 @@ struct Image {
        return pitch + 1;
    }

-    u32 NumLayers(bool is_array) const {
-        u32 slices = GetType() == ImageType::Color3D ? 1 : depth + 1;
-        if (GetType() == ImageType::Cube) {
-            if (is_array) {
-                slices = last_array + 1;
-                ASSERT(slices % 6 == 0);
-            } else {
-                slices = 6;
-            }
+    [[nodiscard]] u32 NumLayers() const noexcept {
+        u32 slices = depth + 1;
+        const auto img_type = static_cast<ImageType>(type);
+        if (img_type == ImageType::Color3D) {
+            slices = 1;
+        } else if (img_type == ImageType::Cube) {
+            slices *= 6;
        }
        if (pow2pad) {
            slices = std::bit_ceil(slices);
@ -257,7 +255,8 @@ struct Image {
    }

    ImageType GetType() const noexcept {
-        return static_cast<ImageType>(type);
+        // Cube sharps are reported as Color2DArray; every other type is returned unchanged.
+        if (const auto raw_type = static_cast<ImageType>(type); raw_type != ImageType::Cube) {
+            return raw_type;
+        }
+        return ImageType::Color2DArray;
    }

    DataFormat GetDataFmt() const noexcept {
@ -288,11 +287,6 @@ struct Image {
        return GetDataFmt() >= DataFormat::FormatFmask8_1 &&
               GetDataFmt() <= DataFormat::FormatFmask64_8;
    }
-
-    bool IsPartialCubemap() const {
-        const auto viewed_slice = last_array - base_array + 1;
-        return GetType() == ImageType::Cube && viewed_slice < 6;
-    }
 };
 static_assert(sizeof(Image) == 32); // 256bits

--- a/src/video_core/texture_cache/image.cpp
+++ b/src/video_core/texture_cache/image.cpp
@ -153,13 +153,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
    // the texture cache should re-create the resource with the usage requested
    vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |
                               vk::ImageCreateFlagBits::eExtendedUsage};
-    const bool can_be_cube =
-        (info.type == vk::ImageType::e2D) &&
-        ((info.props.is_pow2 ? (info.resources.layers % 8) : (info.resources.layers % 6)) == 0) &&
-        (info.size.width == info.size.height);
-    if (info.props.is_cube || can_be_cube) {
-        flags |= vk::ImageCreateFlagBits::eCubeCompatible;
-    } else if (info.props.is_volume) {
+    if (info.props.is_volume) {
        flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
    }

--- a/src/video_core/texture_cache/image_info.cpp
+++ b/src/video_core/texture_cache/image_info.cpp
@ -37,7 +37,6 @@ static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept {
        return vk::ImageType::e1D;
    case AmdGpu::ImageType::Color2D:
    case AmdGpu::ImageType::Color2DMsaa:
-    case AmdGpu::ImageType::Cube:
    case AmdGpu::ImageType::Color2DArray:
        return vk::ImageType::e2D;
    case AmdGpu::ImageType::Color3D:
@ -130,7 +129,6 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
    }
    type = ConvertImageType(image.GetType());
    props.is_tiled = image.IsTiled();
-    props.is_cube = image.GetType() == AmdGpu::ImageType::Cube;
    props.is_volume = image.GetType() == AmdGpu::ImageType::Color3D;
    props.is_pow2 = image.pow2pad;
    props.is_block = IsBlockCoded();
@ -139,7 +137,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
    size.depth = props.is_volume ? image.depth + 1 : 1;
    pitch = image.Pitch();
    resources.levels = image.NumLevels();
-    resources.layers = image.NumLayers(desc.is_array);
+    resources.layers = image.NumLayers();
    num_samples = image.NumSamples();
    num_bits = NumBits(image.GetDataFmt());

--- a/src/video_core/texture_cache/image_info.h
+++ b/src/video_core/texture_cache/image_info.h
@ -61,7 +61,6 @@ struct ImageInfo {
    } meta_info{};

    struct {
-        u32 is_cube : 1;
        u32 is_volume : 1;
        u32 is_tiled : 1;
        u32 is_pow2 : 1;
--- a/src/video_core/texture_cache/image_view.cpp
+++ b/src/video_core/texture_cache/image_view.cpp
@ -20,8 +20,6 @@ vk::ImageViewType ConvertImageViewType(AmdGpu::ImageType type) {
    case AmdGpu::ImageType::Color2D:
    case AmdGpu::ImageType::Color2DMsaa:
        return vk::ImageViewType::e2D;
-    case AmdGpu::ImageType::Cube:
-        return vk::ImageViewType::eCube;
    case AmdGpu::ImageType::Color2DArray:
        return vk::ImageViewType::e2DArray;
    case AmdGpu::ImageType::Color3D:
@ -42,31 +40,13 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso
    if (desc.is_depth) {
        format = Vulkan::LiverpoolToVK::PromoteFormatToDepth(format);
    }
+
    range.base.level = image.base_level;
    range.base.layer = image.base_array;
-    if (image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
-        image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) {
-        range.extent.levels = 1;
-    } else {
-        range.extent.levels = image.last_level - image.base_level + 1;
-    }
-    range.extent.layers = image.last_array - image.base_array + 1;
+    range.extent.levels = desc.NumViewLevels(image);
+    range.extent.layers = desc.NumViewLayers(image);
    type = ConvertImageViewType(desc.GetBoundType(image));

-    // Adjust view type for arrays
-    if (type == vk::ImageViewType::eCube) {
-        if (desc.is_array) {
-            type = vk::ImageViewType::eCubeArray;
-        } else {
-            // Some games try to bind an array of cubemaps while shader reads only single one.
-            range.extent.layers = std::min(range.extent.layers, 6u);
-        }
-    }
-    if (type == vk::ImageViewType::e3D && range.extent.layers > 1) {
-        // Some games pass incorrect layer count for 3D textures so we need to fixup it.
-        range.extent.layers = 1;
-    }
-
    if (!is_storage) {
        mapping = Vulkan::LiverpoolToVK::ComponentMapping(image.DstSelect());
    }