From 501b921e490bbe81bd9040bab5b54f0224ea8a2c Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Tue, 7 Jan 2025 04:41:53 -0800 Subject: [PATCH] shader_recompiler: Convert cube images into 2D arrays. --- .../backend/spirv/emit_spirv.cpp | 8 --- .../backend/spirv/emit_spirv_image.cpp | 6 -- .../backend/spirv/spirv_emit_context.cpp | 4 -- .../backend/spirv/spirv_emit_context.h | 1 - .../frontend/translate/translate.h | 3 + .../frontend/translate/vector_alu.cpp | 63 +++++++++++++++++-- src/shader_recompiler/info.h | 32 +++++++--- .../ir/passes/resource_tracking_pass.cpp | 36 ----------- .../ir/passes/shader_info_collection_pass.cpp | 13 ---- src/video_core/amdgpu/resource.h | 24 +++---- src/video_core/texture_cache/image.cpp | 8 +-- src/video_core/texture_cache/image_info.cpp | 4 +- src/video_core/texture_cache/image_info.h | 1 - src/video_core/texture_cache/image_view.cpp | 26 +------- 14 files changed, 101 insertions(+), 128 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 5615c3224..f0cf15af0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -259,14 +259,6 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct ctx.AddCapability(spv::Capability::ImageReadWriteLodAMD); } } - if (info.has_cube_arrays) { - if (info.has_storage_cube_arrays) { - // Implies SampledCubeArray - ctx.AddCapability(spv::Capability::ImageCubeArray); - } else { - ctx.AddCapability(spv::Capability::SampledCubeArray); - } - } if (info.has_texel_buffers) { ctx.AddCapability(spv::Capability::SampledBuffer); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 12fd38d9a..0831b25b2 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ 
b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -190,12 +190,6 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod case AmdGpu::ImageType::Color2DArray: case AmdGpu::ImageType::Color3D: return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[3]), mips()); - case AmdGpu::ImageType::Cube: - // Cube arrays do not have their own type to distinguish by. - if (texture.is_array) { - return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[3]), mips()); - } - return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[2]), zero, mips()); default: UNREACHABLE_MSG("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 06f9853a4..ee6532489 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -788,9 +788,6 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) { return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, true, sampled, format); case AmdGpu::ImageType::Color3D: return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format); - case AmdGpu::ImageType::Cube: - return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, desc.is_array, false, sampled, - format); default: break; } @@ -820,7 +817,6 @@ void EmitContext::DefineImagesAndSamplers() { .bound_type = image_desc.GetBoundType(sharp), .is_integer = is_integer, .is_storage = is_storage, - .is_array = image_desc.is_array, }); interfaces.push_back(id); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 80b0bc432..80d0d4d9f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -225,7 +225,6 @@ public: AmdGpu::ImageType bound_type; bool is_integer = false; 
bool is_storage = false; - bool is_array = false; }; struct BufferDefinition { diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 7a0b736d4..bef61f997 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -301,6 +301,9 @@ private: IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0); void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0); + IR::F32 SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z, + const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res); + void LogMissingOpcode(const GcnInst& inst); private: diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 7fa83eebb..8dc7feed8 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -1042,20 +1042,75 @@ void Translator::V_MAD_U32_U24(const GcnInst& inst) { V_MAD_I32_I24(inst, false); } +IR::F32 Translator::SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z, + const IR::F32& x_res, const IR::F32& y_res, + const IR::F32& z_res) { + const auto abs_x = ir.FPAbs(x); + const auto abs_y = ir.FPAbs(y); + const auto abs_z = ir.FPAbs(z); + + const auto z_face_cond{ + ir.LogicalAnd(ir.FPGreaterThanEqual(abs_z, abs_x), ir.FPGreaterThanEqual(abs_z, abs_y))}; + const auto y_face_cond{ir.FPGreaterThanEqual(abs_y, abs_x)}; + + return IR::F32{ir.Select(z_face_cond, z_res, ir.Select(y_face_cond, y_res, x_res))}; +} + void Translator::V_CUBEID_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[2])); + const auto x = GetSrc<IR::F32>(inst.src[0]); + const auto y = GetSrc<IR::F32>(inst.src[1]); + const auto z = GetSrc<IR::F32>(inst.src[2]); + + const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))}; + const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))}; + const auto 
z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))}; + const IR::F32 x_face{ir.Select(x_neg_cond, ir.Imm32(5.f), ir.Imm32(4.f))}; + const IR::F32 y_face{ir.Select(y_neg_cond, ir.Imm32(3.f), ir.Imm32(2.f))}; + const IR::F32 z_face{ir.Select(z_neg_cond, ir.Imm32(1.f), ir.Imm32(0.f))}; + + const auto result{SelectCubeResult(x, y, z, x_face, y_face, z_face)}; + SetDst(inst.dst[0], result); } void Translator::V_CUBESC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[0])); + const auto x = GetSrc<IR::F32>(inst.src[0]); + const auto y = GetSrc<IR::F32>(inst.src[1]); + const auto z = GetSrc<IR::F32>(inst.src[2]); + + const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))}; + const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))}; + const IR::F32 x_sc{ir.Select(x_neg_cond, ir.FPNeg(x), x)}; + const IR::F32 z_sc{ir.Select(z_neg_cond, z, ir.FPNeg(z))}; + + const auto result{SelectCubeResult(x, y, z, x_sc, x, z_sc)}; + SetDst(inst.dst[0], result); } void Translator::V_CUBETC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], GetSrc<IR::F32>(inst.src[1])); + const auto x = GetSrc<IR::F32>(inst.src[0]); + const auto y = GetSrc<IR::F32>(inst.src[1]); + const auto z = GetSrc<IR::F32>(inst.src[2]); + + const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))}; + const IR::F32 x_z_sc{ir.FPNeg(y)}; + const IR::F32 y_sc{ir.Select(y_neg_cond, ir.FPNeg(z), z)}; + + const auto result{SelectCubeResult(x, y, z, x_z_sc, y_sc, x_z_sc)}; + SetDst(inst.dst[0], result); } void Translator::V_CUBEMA_F32(const GcnInst& inst) { - SetDst(inst.dst[0], ir.Imm32(1.f)); + const auto x = GetSrc<IR::F32>(inst.src[0]); + const auto y = GetSrc<IR::F32>(inst.src[1]); + const auto z = GetSrc<IR::F32>(inst.src[2]); + + const auto two{ir.Imm32(2.f)}; + const IR::F32 x_major_axis{ir.FPMul(x, two)}; + const IR::F32 y_major_axis{ir.FPMul(y, two)}; + const IR::F32 z_major_axis{ir.FPMul(z, two)}; + + const auto result{SelectCubeResult(x, y, z, x_major_axis, y_major_axis, z_major_axis)}; + SetDst(inst.dst[0], result); } void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) { diff --git 
a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 006dfe15e..b5f94cd8c 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -81,16 +81,36 @@ struct ImageResource { if (base_type == AmdGpu::ImageType::Color2DArray && !is_array) { return AmdGpu::ImageType::Color2D; } - if (image.IsPartialCubemap()) { - // Partial cube map - return AmdGpu::ImageType::Color2DArray; + if (base_type == AmdGpu::ImageType::Color2DMsaaArray && !is_array) { + return AmdGpu::ImageType::Color2DMsaa; } return base_type; } + [[nodiscard]] u32 NumViewLevels(const AmdGpu::Image& image) const noexcept { + switch (GetBoundType(image)) { + case AmdGpu::ImageType::Color2DMsaa: + case AmdGpu::ImageType::Color2DMsaaArray: + return 1; + default: + return image.last_level - image.base_level + 1; + } + } + + [[nodiscard]] u32 NumViewLayers(const AmdGpu::Image image) const noexcept { + switch (GetBoundType(image)) { + case AmdGpu::ImageType::Color1D: + case AmdGpu::ImageType::Color2D: + case AmdGpu::ImageType::Color2DMsaa: + case AmdGpu::ImageType::Color3D: + return 1; + default: + return image.last_array - image.base_array + 1; + } + } + [[nodiscard]] bool IsStorage(const AmdGpu::Image& image) const noexcept { - // Need cube as storage when used with ImageRead. 
- return is_written || (is_read && GetBoundType(image) == AmdGpu::ImageType::Cube); + return is_written; } [[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; @@ -206,8 +226,6 @@ struct Info { u64 pgm_hash{}; VAddr pgm_base; bool has_storage_images{}; - bool has_cube_arrays{}; - bool has_storage_cube_arrays{}; bool has_image_buffers{}; bool has_texel_buffers{}; bool has_discard{}; diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index fa17916e7..c3d171585 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -560,32 +560,6 @@ void PatchTextureBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) { } } -IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, - const IR::Value& z, bool is_written, bool is_array) { - // When cubemap is written with imageStore it is treated like 2DArray. - if (is_written) { - return ir.CompositeConstruct(s, t, z); - } - - ASSERT(s.Type() == IR::Type::F32); // in case of fetched image need to adjust the code below - - // We need to fix x and y coordinate, - // because the s and t coordinate will be scaled and plus 1.5 by v_madak_f32. - // We already force the scale value to be 1.0 when handling v_cubema_f32, - // here we subtract 1.5 to recover the original value. 
- const IR::Value x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f)); - const IR::Value y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f)); - if (is_array) { - const IR::U32 array_index = ir.ConvertFToU(32, IR::F32{z}); - const IR::U32 face_id = ir.BitwiseAnd(array_index, ir.Imm32(7u)); - const IR::U32 slice_id = ir.ShiftRightLogical(array_index, ir.Imm32(3u)); - return ir.CompositeConstruct(x, y, ir.ConvertIToF(32, 32, false, face_id), - ir.ConvertIToF(32, 32, false, slice_id)); - } else { - return ir.CompositeConstruct(x, y, z); - } -} - void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, const AmdGpu::Image& image) { const auto handle = inst.Arg(0); @@ -649,7 +623,6 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, case AmdGpu::ImageType::Color2DMsaa: return ir.CompositeConstruct(read(0), read(8)); case AmdGpu::ImageType::Color3D: - case AmdGpu::ImageType::Cube: return ir.CompositeConstruct(read(0), read(8), read(16)); default: UNREACHABLE(); @@ -675,7 +648,6 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, return {ir.CompositeConstruct(get_addr_reg(addr_reg - 4), get_addr_reg(addr_reg - 3)), ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1))}; case AmdGpu::ImageType::Color3D: - case AmdGpu::ImageType::Cube: // (du/dx, dv/dx, dw/dx), (du/dy, dv/dy, dw/dy) addr_reg = addr_reg + 6; return {ir.CompositeConstruct(get_addr_reg(addr_reg - 6), get_addr_reg(addr_reg - 5), @@ -725,10 +697,6 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, addr_reg = addr_reg + 3; return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1), get_coord(addr_reg - 1, 2)); - case AmdGpu::ImageType::Cube: // x, y, face - addr_reg = addr_reg + 3; - return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1), - get_addr_reg(addr_reg - 1), false, inst_info.is_array); default: UNREACHABLE(); } @@ -806,10 +774,6 @@ void PatchImageArgs(IR::Block& block, 
IR::Inst& inst, Info& info) { [[fallthrough]]; case AmdGpu::ImageType::Color3D: // x, y, z, [lod] return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)}; - case AmdGpu::ImageType::Cube: // x, y, face, [lod] - return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2), - inst.GetOpcode() == IR::Opcode::ImageWrite, inst_info.is_array), - body->Arg(3)}; default: UNREACHABLE_MSG("Unknown image type {}", image.GetType()); } diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 4c265b3e0..7fd5b75ff 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -87,16 +87,6 @@ void Visit(Info& info, const IR::Inst& inst) { } } -void VisitImage(Info& info, const ImageResource& image) { - const auto sharp = image.GetSharp(info); - if (image.GetBoundType(sharp) == AmdGpu::ImageType::Cube && image.is_array) { - info.has_cube_arrays = true; - if (image.IsStorage(sharp)) { - info.has_storage_cube_arrays = true; - } - } -} - void CollectShaderInfoPass(IR::Program& program) { Info& info{program.info}; for (IR::Block* const block : program.post_order_blocks) { @@ -104,9 +94,6 @@ void CollectShaderInfoPass(IR::Program& program) { Visit(info, inst); } } - for (const auto& image : program.info.images) { - VisitImage(info, image); - } } } // namespace Shader::Optimization diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index e4e442498..b8c20dcc2 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -226,15 +226,13 @@ struct Image { return pitch + 1; } - u32 NumLayers(bool is_array) const { - u32 slices = GetType() == ImageType::Color3D ? 
1 : depth + 1; - if (GetType() == ImageType::Cube) { - if (is_array) { - slices = last_array + 1; - ASSERT(slices % 6 == 0); - } else { - slices = 6; - } + [[nodiscard]] u32 NumLayers() const noexcept { + u32 slices = depth + 1; + const auto img_type = static_cast<ImageType>(type); + if (img_type == ImageType::Color3D) { + slices = 1; + } else if (img_type == ImageType::Cube) { + slices *= 6; } if (pow2pad) { slices = std::bit_ceil(slices); @@ -257,7 +255,8 @@ struct Image { } ImageType GetType() const noexcept { - return static_cast<ImageType>(type); + const auto img_type = static_cast<ImageType>(type); + return img_type == ImageType::Cube ? ImageType::Color2DArray : img_type; } DataFormat GetDataFmt() const noexcept { @@ -288,11 +287,6 @@ struct Image { return GetDataFmt() >= DataFormat::FormatFmask8_1 && GetDataFmt() <= DataFormat::FormatFmask64_8; } - - bool IsPartialCubemap() const { - const auto viewed_slice = last_array - base_array + 1; - return GetType() == ImageType::Cube && viewed_slice < 6; - } }; static_assert(sizeof(Image) == 32); // 256bits diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index 23249bf21..96881c564 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -153,13 +153,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, // the texture cache should re-create the resource with the usage requested vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat | vk::ImageCreateFlagBits::eExtendedUsage}; - const bool can_be_cube = - (info.type == vk::ImageType::e2D) && - ((info.props.is_pow2 ? 
(info.resources.layers % 8) : (info.resources.layers % 6)) == 0) && - (info.size.width == info.size.height); - if (info.props.is_cube || can_be_cube) { - flags |= vk::ImageCreateFlagBits::eCubeCompatible; - } else if (info.props.is_volume) { + if (info.props.is_volume) { flags |= vk::ImageCreateFlagBits::e2DArrayCompatible; } diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index bdbaecda6..58c2a8e23 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -37,7 +37,6 @@ static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept { return vk::ImageType::e1D; case AmdGpu::ImageType::Color2D: case AmdGpu::ImageType::Color2DMsaa: - case AmdGpu::ImageType::Cube: case AmdGpu::ImageType::Color2DArray: return vk::ImageType::e2D; case AmdGpu::ImageType::Color3D: @@ -130,7 +129,6 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de } type = ConvertImageType(image.GetType()); props.is_tiled = image.IsTiled(); - props.is_cube = image.GetType() == AmdGpu::ImageType::Cube; props.is_volume = image.GetType() == AmdGpu::ImageType::Color3D; props.is_pow2 = image.pow2pad; props.is_block = IsBlockCoded(); @@ -139,7 +137,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de size.depth = props.is_volume ? 
image.depth + 1 : 1; pitch = image.Pitch(); resources.levels = image.NumLevels(); - resources.layers = image.NumLayers(desc.is_array); + resources.layers = image.NumLayers(); num_samples = image.NumSamples(); num_bits = NumBits(image.GetDataFmt()); diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h index 6faca49c5..123540c1e 100644 --- a/src/video_core/texture_cache/image_info.h +++ b/src/video_core/texture_cache/image_info.h @@ -61,7 +61,6 @@ struct ImageInfo { } meta_info{}; struct { - u32 is_cube : 1; u32 is_volume : 1; u32 is_tiled : 1; u32 is_pow2 : 1; diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 73fe8f35e..22ede4402 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -20,8 +20,6 @@ vk::ImageViewType ConvertImageViewType(AmdGpu::ImageType type) { case AmdGpu::ImageType::Color2D: case AmdGpu::ImageType::Color2DMsaa: return vk::ImageViewType::e2D; - case AmdGpu::ImageType::Cube: - return vk::ImageViewType::eCube; case AmdGpu::ImageType::Color2DArray: return vk::ImageViewType::e2DArray; case AmdGpu::ImageType::Color3D: @@ -42,31 +40,13 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso if (desc.is_depth) { format = Vulkan::LiverpoolToVK::PromoteFormatToDepth(format); } + range.base.level = image.base_level; range.base.layer = image.base_array; - if (image.GetType() == AmdGpu::ImageType::Color2DMsaa || - image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) { - range.extent.levels = 1; - } else { - range.extent.levels = image.last_level - image.base_level + 1; - } - range.extent.layers = image.last_array - image.base_array + 1; + range.extent.levels = desc.NumViewLevels(image); + range.extent.layers = desc.NumViewLayers(image); type = ConvertImageViewType(desc.GetBoundType(image)); - // Adjust view type for arrays - if (type == vk::ImageViewType::eCube) { - if 
(desc.is_array) { - type = vk::ImageViewType::eCubeArray; - } else { - // Some games try to bind an array of cubemaps while shader reads only single one. - range.extent.layers = std::min(range.extent.layers, 6u); - } - } - if (type == vk::ImageViewType::e3D && range.extent.layers > 1) { - // Some games pass incorrect layer count for 3D textures so we need to fixup it. - range.extent.layers = 1; - } - if (!is_storage) { mapping = Vulkan::LiverpoolToVK::ComponentMapping(image.DstSelect()); }