shader_recompiler: Convert cube images into 2D arrays.

This commit is contained in:
squidbus 2025-01-07 04:41:53 -08:00
parent 120e6ea28f
commit 501b921e49
14 changed files with 101 additions and 128 deletions

View File

@ -259,14 +259,6 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
ctx.AddCapability(spv::Capability::ImageReadWriteLodAMD);
}
}
if (info.has_cube_arrays) {
if (info.has_storage_cube_arrays) {
// Implies SampledCubeArray
ctx.AddCapability(spv::Capability::ImageCubeArray);
} else {
ctx.AddCapability(spv::Capability::SampledCubeArray);
}
}
if (info.has_texel_buffers) {
ctx.AddCapability(spv::Capability::SampledBuffer);
}

View File

@ -190,12 +190,6 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod
case AmdGpu::ImageType::Color2DArray:
case AmdGpu::ImageType::Color3D:
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[3]), mips());
case AmdGpu::ImageType::Cube:
// Cube arrays do not have their own type to distinguish by.
if (texture.is_array) {
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[3]), mips());
}
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[2]), zero, mips());
default:
UNREACHABLE_MSG("SPIR-V Instruction");
}

View File

@ -788,9 +788,6 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
return ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, true, sampled, format);
case AmdGpu::ImageType::Color3D:
return ctx.TypeImage(sampled_type, spv::Dim::Dim3D, false, false, false, sampled, format);
case AmdGpu::ImageType::Cube:
return ctx.TypeImage(sampled_type, spv::Dim::Cube, false, desc.is_array, false, sampled,
format);
default:
break;
}
@ -820,7 +817,6 @@ void EmitContext::DefineImagesAndSamplers() {
.bound_type = image_desc.GetBoundType(sharp),
.is_integer = is_integer,
.is_storage = is_storage,
.is_array = image_desc.is_array,
});
interfaces.push_back(id);
}

View File

@ -225,7 +225,6 @@ public:
AmdGpu::ImageType bound_type;
bool is_integer = false;
bool is_storage = false;
bool is_array = false;
};
struct BufferDefinition {

View File

@ -301,6 +301,9 @@ private:
IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0);
void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0);
IR::F32 SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res);
void LogMissingOpcode(const GcnInst& inst);
private:

View File

@ -1042,20 +1042,75 @@ void Translator::V_MAD_U32_U24(const GcnInst& inst) {
V_MAD_I32_I24(inst, false);
}
/// Picks one of three candidate results according to which coordinate has the
/// largest magnitude.
///
/// @param x, y, z              Direction components used only for the magnitude comparison.
/// @param x_res, y_res, z_res  Candidate results.
/// @return z_res when |z| >= |x| and |z| >= |y|; otherwise y_res when |y| >= |x|;
///         otherwise x_res.
IR::F32 Translator::SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
                                     const IR::F32& x_res, const IR::F32& y_res,
                                     const IR::F32& z_res) {
    const auto mag_x = ir.FPAbs(x);
    const auto mag_y = ir.FPAbs(y);
    const auto mag_z = ir.FPAbs(z);

    // Both comparisons use >=, so ties resolve in favour of z first, then y.
    const auto z_dominant = ir.LogicalAnd(ir.FPGreaterThanEqual(mag_z, mag_x),
                                          ir.FPGreaterThanEqual(mag_z, mag_y));
    const auto y_dominant = ir.FPGreaterThanEqual(mag_y, mag_x);

    return IR::F32{ir.Select(z_dominant, z_res, ir.Select(y_dominant, y_res, x_res))};
}
/// Translates V_CUBEID_F32: writes a floating-point cube face id to dst[0].
///
/// The three sources are read as (x, y, z). Each axis gets a face-id pair selected
/// by the sign of its component, and SelectCubeResult picks the pair belonging to
/// the component with the largest magnitude.
void Translator::V_CUBEID_F32(const GcnInst& inst) {
    // All sources are read before dst is written, so a destination register that
    // aliases one of the sources cannot be clobbered by an early store.
    const auto x = GetSrc<IR::F32>(inst.src[0]);
    const auto y = GetSrc<IR::F32>(inst.src[1]);
    const auto z = GetSrc<IR::F32>(inst.src[2]);

    const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
    const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
    const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};

    // NOTE(review): the published ISA pseudocode appears to number the X faces 0/1
    // and the Z faces 4/5 -- confirm this x/z assignment against SelectCubeResult.
    const IR::F32 x_face{ir.Select(x_neg_cond, ir.Imm32(5.f), ir.Imm32(4.f))};
    const IR::F32 y_face{ir.Select(y_neg_cond, ir.Imm32(3.f), ir.Imm32(2.f))};
    const IR::F32 z_face{ir.Select(z_neg_cond, ir.Imm32(1.f), ir.Imm32(0.f))};

    const auto result{SelectCubeResult(x, y, z, x_face, y_face, z_face)};
    SetDst(inst.dst[0], result);
}
/// Translates V_CUBESC_F32: writes a cube S coordinate to dst[0].
///
/// The three sources are read as (x, y, z). A candidate value is built per axis and
/// SelectCubeResult picks the one belonging to the component with the largest
/// magnitude.
void Translator::V_CUBESC_F32(const GcnInst& inst) {
    // All sources are read before dst is written, so a destination register that
    // aliases one of the sources cannot be clobbered by an early store.
    const auto x = GetSrc<IR::F32>(inst.src[0]);
    const auto y = GetSrc<IR::F32>(inst.src[1]);
    const auto z = GetSrc<IR::F32>(inst.src[2]);

    const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
    const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
    const IR::F32 x_sc{ir.Select(x_neg_cond, ir.FPNeg(x), x)};
    const IR::F32 z_sc{ir.Select(z_neg_cond, z, ir.FPNeg(z))};

    // The y-dominant candidate is x itself.
    const auto result{SelectCubeResult(x, y, z, x_sc, x, z_sc)};
    SetDst(inst.dst[0], result);
}
/// Translates V_CUBETC_F32: writes a cube T coordinate to dst[0].
///
/// The three sources are read as (x, y, z). The x- and z-dominant cases share the
/// candidate -y; the y-dominant case uses -z when y is negative and z otherwise.
/// SelectCubeResult picks the candidate for the component with the largest magnitude.
void Translator::V_CUBETC_F32(const GcnInst& inst) {
    // All sources are read before dst is written, so a destination register that
    // aliases one of the sources cannot be clobbered by an early store.
    const auto x = GetSrc<IR::F32>(inst.src[0]);
    const auto y = GetSrc<IR::F32>(inst.src[1]);
    const auto z = GetSrc<IR::F32>(inst.src[2]);

    const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
    const IR::F32 x_z_sc{ir.FPNeg(y)};
    const IR::F32 y_sc{ir.Select(y_neg_cond, ir.FPNeg(z), z)};

    const auto result{SelectCubeResult(x, y, z, x_z_sc, y_sc, x_z_sc)};
    SetDst(inst.dst[0], result);
}
/// Translates V_CUBEMA_F32: writes twice the major-axis component to dst[0].
///
/// The three sources are read as (x, y, z). Each component is doubled and
/// SelectCubeResult picks the doubled value of the component with the largest
/// magnitude.
void Translator::V_CUBEMA_F32(const GcnInst& inst) {
    // All sources are read before dst is written, so a destination register that
    // aliases one of the sources cannot be clobbered by an early store.
    const auto x = GetSrc<IR::F32>(inst.src[0]);
    const auto y = GetSrc<IR::F32>(inst.src[1]);
    const auto z = GetSrc<IR::F32>(inst.src[2]);

    // The major axis is scaled by exactly 2.
    const auto two{ir.Imm32(2.f)};
    const IR::F32 x_major_axis{ir.FPMul(x, two)};
    const IR::F32 y_major_axis{ir.FPMul(y, two)};
    const IR::F32 z_major_axis{ir.FPMul(z, two)};

    const auto result{SelectCubeResult(x, y, z, x_major_axis, y_major_axis, z_major_axis)};
    SetDst(inst.dst[0], result);
}
void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {

View File

@ -81,16 +81,36 @@ struct ImageResource {
if (base_type == AmdGpu::ImageType::Color2DArray && !is_array) {
return AmdGpu::ImageType::Color2D;
}
if (image.IsPartialCubemap()) {
// Partial cube map
return AmdGpu::ImageType::Color2DArray;
if (base_type == AmdGpu::ImageType::Color2DMsaaArray && !is_array) {
return AmdGpu::ImageType::Color2DMsaa;
}
return base_type;
}
/// Number of mip levels covered by a view of `image` for this resource.
/// Multisampled bound types always report a single level; every other type
/// reports the inclusive range base_level..last_level.
[[nodiscard]] u32 NumViewLevels(const AmdGpu::Image& image) const noexcept {
    const auto bound_type = GetBoundType(image);
    if (bound_type == AmdGpu::ImageType::Color2DMsaa ||
        bound_type == AmdGpu::ImageType::Color2DMsaaArray) {
        return 1;
    }
    return image.last_level - image.base_level + 1;
}
/// Number of array layers covered by a view of `image` for this resource.
/// Non-array bound types (1D, 2D, 2D MSAA, 3D) always report a single layer;
/// every other type reports the inclusive range base_array..last_array.
///
/// The descriptor is taken by const reference, like the sibling accessors, so it
/// is not copied on every call.
[[nodiscard]] u32 NumViewLayers(const AmdGpu::Image& image) const noexcept {
    switch (GetBoundType(image)) {
    case AmdGpu::ImageType::Color1D:
    case AmdGpu::ImageType::Color2D:
    case AmdGpu::ImageType::Color2DMsaa:
    case AmdGpu::ImageType::Color3D:
        return 1;
    default:
        return image.last_array - image.base_array + 1;
    }
}
/// Whether this resource must be bound as a storage image.
/// Only written images need storage binding. The `image` parameter is kept so
/// existing callers keep compiling.
[[nodiscard]] bool IsStorage(const AmdGpu::Image& image) const noexcept {
    return is_written;
}
[[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
@ -206,8 +226,6 @@ struct Info {
u64 pgm_hash{};
VAddr pgm_base;
bool has_storage_images{};
bool has_cube_arrays{};
bool has_storage_cube_arrays{};
bool has_image_buffers{};
bool has_texel_buffers{};
bool has_discard{};

View File

@ -560,32 +560,6 @@ void PatchTextureBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
}
}
/// Builds the coordinate composite for a cube image access from (s, t, z).
///
/// @param is_written  When true the inputs are passed through unchanged as a 3-component composite.
/// @param is_array    When true `z` is split into a face id and a slice id.
/// @return A 3-component composite, or a 4-component one for sampled cube arrays.
IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
                         const IR::Value& z, bool is_written, bool is_array) {
    // When cubemap is written with imageStore it is treated like 2DArray.
    if (is_written) {
        return ir.CompositeConstruct(s, t, z);
    }

    ASSERT(s.Type() == IR::Type::F32); // in case of fetched image need to adjust the code below

    // The shader scales s and t and adds 1.5 via v_madak_f32. The scale is forced
    // to 1.0 when v_cubema_f32 is handled, so subtracting 1.5 here recovers the
    // original values.
    const IR::Value coord_x = ir.FPSub(IR::F32{s}, ir.Imm32(1.5f));
    const IR::Value coord_y = ir.FPSub(IR::F32{t}, ir.Imm32(1.5f));

    if (!is_array) {
        return ir.CompositeConstruct(coord_x, coord_y, z);
    }

    // For arrays, z is converted to an integer whose low three bits give the face
    // and whose remaining bits give the slice.
    const IR::U32 packed_index = ir.ConvertFToU(32, IR::F32{z});
    const IR::U32 face = ir.BitwiseAnd(packed_index, ir.Imm32(7u));
    const IR::U32 slice = ir.ShiftRightLogical(packed_index, ir.Imm32(3u));
    return ir.CompositeConstruct(coord_x, coord_y, ir.ConvertIToF(32, 32, false, face),
                                 ir.ConvertIToF(32, 32, false, slice));
}
void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
const AmdGpu::Image& image) {
const auto handle = inst.Arg(0);
@ -649,7 +623,6 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
case AmdGpu::ImageType::Color2DMsaa:
return ir.CompositeConstruct(read(0), read(8));
case AmdGpu::ImageType::Color3D:
case AmdGpu::ImageType::Cube:
return ir.CompositeConstruct(read(0), read(8), read(16));
default:
UNREACHABLE();
@ -675,7 +648,6 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
return {ir.CompositeConstruct(get_addr_reg(addr_reg - 4), get_addr_reg(addr_reg - 3)),
ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1))};
case AmdGpu::ImageType::Color3D:
case AmdGpu::ImageType::Cube:
// (du/dx, dv/dx, dw/dx), (du/dy, dv/dy, dw/dy)
addr_reg = addr_reg + 6;
return {ir.CompositeConstruct(get_addr_reg(addr_reg - 6), get_addr_reg(addr_reg - 5),
@ -725,10 +697,6 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
addr_reg = addr_reg + 3;
return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
get_coord(addr_reg - 1, 2));
case AmdGpu::ImageType::Cube: // x, y, face
addr_reg = addr_reg + 3;
return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
get_addr_reg(addr_reg - 1), false, inst_info.is_array);
default:
UNREACHABLE();
}
@ -806,10 +774,6 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
[[fallthrough]];
case AmdGpu::ImageType::Color3D: // x, y, z, [lod]
return {ir.CompositeConstruct(body->Arg(0), body->Arg(1), body->Arg(2)), body->Arg(3)};
case AmdGpu::ImageType::Cube: // x, y, face, [lod]
return {PatchCubeCoord(ir, body->Arg(0), body->Arg(1), body->Arg(2),
inst.GetOpcode() == IR::Opcode::ImageWrite, inst_info.is_array),
body->Arg(3)};
default:
UNREACHABLE_MSG("Unknown image type {}", image.GetType());
}

View File

@ -87,16 +87,6 @@ void Visit(Info& info, const IR::Inst& inst) {
}
}
/// Records cube-array usage for one image resource in `info`.
/// Sets has_cube_arrays when the bound type is Cube and the resource is an array,
/// and additionally has_storage_cube_arrays when that resource is a storage image.
void VisitImage(Info& info, const ImageResource& image) {
    const auto descriptor = image.GetSharp(info);
    if (image.GetBoundType(descriptor) != AmdGpu::ImageType::Cube || !image.is_array) {
        return;
    }

    info.has_cube_arrays = true;
    if (image.IsStorage(descriptor)) {
        info.has_storage_cube_arrays = true;
    }
}
void CollectShaderInfoPass(IR::Program& program) {
Info& info{program.info};
for (IR::Block* const block : program.post_order_blocks) {
@ -104,9 +94,6 @@ void CollectShaderInfoPass(IR::Program& program) {
Visit(info, inst);
}
}
for (const auto& image : program.info.images) {
VisitImage(info, image);
}
}
} // namespace Shader::Optimization

View File

@ -226,15 +226,13 @@ struct Image {
return pitch + 1;
}
u32 NumLayers(bool is_array) const {
u32 slices = GetType() == ImageType::Color3D ? 1 : depth + 1;
if (GetType() == ImageType::Cube) {
if (is_array) {
slices = last_array + 1;
ASSERT(slices % 6 == 0);
} else {
slices = 6;
}
[[nodiscard]] u32 NumLayers() const noexcept {
u32 slices = depth + 1;
const auto img_type = static_cast<ImageType>(type);
if (img_type == ImageType::Color3D) {
slices = 1;
} else if (img_type == ImageType::Cube) {
slices *= 6;
}
if (pow2pad) {
slices = std::bit_ceil(slices);
@ -257,7 +255,8 @@ struct Image {
}
/// Returns the image type used by the rest of the renderer.
/// Cube images are reported as Color2DArray; every other raw type value is
/// returned unchanged.
ImageType GetType() const noexcept {
    const auto img_type = static_cast<ImageType>(type);
    return img_type == ImageType::Cube ? ImageType::Color2DArray : img_type;
}
DataFormat GetDataFmt() const noexcept {
@ -288,11 +287,6 @@ struct Image {
return GetDataFmt() >= DataFormat::FormatFmask8_1 &&
GetDataFmt() <= DataFormat::FormatFmask64_8;
}
/// True when GetType() reports Cube and the view covers fewer than six array
/// slices (base_array..last_array inclusive).
bool IsPartialCubemap() const {
    if (GetType() != ImageType::Cube) {
        return false;
    }
    return (last_array - base_array + 1) < 6;
}
};
static_assert(sizeof(Image) == 32); // 256bits

View File

@ -153,13 +153,7 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
// the texture cache should re-create the resource with the usage requested
vk::ImageCreateFlags flags{vk::ImageCreateFlagBits::eMutableFormat |
vk::ImageCreateFlagBits::eExtendedUsage};
const bool can_be_cube =
(info.type == vk::ImageType::e2D) &&
((info.props.is_pow2 ? (info.resources.layers % 8) : (info.resources.layers % 6)) == 0) &&
(info.size.width == info.size.height);
if (info.props.is_cube || can_be_cube) {
flags |= vk::ImageCreateFlagBits::eCubeCompatible;
} else if (info.props.is_volume) {
if (info.props.is_volume) {
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
}

View File

@ -37,7 +37,6 @@ static vk::ImageType ConvertImageType(AmdGpu::ImageType type) noexcept {
return vk::ImageType::e1D;
case AmdGpu::ImageType::Color2D:
case AmdGpu::ImageType::Color2DMsaa:
case AmdGpu::ImageType::Cube:
case AmdGpu::ImageType::Color2DArray:
return vk::ImageType::e2D;
case AmdGpu::ImageType::Color3D:
@ -130,7 +129,6 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
}
type = ConvertImageType(image.GetType());
props.is_tiled = image.IsTiled();
props.is_cube = image.GetType() == AmdGpu::ImageType::Cube;
props.is_volume = image.GetType() == AmdGpu::ImageType::Color3D;
props.is_pow2 = image.pow2pad;
props.is_block = IsBlockCoded();
@ -139,7 +137,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& de
size.depth = props.is_volume ? image.depth + 1 : 1;
pitch = image.Pitch();
resources.levels = image.NumLevels();
resources.layers = image.NumLayers(desc.is_array);
resources.layers = image.NumLayers();
num_samples = image.NumSamples();
num_bits = NumBits(image.GetDataFmt());

View File

@ -61,7 +61,6 @@ struct ImageInfo {
} meta_info{};
struct {
u32 is_cube : 1;
u32 is_volume : 1;
u32 is_tiled : 1;
u32 is_pow2 : 1;

View File

@ -20,8 +20,6 @@ vk::ImageViewType ConvertImageViewType(AmdGpu::ImageType type) {
case AmdGpu::ImageType::Color2D:
case AmdGpu::ImageType::Color2DMsaa:
return vk::ImageViewType::e2D;
case AmdGpu::ImageType::Cube:
return vk::ImageViewType::eCube;
case AmdGpu::ImageType::Color2DArray:
return vk::ImageViewType::e2DArray;
case AmdGpu::ImageType::Color3D:
@ -42,31 +40,13 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso
if (desc.is_depth) {
format = Vulkan::LiverpoolToVK::PromoteFormatToDepth(format);
}
range.base.level = image.base_level;
range.base.layer = image.base_array;
if (image.GetType() == AmdGpu::ImageType::Color2DMsaa ||
image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) {
range.extent.levels = 1;
} else {
range.extent.levels = image.last_level - image.base_level + 1;
}
range.extent.layers = image.last_array - image.base_array + 1;
range.extent.levels = desc.NumViewLevels(image);
range.extent.layers = desc.NumViewLayers(image);
type = ConvertImageViewType(desc.GetBoundType(image));
// Adjust view type for arrays
if (type == vk::ImageViewType::eCube) {
if (desc.is_array) {
type = vk::ImageViewType::eCubeArray;
} else {
// Some games try to bind an array of cubemaps while shader reads only single one.
range.extent.layers = std::min(range.extent.layers, 6u);
}
}
if (type == vk::ImageViewType::e3D && range.extent.layers > 1) {
// Some games pass incorrect layer count for 3D textures so we need to fixup it.
range.extent.layers = 1;
}
if (!is_storage) {
mapping = Vulkan::LiverpoolToVK::ComponentMapping(image.DstSelect());
}