shader_recompiler: Improvements to array and cube handling. (#2083)

* shader_recompiler: Account for instruction array flag in image type.

* shader_recompiler: Check da flag for all mimg instructions.

* shader_recompiler: Convert cube images into 2D arrays.

* shader_recompiler: Move image resource functions into sharp type.

* shader_recompiler: Use native AMD cube instructions when possible.

* specialization: Fix buffer storage mistake.
This commit is contained in:
squidbus
2025-01-10 00:48:12 -08:00
committed by GitHub
parent 93402620de
commit 725814ce01
28 changed files with 217 additions and 144 deletions

View File

@@ -301,6 +301,9 @@ private:
IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0);
void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0);
IR::F32 SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res);
void LogMissingOpcode(const GcnInst& inst);
private:

View File

@@ -3,6 +3,7 @@
#include "shader_recompiler/frontend/opcodes.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/profile.h"
namespace Shader::Gcn {
@@ -1042,20 +1043,92 @@ void Translator::V_MAD_U32_U24(const GcnInst& inst) {
V_MAD_I32_I24(inst, false);
}
// Chooses between three per-axis candidate values based on which component of
// (x, y, z) has the largest magnitude:
//   - z_res when |z| >= |x| and |z| >= |y|
//   - otherwise y_res when |y| >= |x|
//   - otherwise x_res
IR::F32 Translator::SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
                                     const IR::F32& x_res, const IR::F32& y_res,
                                     const IR::F32& z_res) {
    const auto mag_x = ir.FPAbs(x);
    const auto mag_y = ir.FPAbs(y);
    const auto mag_z = ir.FPAbs(z);

    // Z wins ties against both other axes.
    const auto z_ge_x = ir.FPGreaterThanEqual(mag_z, mag_x);
    const auto z_ge_y = ir.FPGreaterThanEqual(mag_z, mag_y);
    const auto z_is_major = ir.LogicalAnd(z_ge_x, z_ge_y);

    // Y wins ties against X.
    const auto y_is_major = ir.FPGreaterThanEqual(mag_y, mag_x);

    const auto xy_choice = ir.Select(y_is_major, y_res, x_res);
    return IR::F32{ir.Select(z_is_major, z_res, xy_choice)};
}
// Translates V_CUBEID_F32: writes the cube map face index (as a float) selected by the
// direction vector (src0, src1, src2) = (x, y, z) into dst0.
// Uses the native cube face index operation when the profile supports it, otherwise
// computes the index manually from the major axis and its sign.
void Translator::V_CUBEID_F32(const GcnInst& inst) {
    // All sources are read before dst is written; dst may name the same register as a
    // source, so no write to dst may happen ahead of these reads.
    const auto x = GetSrc<IR::F32>(inst.src[0]);
    const auto y = GetSrc<IR::F32>(inst.src[1]);
    const auto z = GetSrc<IR::F32>(inst.src[2]);

    IR::F32 result;
    if (profile.supports_native_cube_calc) {
        result = ir.CubeFaceIndex(ir.CompositeConstruct(x, y, z));
    } else {
        const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
        const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
        const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
        // Face numbering: +X = 0, -X = 1, +Y = 2, -Y = 3, +Z = 4, -Z = 5.
        const IR::F32 x_face{ir.Select(x_neg_cond, ir.Imm32(1.f), ir.Imm32(0.f))};
        const IR::F32 y_face{ir.Select(y_neg_cond, ir.Imm32(3.f), ir.Imm32(2.f))};
        const IR::F32 z_face{ir.Select(z_neg_cond, ir.Imm32(5.f), ir.Imm32(4.f))};

        result = SelectCubeResult(x, y, z, x_face, y_face, z_face);
    }
    SetDst(inst.dst[0], result);
}
// Translates V_CUBESC_F32: writes the cube map S coordinate for the direction vector
// (src0, src1, src2) = (x, y, z) into dst0.
// Uses the native cube face coordinate operation (component 0) when the profile supports
// it, otherwise selects the coordinate manually based on the major axis.
void Translator::V_CUBESC_F32(const GcnInst& inst) {
    // All sources are read before dst is written; dst may name the same register as a
    // source, so no write to dst may happen ahead of these reads.
    const auto x = GetSrc<IR::F32>(inst.src[0]);
    const auto y = GetSrc<IR::F32>(inst.src[1]);
    const auto z = GetSrc<IR::F32>(inst.src[2]);

    IR::F32 result;
    if (profile.supports_native_cube_calc) {
        const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))};
        result = IR::F32{ir.CompositeExtract(coords, 0)};
    } else {
        const auto x_neg_cond{ir.FPLessThan(x, ir.Imm32(0.f))};
        const auto z_neg_cond{ir.FPLessThan(z, ir.Imm32(0.f))};
        // S coordinate per major axis:
        //   +X -> -z, -X -> +z, +/-Y -> +x, +Z -> +x, -Z -> -x
        const IR::F32 x_sc{ir.Select(x_neg_cond, z, ir.FPNeg(z))};
        const IR::F32 z_sc{ir.Select(z_neg_cond, ir.FPNeg(x), x)};

        result = SelectCubeResult(x, y, z, x_sc, x, z_sc);
    }
    SetDst(inst.dst[0], result);
}
// Translates V_CUBETC_F32: writes the cube map T coordinate for the direction vector
// (src0, src1, src2) = (x, y, z) into dst0.
// Uses the native cube face coordinate operation (component 1) when the profile supports
// it, otherwise selects the coordinate manually based on the major axis.
void Translator::V_CUBETC_F32(const GcnInst& inst) {
    // All sources are read before dst is written; dst may name the same register as a
    // source, so no write to dst may happen ahead of these reads.
    const auto x = GetSrc<IR::F32>(inst.src[0]);
    const auto y = GetSrc<IR::F32>(inst.src[1]);
    const auto z = GetSrc<IR::F32>(inst.src[2]);

    IR::F32 result;
    if (profile.supports_native_cube_calc) {
        const auto coords{ir.CubeFaceCoord(ir.CompositeConstruct(x, y, z))};
        result = IR::F32{ir.CompositeExtract(coords, 1)};
    } else {
        const auto y_neg_cond{ir.FPLessThan(y, ir.Imm32(0.f))};
        // T coordinate per major axis:
        //   +/-X -> -y, +/-Z -> -y, +Y -> +z, -Y -> -z
        const IR::F32 x_z_sc{ir.FPNeg(y)};
        const IR::F32 y_sc{ir.Select(y_neg_cond, ir.FPNeg(z), z)};

        result = SelectCubeResult(x, y, z, x_z_sc, y_sc, x_z_sc);
    }
    SetDst(inst.dst[0], result);
}
// Translates V_CUBEMA_F32: writes two times the major-axis component of the direction
// vector (src0, src1, src2) = (x, y, z) into dst0. The major axis is chosen by
// SelectCubeResult (largest magnitude, Z preferred over Y preferred over X on ties).
void Translator::V_CUBEMA_F32(const GcnInst& inst) {
    // All sources are read before dst is written; dst may name the same register as a
    // source, so no write to dst may happen ahead of these reads.
    const auto x = GetSrc<IR::F32>(inst.src[0]);
    const auto y = GetSrc<IR::F32>(inst.src[1]);
    const auto z = GetSrc<IR::F32>(inst.src[2]);

    const auto two{ir.Imm32(2.f)};
    const IR::F32 x_major_axis{ir.FPMul(x, two)};
    const IR::F32 y_major_axis{ir.FPMul(y, two)};
    const IR::F32 z_major_axis{ir.FPMul(z, two)};

    const auto result{SelectCubeResult(x, y, z, x_major_axis, y_major_axis, z_major_axis)};
    SetDst(inst.dst[0], result);
}
void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {

View File

@@ -418,6 +418,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
IR::TextureInstInfo info{};
info.has_lod.Assign(has_mip);
info.is_array.Assign(mimg.da);
const IR::Value texel = ir.ImageRead(handle, body, {}, {}, info);
for (u32 i = 0; i < 4; i++) {
@@ -442,6 +443,7 @@ void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) {
IR::TextureInstInfo info{};
info.has_lod.Assign(has_mip);
info.is_array.Assign(mimg.da);
boost::container::static_vector<IR::F32, 4> comps;
for (u32 i = 0; i < 4; i++) {
@@ -456,13 +458,18 @@ void Translator::IMAGE_STORE(bool has_mip, const GcnInst& inst) {
}
void Translator::IMAGE_GET_RESINFO(const GcnInst& inst) {
const auto& mimg = inst.control.mimg;
IR::VectorReg dst_reg{inst.dst[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
const auto flags = ImageResFlags(inst.control.mimg.dmask);
const bool has_mips = flags.test(ImageResComponent::MipCount);
const IR::U32 lod = ir.GetVectorReg(IR::VectorReg(inst.src[0].code));
const IR::Value tsharp = ir.GetScalarReg(tsharp_reg);
const IR::Value size = ir.ImageQueryDimension(tsharp, lod, ir.Imm1(has_mips));
IR::TextureInstInfo info{};
info.is_array.Assign(mimg.da);
const IR::Value size = ir.ImageQueryDimension(tsharp, lod, ir.Imm1(has_mips), info);
if (flags.test(ImageResComponent::Width)) {
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(size, 0)});
@@ -484,6 +491,9 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
IR::VectorReg addr_reg{inst.src[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
IR::TextureInstInfo info{};
info.is_array.Assign(mimg.da);
const IR::Value value = ir.GetVectorReg(val_reg);
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
const IR::Value body =
@@ -494,25 +504,25 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
case AtomicOp::Swap:
return ir.ImageAtomicExchange(handle, body, value, {});
case AtomicOp::Add:
return ir.ImageAtomicIAdd(handle, body, value, {});
return ir.ImageAtomicIAdd(handle, body, value, info);
case AtomicOp::Smin:
return ir.ImageAtomicIMin(handle, body, value, true, {});
return ir.ImageAtomicIMin(handle, body, value, true, info);
case AtomicOp::Umin:
return ir.ImageAtomicUMin(handle, body, value, {});
return ir.ImageAtomicUMin(handle, body, value, info);
case AtomicOp::Smax:
return ir.ImageAtomicIMax(handle, body, value, true, {});
return ir.ImageAtomicIMax(handle, body, value, true, info);
case AtomicOp::Umax:
return ir.ImageAtomicUMax(handle, body, value, {});
return ir.ImageAtomicUMax(handle, body, value, info);
case AtomicOp::And:
return ir.ImageAtomicAnd(handle, body, value, {});
return ir.ImageAtomicAnd(handle, body, value, info);
case AtomicOp::Or:
return ir.ImageAtomicOr(handle, body, value, {});
return ir.ImageAtomicOr(handle, body, value, info);
case AtomicOp::Xor:
return ir.ImageAtomicXor(handle, body, value, {});
return ir.ImageAtomicXor(handle, body, value, info);
case AtomicOp::Inc:
return ir.ImageAtomicInc(handle, body, value, {});
return ir.ImageAtomicInc(handle, body, value, info);
case AtomicOp::Dec:
return ir.ImageAtomicDec(handle, body, value, {});
return ir.ImageAtomicDec(handle, body, value, info);
default:
UNREACHABLE();
}
@@ -643,11 +653,14 @@ void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
IR::VectorReg addr_reg{inst.src[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
IR::TextureInstInfo info{};
info.is_array.Assign(mimg.da);
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
const IR::Value body = ir.CompositeConstruct(
ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
const IR::Value lod = ir.ImageQueryLod(handle, body, {});
const IR::Value lod = ir.ImageQueryLod(handle, body, info);
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 0)});
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)});
}