shader_recompiler: Implement guest barycentrics (#3245)

* shader_recompiler: Implement guest barycentrics

* Review comments and some cleanup
This commit is contained in:
TheTurtle
2025-07-15 18:49:12 +03:00
committed by GitHub
parent 87f6cce7b1
commit 4407ebdd9b
17 changed files with 314 additions and 229 deletions

View File

@@ -21,50 +21,39 @@
namespace Shader::Gcn {
Translator::Translator(Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
: info{info_}, runtime_info{runtime_info_}, profile{profile_},
next_vgpr_num{runtime_info.num_allocated_vgprs} {
if (info.l_stage == LogicalStage::Fragment) {
dst_frag_vreg = GatherInterpQualifiers();
static IR::VectorReg IterateBarycentrics(const RuntimeInfo& runtime_info, auto&& set_attribute) {
if (runtime_info.stage != Stage::Fragment) {
return IR::VectorReg::V0;
}
}
IR::VectorReg Translator::GatherInterpQualifiers() {
u32 dst_vreg{};
if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // J
info.has_perspective_interp = true;
set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothSample, 0); // I
set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothSample, 1); // J
}
if (runtime_info.fs_info.addr_flags.persp_center_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // J
info.has_perspective_interp = true;
set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmooth, 0); // I
set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmooth, 1); // J
}
if (runtime_info.fs_info.addr_flags.persp_centroid_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // J
info.has_perspective_interp = true;
set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothCentroid, 0); // I
set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothCentroid, 1); // J
}
if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) {
++dst_vreg; // I/W
++dst_vreg; // J/W
++dst_vreg; // 1/W
set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 0); // I/W
set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 1); // J/W
set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 2); // 1/W
}
if (runtime_info.fs_info.addr_flags.linear_sample_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // J
info.has_linear_interp = true;
set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspSample, 0); // I
set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspSample, 1); // J
}
if (runtime_info.fs_info.addr_flags.linear_center_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // J
info.has_linear_interp = true;
set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPersp, 0); // I
set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPersp, 1); // J
}
if (runtime_info.fs_info.addr_flags.linear_centroid_ena) {
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // I
vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // J
info.has_linear_interp = true;
set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspCentroid, 0); // I
set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspCentroid, 1); // J
}
if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) {
++dst_vreg;
@@ -72,6 +61,14 @@ IR::VectorReg Translator::GatherInterpQualifiers() {
return IR::VectorReg(dst_vreg);
}
/// Builds a translator over the given shader info, runtime info and profile.
/// After the members are bound, the barycentric layout is walked once and, for every VGPR index
/// reported by IterateBarycentrics, the attribute reported with it is stored in vgpr_to_interp
/// (V_INTERP_P2_F32 later looks attributes up in that table by source register code).
Translator::Translator(Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
    : info{info_}, runtime_info{runtime_info_}, profile{profile_},
      next_vgpr_num{runtime_info.num_allocated_vgprs} {
    // The component index passed by the iterator is not needed for the lookup table.
    const auto record_attribute = [this](u32 vgpr_index, IR::Attribute barycentric, u32) {
        vgpr_to_interp[vgpr_index] = barycentric;
    };
    IterateBarycentrics(runtime_info, record_attribute);
}
void Translator::EmitPrologue(IR::Block* first_block) {
ir = IR::IREmitter(*first_block, first_block->begin());
@@ -127,7 +124,10 @@ void Translator::EmitPrologue(IR::Block* first_block) {
}
break;
case LogicalStage::Fragment:
dst_vreg = dst_frag_vreg;
dst_vreg =
IterateBarycentrics(runtime_info, [this](u32 vreg, IR::Attribute attrib, u32 comp) {
ir.SetVectorReg(IR::VectorReg(vreg), ir.GetAttribute(attrib, comp));
});
if (runtime_info.fs_info.addr_flags.pos_x_float_ena) {
if (runtime_info.fs_info.en_flags.pos_x_float_ena) {
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0));
@@ -151,7 +151,8 @@ void Translator::EmitPrologue(IR::Block* first_block) {
}
if (runtime_info.fs_info.addr_flags.pos_w_float_ena) {
if (runtime_info.fs_info.en_flags.pos_w_float_ena) {
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 3));
ir.SetVectorReg(dst_vreg++,
ir.FPRecip(ir.GetAttribute(IR::Attribute::FragCoord, 3)));
} else {
ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f));
}

View File

@@ -265,6 +265,7 @@ public:
// Vector interpolation
// VINTRP
void V_INTERP_P1_F32(const GcnInst& inst);
void V_INTERP_P2_F32(const GcnInst& inst);
void V_INTERP_MOV_F32(const GcnInst& inst);
@@ -323,7 +324,6 @@ private:
void LogMissingOpcode(const GcnInst& inst);
IR::VectorReg GetScratchVgpr(u32 offset);
IR::VectorReg GatherInterpQualifiers();
private:
IR::IREmitter ir;
@@ -332,8 +332,7 @@ private:
const Profile& profile;
u32 next_vgpr_num;
std::unordered_map<u32, IR::VectorReg> vgpr_map;
std::array<IR::Interpolation, MaxInterpVgpr> vgpr_to_interp{};
IR::VectorReg dst_frag_vreg{};
std::array<IR::Attribute, MaxInterpVgpr> vgpr_to_interp{};
bool opcode_missing = false;
};

View File

@@ -5,11 +5,32 @@
namespace Shader::Gcn {
using Interpolation = Info::Interpolation;
/// Converts a barycentric-coordinate attribute into a pair of interpolation qualifiers.
/// The first qualifier is Smooth or NoPerspective; the second is None, Centroid or Sample.
/// Only the six BaryCoordSmooth* / BaryCoordNoPersp* attributes are accepted; anything else is
/// reported through UNREACHABLE_MSG.
static Interpolation GetInterpolation(IR::Attribute attribute) {
    // No-perspective variants.
    if (attribute == IR::Attribute::BaryCoordNoPersp) {
        return {Qualifier::NoPerspective, Qualifier::None};
    }
    if (attribute == IR::Attribute::BaryCoordNoPerspCentroid) {
        return {Qualifier::NoPerspective, Qualifier::Centroid};
    }
    if (attribute == IR::Attribute::BaryCoordNoPerspSample) {
        return {Qualifier::NoPerspective, Qualifier::Sample};
    }
    // Smooth variants.
    if (attribute == IR::Attribute::BaryCoordSmooth) {
        return {Qualifier::Smooth, Qualifier::None};
    }
    if (attribute == IR::Attribute::BaryCoordSmoothCentroid) {
        return {Qualifier::Smooth, Qualifier::Centroid};
    }
    if (attribute == IR::Attribute::BaryCoordSmoothSample) {
        return {Qualifier::Smooth, Qualifier::Sample};
    }
    UNREACHABLE_MSG("Unhandled barycentric attribute {}", NameOf(attribute));
}
void Translator::EmitVectorInterpolation(const GcnInst& inst) {
switch (inst.opcode) {
// VINTRP
case Opcode::V_INTERP_P1_F32:
return;
return V_INTERP_P1_F32(inst);
case Opcode::V_INTERP_P2_F32:
return V_INTERP_P2_F32(inst);
case Opcode::V_INTERP_MOV_F32:
@@ -21,19 +42,57 @@ void Translator::EmitVectorInterpolation(const GcnInst& inst) {
// VINTRP
void Translator::V_INTERP_P1_F32(const GcnInst& inst) {
if (!profile.needs_manual_interpolation) {
return;
}
// VDST = P10 * VSRC + P0
const u32 attr_index = inst.control.vintrp.attr;
const IR::Attribute attrib = IR::Attribute::Param0 + attr_index;
const IR::F32 p0 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 0);
const IR::F32 p1 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 1);
const IR::F32 i = GetSrc<IR::F32>(inst.src[0]);
const IR::F32 result = ir.FPFma(ir.FPSub(p1, p0), i, p0);
SetDst(inst.dst[0], result);
}
// V_INTERP_P2_F32: writes an interpolated fragment input component to inst.dst[0].
// The input is selected by the instruction's vintrp.attr / vintrp.chan fields.
// NOTE(review): the four statements right after attr_index declare `attr` and `attrib` and
// call SetDst, and the statements following them declare the same names again. This looks like
// pre-change lines shown next to their replacements; confirm which set is intended.
void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
const u32 attr_index = inst.control.vintrp.attr;
const auto& attr = runtime_info.fs_info.inputs.at(attr_index);
info.interp_qualifiers[attr_index] = vgpr_to_interp[inst.src[0].code];
const IR::Attribute attrib{IR::Attribute::Param0 + attr_index};
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
const IR::Attribute attrib = IR::Attribute::Param0 + attr_index;
const auto& attr = runtime_info.fs_info.inputs[attr_index];
auto& interp = info.fs_interpolation[attr_index];
// This instruction is only expected for inputs that are neither default nor flat.
ASSERT(!attr.IsDefault() && !attr.is_flat);
if (!profile.needs_manual_interpolation) {
// Derive the input's qualifiers from the attribute recorded for the source VGPR, then read
// the attribute component directly.
interp = GetInterpolation(vgpr_to_interp[inst.src[0].code]);
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
return;
}
// VDST = P20 * VSRC + VDST
// Manual path: (p2 - p0) stands in for P20, the source register supplies J, and the current
// destination value is the accumulator.
const IR::F32 p0 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 0);
const IR::F32 p2 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 2);
const IR::F32 j = GetSrc<IR::F32>(inst.src[0]);
const IR::F32 result = ir.FPFma(ir.FPSub(p2, p0), j, GetSrc<IR::F32>(inst.dst[0]));
// The input is marked per-vertex because its values are fetched by index above.
interp.primary = Qualifier::PerVertex;
SetDst(inst.dst[0], result);
}
// V_INTERP_MOV_F32: copies a flat fragment input component to inst.dst[0] without interpolating.
// NOTE(review): the three statements right after attr_index declare `attr` and `attrib` and
// call SetDst, and the statements following them declare the same names again. This looks like
// pre-change lines shown next to their replacements; confirm which set is intended.
void Translator::V_INTERP_MOV_F32(const GcnInst& inst) {
const u32 attr_index = inst.control.vintrp.attr;
const auto& attr = runtime_info.fs_info.inputs.at(attr_index);
const IR::Attribute attrib{IR::Attribute::Param0 + attr_index};
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
const IR::Attribute attrib = IR::Attribute::Param0 + attr_index;
const auto& attr = runtime_info.fs_info.inputs[attr_index];
auto& interp = info.fs_interpolation[attr_index];
// This instruction is only expected for inputs flagged as flat.
ASSERT(attr.is_flat);
// Take the per-vertex path when the AMD explicit-vertex-parameter feature is available, or
// when fragment shader barycentrics are supported and not flagged as incomplete.
if (profile.supports_amd_shader_explicit_vertex_parameter ||
(profile.supports_fragment_shader_barycentric &&
!profile.has_incomplete_fragment_shader_barycentric)) {
// VSRC 0=P10, 1=P20, 2=P0
// (code + 1) % 3 turns source codes 0, 1, 2 into attribute indices 1, 2, 0.
interp.primary = Qualifier::PerVertex;
SetDst(inst.dst[0],
ir.GetAttribute(attrib, inst.control.vintrp.chan, (inst.src[0].code + 1) % 3));
} else {
// Fallback: mark the input flat and read it without an index.
interp.primary = Qualifier::Flat;
SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan));
}
}
} // namespace Shader::Gcn