Handle atomic instructions

This commit is contained in:
Lander Gallastegi 2024-11-14 19:44:02 +01:00
parent fe3b41c64b
commit 0fd4c3b307

View File

@ -418,43 +418,34 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
} }
} }
void PatchNormalization(IR::Inst& inst, IR::IREmitter& ir, const AmdGpu::Image& image, void PatchNormalization(IR::Inst& inst, IR::IREmitter& ir, const AmdGpu::Image& image) {
bool is_write) {
if (!image.NeedsNormalizationPatch()) { if (!image.NeedsNormalizationPatch()) {
return; return;
} }
bool is_signed = image.GetNumberFmt() == AmdGpu::NumberFormat::Snorm; bool is_signed = image.GetNumberFmt() == AmdGpu::NumberFormat::Snorm;
bool is_atomic = IsImageAtomicInstruction(inst);
bool is_write = is_atomic || inst.GetOpcode() == IR::Opcode::ImageWrite;
int num_components = AmdGpu::NumComponents(image.GetDataFmt()); int num_components = AmdGpu::NumComponents(image.GetDataFmt());
IR::F32 multipier = ir.Imm32(is_signed ? 2147483647.0f : 4294967295.0f); IR::F32 multipier = ir.Imm32(is_signed ? 2147483647.0f : 4294967295.0f);
if (!is_write) const auto get_mul_vec = [&]() -> IR::F32 {
multipier = ir.FPRecip(multipier); switch (num_components) {
case 1:
switch (num_components) { return multipier;
case 1: case 2:
break; return IR::F32{ir.CompositeConstruct(multipier, multipier)};
case 2: case 3:
multipier = IR::F32{ir.CompositeConstruct(multipier, multipier)}; return IR::F32{ir.CompositeConstruct(multipier, multipier, multipier)};
case 3: case 4:
multipier = IR::F32{ir.CompositeConstruct(multipier, multipier, multipier)}; return IR::F32{ir.CompositeConstruct(multipier, multipier, multipier, multipier)};
case 4: default:
multipier = IR::F32{ir.CompositeConstruct(multipier, multipier, multipier, multipier)}; UNREACHABLE();
default:
UNREACHABLE();
}
if (is_write) {
IR::F32 data = IR::F32{inst.Arg(2)};
data = ir.FPMul(data, multipier);
if (is_signed) {
inst.SetArg(2, ir.ConvertFToS(32, data));
} else {
inst.SetArg(2, ir.ConvertFToU(32, data));
} }
} else { };
const auto patch_read = [&]() {
IR::Value data = IR::Value(ir.CopyInst(inst)); IR::Value data = IR::Value(ir.CopyInst(inst));
if (is_signed) { if (is_signed) {
data = ir.ConvertSToF(32, 32, data); data = ir.ConvertSToF(32, 32, data);
@ -462,8 +453,28 @@ void PatchNormalization(IR::Inst& inst, IR::IREmitter& ir, const AmdGpu::Image&
data = ir.ConvertUToF(32, 32, data); data = ir.ConvertUToF(32, 32, data);
} }
data = ir.FPMul(IR::F32(data), multipier); data = ir.FPMul(IR::F32(data), get_mul_vec());
inst.ReplaceUsesWith(data); inst.ReplaceUsesWith(data);
};
if (is_write) {
IR::F32 data = IR::F32{inst.Arg(2)};
data = ir.FPMul(data, get_mul_vec());
if (is_signed) {
inst.SetArg(2, ir.ConvertFToS(32, data));
} else {
inst.SetArg(2, ir.ConvertFToU(32, data));
}
// Atomic instructions return the old value, so we need to patch the read.
if (is_atomic) {
multipier = ir.FPRecip(multipier);
patch_read();
}
} else {
multipier = ir.FPRecip(multipier);
patch_read();
} }
} }
@ -649,7 +660,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
}(); }();
inst.ReplaceUsesWith(new_inst); inst.ReplaceUsesWith(new_inst);
PatchNormalization(*new_inst.Inst(), ir, image, false); PatchNormalization(*new_inst.Inst(), ir, image);
} }
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
@ -777,7 +788,7 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
inst.SetArg(4, arg); inst.SetArg(4, arg);
} }
PatchNormalization(inst, ir, image, inst.GetOpcode() == IR::Opcode::ImageWrite); PatchNormalization(inst, ir, image);
} }
void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info, void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info,