diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 36d36829b..8af2dcd6f 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -418,43 +418,34 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& } } -void PatchNormalization(IR::Inst& inst, IR::IREmitter& ir, const AmdGpu::Image& image, - bool is_write) { +void PatchNormalization(IR::Inst& inst, IR::IREmitter& ir, const AmdGpu::Image& image) { if (!image.NeedsNormalizationPatch()) { return; } bool is_signed = image.GetNumberFmt() == AmdGpu::NumberFormat::Snorm; + bool is_atomic = IsImageAtomicInstruction(inst); + bool is_write = is_atomic || inst.GetOpcode() == IR::Opcode::ImageWrite; int num_components = AmdGpu::NumComponents(image.GetDataFmt()); IR::F32 multipier = ir.Imm32(is_signed ? 2147483647.0f : 4294967295.0f); - if (!is_write) - multipier = ir.FPRecip(multipier); - - switch (num_components) { - case 1: - break; - case 2: - multipier = IR::F32{ir.CompositeConstruct(multipier, multipier)}; - case 3: - multipier = IR::F32{ir.CompositeConstruct(multipier, multipier, multipier)}; - case 4: - multipier = IR::F32{ir.CompositeConstruct(multipier, multipier, multipier, multipier)}; - default: - UNREACHABLE(); - } - - if (is_write) { - IR::F32 data = IR::F32{inst.Arg(2)}; - data = ir.FPMul(data, multipier); - - if (is_signed) { - inst.SetArg(2, ir.ConvertFToS(32, data)); - } else { - inst.SetArg(2, ir.ConvertFToU(32, data)); + const auto get_mul_vec = [&]() -> IR::F32 { + switch (num_components) { + case 1: + return multipier; + case 2: + return IR::F32{ir.CompositeConstruct(multipier, multipier)}; + case 3: + return IR::F32{ir.CompositeConstruct(multipier, multipier, multipier)}; + case 4: + return IR::F32{ir.CompositeConstruct(multipier, multipier, multipier, multipier)}; + default: + UNREACHABLE(); } - } else { + }; + + const auto patch_read = [&]() { IR::Value data = IR::Value(ir.CopyInst(inst)); if (is_signed) { data = ir.ConvertSToF(32, 32, data); @@ -462,8 +453,28 @@ void PatchNormalization(IR::Inst& inst, IR::IREmitter& ir, const AmdGpu::Image& data = ir.ConvertUToF(32, 32, data); } - data = ir.FPMul(IR::F32(data), multipier); + data = ir.FPMul(IR::F32(data), get_mul_vec()); inst.ReplaceUsesWith(data); + }; + + if (is_write) { + IR::F32 data = IR::F32{inst.Arg(2)}; + data = ir.FPMul(data, get_mul_vec()); + + if (is_signed) { + inst.SetArg(2, ir.ConvertFToS(32, data)); + } else { + inst.SetArg(2, ir.ConvertFToU(32, data)); + } + + // Atomic instructions return the old value, so we need to patch the read. + if (is_atomic) { + multipier = ir.FPRecip(multipier); + patch_read(); + } + } else { + multipier = ir.FPRecip(multipier); + patch_read(); } } @@ -649,7 +660,7 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info, }(); inst.ReplaceUsesWith(new_inst); - PatchNormalization(*new_inst.Inst(), ir, image, false); + PatchNormalization(*new_inst.Inst(), ir, image); } void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { @@ -777,7 +788,7 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip inst.SetArg(4, arg); } - PatchNormalization(inst, ir, image, inst.GetOpcode() == IR::Opcode::ImageWrite); + PatchNormalization(inst, ir, image); } void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info,