shader_recompiler: Implement most integer image atomics, workgroup barriers and shared memory load/store (#231)

* shader_recompiler: Add LDEXP

* shader_recompiler: Add most image integer atomic ops

* shader_recompiler: Implement shared memory load/store

* shader_recompiler: More image atomics

* externals: Update sirit

* clang format

* cmake: Add missing files

* shader_recompiler: Fix some atomic bugs

* shader_recompiler: Vs outputs

* shader_recompiler: Shared mem has side-effects, fix format component order

* shader_recompiler: Inline constant buffer impl

* video_core: Fix regressions

* Work

* Fixup a few things
Author: TheTurtle
Date: 2024-07-05 00:15:44 +03:00
Committed by: GitHub
Parent: af3bbc33e9
Commit: 6ceab6dfac
69 changed files with 1597 additions and 310 deletions
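For context on the "Inline constant buffer impl" bullet and the new S_GETPC_B64 / S_ADDC_U32 handlers in the diffs below: a shader can address constant data laid out right after its binary by reading the program counter and adding a byte offset with a 32-bit add followed by an add-with-carry. A minimal host-side sketch of that carry chain (illustrative names only, not the emulator's code; as the comment in the diff notes, the translator just forwards the PC so the resource-tracking pass can spot the pattern):

#include <cstdint>

// Hypothetical sketch of the address formation done by S_GETPC_B64 followed by
// S_ADD_U32 / S_ADDC_U32 when a shader reads an inline constant buffer.
// pc_lo/pc_hi model the two halves returned by S_GETPC_B64; scc models the
// carry flag written by the low 32-bit add.
static uint64_t InlineCbufAddress(uint32_t pc_lo, uint32_t pc_hi, uint32_t byte_offset) {
    const uint64_t lo_sum = uint64_t(pc_lo) + byte_offset; // S_ADD_U32: low word + offset
    const uint32_t scc = uint32_t(lo_sum >> 32);           // carry out of the low add
    const uint32_t hi = pc_hi + scc;                        // S_ADDC_U32: src0 + src1 + SCC
    return (uint64_t(hi) << 32) | uint32_t(lo_sum);
}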


@@ -22,16 +22,18 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnIn
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
const IR::VectorReg dst_reg{inst.dst[0].code};
if (is_pair) {
// Pair loads are either 32 or 64-bit. We assume 32-bit for now.
ASSERT(bit_size == 32);
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
ir.SetVectorReg(dst_reg, ir.ReadShared(32, is_signed, addr0));
ir.SetVectorReg(dst_reg, IR::U32{ir.LoadShared(32, is_signed, addr0)});
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
ir.SetVectorReg(dst_reg + 1, ir.ReadShared(32, is_signed, addr1));
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.LoadShared(32, is_signed, addr1)});
} else if (bit_size == 64) {
const IR::Value data = ir.UnpackUint2x32(ir.ReadShared(bit_size, is_signed, addr));
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr);
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)});
} else {
const IR::U32 data = ir.ReadShared(bit_size, is_signed, addr);
const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr)};
ir.SetVectorReg(dst_reg, data);
}
}
@@ -41,17 +43,26 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnI
const IR::VectorReg data0{inst.src[1].code};
const IR::VectorReg data1{inst.src[2].code};
if (is_pair) {
ASSERT(bit_size == 32);
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0)));
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1)));
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
} else if (bit_size == 64) {
const IR::U64 data = ir.PackUint2x32(
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)));
const IR::Value data =
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
ir.WriteShared(bit_size, data, addr);
} else {
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr);
}
}
void Translator::S_BARRIER() {
ir.Barrier();
}
void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
UNREACHABLE();
}
} // namespace Shader::Gcn


@@ -318,4 +318,16 @@ void Translator::S_SUB_U32(const GcnInst& inst) {
ir.SetScc(ir.Imm1(false));
}
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
// This only really exists to let resource tracking pass know
// there is an inline cbuf.
SetDst(inst.dst[0], ir.Imm32(pc));
}
void Translator::S_ADDC_U32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), ir.GetSccLo()));
}
} // namespace Shader::Gcn


@@ -5,20 +5,29 @@
namespace Shader::Gcn {
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
void Translator::S_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
const auto& smrd = inst.control.smrd;
ASSERT_MSG(smrd.imm, "Bindless texture loads unsupported");
const u32 dword_offset = [&] -> u32 {
if (smrd.imm) {
return smrd.offset;
}
if (smrd.offset == SQ_SRC_LITERAL) {
return inst.src[1].code;
}
UNREACHABLE();
}();
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::Value base =
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1));
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(smrd.offset + i)));
ir.SetScalarReg(dst_reg++, ir.ReadConst(base, ir.Imm32(dword_offset + i)));
}
}
void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
static constexpr u32 SQ_SRC_LITERAL = 0xFF;
const auto& smrd = inst.control.smrd;
const IR::ScalarReg sbase{inst.src[0].code * 2};
const IR::U32 dword_offset = [&] -> IR::U32 {
@@ -30,7 +39,9 @@ void Translator::S_BUFFER_LOAD_DWORD(int num_dwords, const GcnInst& inst) {
}
return ir.ShiftRightLogical(ir.GetScalarReg(IR::ScalarReg(smrd.offset)), ir.Imm32(2));
}();
const IR::Value vsharp = ir.GetScalarReg(sbase);
const IR::Value vsharp =
ir.CompositeConstruct(ir.GetScalarReg(sbase), ir.GetScalarReg(sbase + 1),
ir.GetScalarReg(sbase + 2), ir.GetScalarReg(sbase + 3));
IR::ScalarReg dst_reg{inst.dst[0].code};
for (u32 i = 0; i < num_dwords; i++) {
const IR::U32 index = ir.IAdd(dword_offset, ir.Imm32(i));


@@ -1,6 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h"
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h"
@@ -190,7 +193,20 @@ void Translator::EmitFetch(const GcnInst& inst) {
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
// Parse the assembly to generate a list of attributes.
const auto attribs = ParseFetchShader(code);
u32 fetch_size{};
const auto attribs = ParseFetchShader(code, &fetch_size);
if (Config::dumpShaders()) {
using namespace Common::FS;
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
if (!std::filesystem::exists(dump_dir)) {
std::filesystem::create_directories(dump_dir);
}
const auto filename = fmt::format("vs_fetch_{:#018x}.bin", info.pgm_hash);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteRaw<u8>(code, fetch_size);
}
for (const auto& attrib : attribs) {
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
IR::VectorReg dst_reg{attrib.dest_vgpr};
@@ -224,9 +240,9 @@ void Translator::EmitFetch(const GcnInst& inst) {
attrib.instance_data);
}
const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt());
info.vs_inputs.push_back({
.fmt = buffer.num_format,
.fmt = buffer.GetNumberFmt(),
.binding = attrib.semantic,
.num_components = std::min<u16>(attrib.num_elements, num_components),
.sgpr_base = attrib.sgpr_base,
@@ -236,12 +252,13 @@ void Translator::EmitFetch(const GcnInst& inst) {
}
}
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info) {
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info) {
if (inst_list.empty()) {
return;
}
Translator translator{block, info};
for (const auto& inst : inst_list) {
block_base += inst.length;
switch (inst.opcode) {
case Opcode::S_MOVK_I32:
translator.S_MOVK(inst);
@@ -345,6 +362,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_BFREV_B32:
translator.V_BFREV_B32(inst);
break;
case Opcode::V_LDEXP_F32:
translator.V_LDEXP_F32(inst);
break;
case Opcode::V_FRACT_F32:
translator.V_FRACT_F32(inst);
break;
@@ -374,8 +394,40 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::IMAGE_SAMPLE_LZ:
case Opcode::IMAGE_SAMPLE:
case Opcode::IMAGE_SAMPLE_L:
case Opcode::IMAGE_SAMPLE_C_O:
case Opcode::IMAGE_SAMPLE_B:
translator.IMAGE_SAMPLE(inst);
break;
case Opcode::IMAGE_ATOMIC_ADD:
translator.IMAGE_ATOMIC(AtomicOp::Add, inst);
break;
case Opcode::IMAGE_ATOMIC_AND:
translator.IMAGE_ATOMIC(AtomicOp::And, inst);
break;
case Opcode::IMAGE_ATOMIC_OR:
translator.IMAGE_ATOMIC(AtomicOp::Or, inst);
break;
case Opcode::IMAGE_ATOMIC_XOR:
translator.IMAGE_ATOMIC(AtomicOp::Xor, inst);
break;
case Opcode::IMAGE_ATOMIC_UMAX:
translator.IMAGE_ATOMIC(AtomicOp::Umax, inst);
break;
case Opcode::IMAGE_ATOMIC_SMAX:
translator.IMAGE_ATOMIC(AtomicOp::Smax, inst);
break;
case Opcode::IMAGE_ATOMIC_UMIN:
translator.IMAGE_ATOMIC(AtomicOp::Umin, inst);
break;
case Opcode::IMAGE_ATOMIC_SMIN:
translator.IMAGE_ATOMIC(AtomicOp::Smin, inst);
break;
case Opcode::IMAGE_ATOMIC_INC:
translator.IMAGE_ATOMIC(AtomicOp::Inc, inst);
break;
case Opcode::IMAGE_ATOMIC_DEC:
translator.IMAGE_ATOMIC(AtomicOp::Dec, inst);
break;
case Opcode::IMAGE_GET_LOD:
translator.IMAGE_GET_LOD(inst);
break;
@@ -457,9 +509,15 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CMP_NGT_F32:
translator.V_CMP_F32(ConditionOp::LE, false, inst);
break;
case Opcode::V_CMP_NGE_F32:
translator.V_CMP_F32(ConditionOp::LT, false, inst);
break;
case Opcode::S_CMP_LT_U32:
translator.S_CMP(ConditionOp::LT, false, inst);
break;
case Opcode::S_CMP_LE_U32:
translator.S_CMP(ConditionOp::LE, false, inst);
break;
case Opcode::S_CMP_LG_U32:
translator.S_CMP(ConditionOp::LG, false, inst);
break;
@@ -487,6 +545,12 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CNDMASK_B32:
translator.V_CNDMASK_B32(inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_X:
translator.BUFFER_LOAD_FORMAT(1, true, inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_XY:
translator.BUFFER_LOAD_FORMAT(2, true, inst);
break;
case Opcode::TBUFFER_LOAD_FORMAT_XYZ:
translator.BUFFER_LOAD_FORMAT(3, true, inst);
break;
@@ -581,6 +645,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CVT_I32_F32:
translator.V_CVT_I32_F32(inst);
break;
case Opcode::V_CVT_FLR_I32_F32:
translator.V_CVT_FLR_I32_F32(inst);
break;
case Opcode::V_SUBREV_F32:
translator.V_SUBREV_F32(inst);
break;
@@ -715,6 +782,7 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
translator.V_MAD_I32_I24(inst);
break;
case Opcode::V_MUL_I32_I24:
case Opcode::V_MUL_U32_U24:
translator.V_MUL_I32_I24(inst);
break;
case Opcode::V_SUB_I32:
@@ -771,6 +839,9 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::V_CMP_NE_U64:
translator.V_CMP_NE_U64(inst);
break;
case Opcode::V_CMP_CLASS_F32:
translator.V_CMP_CLASS_F32(inst);
break;
case Opcode::V_TRUNC_F32:
translator.V_TRUNC_F32(inst);
break;
@@ -786,7 +857,11 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::S_ADD_U32:
translator.S_ADD_U32(inst);
break;
case Opcode::S_ADDC_U32:
translator.S_ADDC_U32(inst);
break;
case Opcode::S_SUB_U32:
case Opcode::S_SUB_I32:
translator.S_SUB_U32(inst);
break;
// TODO: Separate implementation for legacy variants.
@@ -809,9 +884,30 @@ void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info)
case Opcode::IMAGE_GET_RESINFO:
translator.IMAGE_GET_RESINFO(inst);
break;
case Opcode::S_BARRIER:
translator.S_BARRIER();
break;
case Opcode::S_TTRACEDATA:
LOG_WARNING(Render_Vulkan, "S_TTRACEDATA instruction!");
break;
case Opcode::DS_READ_B32:
translator.DS_READ(32, false, false, inst);
break;
case Opcode::DS_READ2_B32:
translator.DS_READ(32, false, true, inst);
break;
case Opcode::DS_WRITE_B32:
translator.DS_WRITE(32, false, false, inst);
break;
case Opcode::DS_WRITE2_B32:
translator.DS_WRITE(32, false, true, inst);
break;
case Opcode::V_READFIRSTLANE_B32:
translator.V_READFIRSTLANE_B32(inst);
break;
case Opcode::S_GETPC_B64:
translator.S_GETPC_B64(block_base, inst);
break;
case Opcode::S_NOP:
case Opcode::S_CBRANCH_EXECZ:
case Opcode::S_CBRANCH_SCC0:


@@ -26,6 +26,25 @@ enum class ConditionOp : u32 {
TRU,
};
enum class AtomicOp : u32 {
Swap,
CmpSwap,
Add,
Sub,
Smin,
Umin,
Smax,
Umax,
And,
Or,
Xor,
Inc,
Dec,
FCmpSwap,
Fmin,
Fmax,
};
enum class NegateMode : u32 {
None,
Src1,
@@ -61,6 +80,8 @@ public:
void S_BREV_B32(const GcnInst& inst);
void S_ADD_U32(const GcnInst& inst);
void S_SUB_U32(const GcnInst& inst);
void S_GETPC_B64(u32 pc, const GcnInst& inst);
void S_ADDC_U32(const GcnInst& inst);
// Scalar Memory
void S_LOAD_DWORD(int num_dwords, const GcnInst& inst);
@@ -133,6 +154,9 @@ public:
void V_NOT_B32(const GcnInst& inst);
void V_CVT_F32_UBYTE(u32 index, const GcnInst& inst);
void V_BFREV_B32(const GcnInst& inst);
void V_LDEXP_F32(const GcnInst& inst);
void V_CVT_FLR_I32_F32(const GcnInst& inst);
void V_CMP_CLASS_F32(const GcnInst& inst);
// Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst& inst);
@@ -145,6 +169,8 @@ public:
void DS_SWIZZLE_B32(const GcnInst& inst);
void DS_READ(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void DS_WRITE(int bit_size, bool is_signed, bool is_pair, const GcnInst& inst);
void V_READFIRSTLANE_B32(const GcnInst& inst);
void S_BARRIER();
// MIMG
void IMAGE_GET_RESINFO(const GcnInst& inst);
@@ -153,6 +179,7 @@ public:
void IMAGE_STORE(const GcnInst& inst);
void IMAGE_LOAD(bool has_mip, const GcnInst& inst);
void IMAGE_GET_LOD(const GcnInst& inst);
void IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst);
// Export
void EXP(const GcnInst& inst);
@@ -167,6 +194,6 @@ private:
static std::array<bool, IR::NumScalarRegs> exec_contexts;
};
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info);
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info);
} // namespace Shader::Gcn


@@ -28,7 +28,8 @@ void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
void Translator::V_CVT_F32_F16(const GcnInst& inst) {
const IR::U32 src0 = GetSrc(inst.src[0]);
SetDst(inst.dst[0], ir.ConvertUToF(32, 16, src0));
const IR::U16 src0l = ir.UConvert(16, src0);
SetDst(inst.dst[0], ir.FPConvert(32, ir.BitCast<IR::F16>(src0l)));
}
void Translator::V_MUL_F32(const GcnInst& inst) {
@@ -50,11 +51,14 @@ void Translator::V_CNDMASK_B32(const GcnInst& inst) {
};
const bool has_flt_source =
is_float_const(inst.src[0].field) || is_float_const(inst.src[1].field);
const IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source);
IR::U32F32 src0 = GetSrc(inst.src[0], has_flt_source);
IR::U32F32 src1 = GetSrc(inst.src[1], has_flt_source);
if (src0.Type() == IR::Type::F32 && src1.Type() == IR::Type::U32) {
src1 = ir.BitCast<IR::F32, IR::U32>(src1);
}
if (src1.Type() == IR::Type::F32 && src0.Type() == IR::Type::U32) {
src0 = ir.BitCast<IR::F32, IR::U32>(src0);
}
const IR::Value result = ir.Select(flag, src1, src0);
ir.SetVectorReg(dst_reg, IR::U32F32{result});
}
@@ -502,4 +506,19 @@ void Translator::V_BFREV_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.BitReverse(src0));
}
void Translator::V_LDEXP_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::U32 src1{GetSrc(inst.src[1])};
SetDst(inst.dst[0], ir.FPLdexp(src0, src1));
}
void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.ConvertFToI(32, true, ir.FPFloor(src0)));
}
void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
UNREACHABLE();
}
} // namespace Shader::Gcn


@@ -212,10 +212,15 @@ void Translator::IMAGE_STORE(const GcnInst& inst) {
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
ASSERT(mimg.dmask == 0xF);
const IR::Value value = ir.CompositeConstruct(
ir.GetVectorReg<IR::F32>(data_reg), ir.GetVectorReg<IR::F32>(data_reg + 1),
ir.GetVectorReg<IR::F32>(data_reg + 2), ir.GetVectorReg<IR::F32>(data_reg + 3));
boost::container::static_vector<IR::F32, 4> comps;
for (u32 i = 0; i < 4; i++) {
if (((mimg.dmask >> i) & 1) == 0) {
comps.push_back(ir.Imm32(0.f));
continue;
}
comps.push_back(ir.GetVectorReg<IR::F32>(data_reg++));
}
const IR::Value value = ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
ir.ImageWrite(handle, body, value, {});
}
@@ -245,7 +250,10 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, const GcnInst
info.nfmt.Assign(static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt));
}
const IR::Value value = ir.LoadBuffer(num_dwords, ir.GetScalarReg(sharp), address, info);
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
const IR::Value value = ir.LoadBuffer(num_dwords, handle, address, info);
const IR::VectorReg dst_reg{inst.src[1].code};
if (num_dwords == 1) {
ir.SetVectorReg(dst_reg, IR::F32{value});
@@ -304,7 +312,10 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, bool is_typed, const GcnIns
ir.GetVectorReg<Shader::IR::F32>(src_reg + 3));
break;
}
ir.StoreBuffer(num_dwords, ir.GetScalarReg(sharp), address, value, info);
const IR::Value handle =
ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
ir.StoreBuffer(num_dwords, handle, address, value, info);
}
void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
@@ -322,4 +333,48 @@ void Translator::IMAGE_GET_LOD(const GcnInst& inst) {
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(lod, 1)});
}
void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
const auto& mimg = inst.control.mimg;
IR::VectorReg val_reg{inst.dst[0].code};
IR::VectorReg addr_reg{inst.src[0].code};
const IR::ScalarReg tsharp_reg{inst.src[2].code * 4};
const IR::Value value = ir.GetVectorReg(val_reg);
const IR::Value handle = ir.GetScalarReg(tsharp_reg);
const IR::Value body =
ir.CompositeConstruct(ir.GetVectorReg(addr_reg), ir.GetVectorReg(addr_reg + 1),
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
const IR::Value prev = [&] {
switch (op) {
case AtomicOp::Swap:
return ir.ImageAtomicExchange(handle, body, value, {});
case AtomicOp::Add:
return ir.ImageAtomicIAdd(handle, body, value, {});
case AtomicOp::Smin:
return ir.ImageAtomicIMin(handle, body, value, true, {});
case AtomicOp::Umin:
return ir.ImageAtomicUMin(handle, body, value, {});
case AtomicOp::Smax:
return ir.ImageAtomicIMax(handle, body, value, true, {});
case AtomicOp::Umax:
return ir.ImageAtomicUMax(handle, body, value, {});
case AtomicOp::And:
return ir.ImageAtomicAnd(handle, body, value, {});
case AtomicOp::Or:
return ir.ImageAtomicOr(handle, body, value, {});
case AtomicOp::Xor:
return ir.ImageAtomicXor(handle, body, value, {});
case AtomicOp::Inc:
return ir.ImageAtomicInc(handle, body, value, {});
case AtomicOp::Dec:
return ir.ImageAtomicDec(handle, body, value, {});
default:
UNREACHABLE();
}
}();
if (mimg.glc) {
ir.SetVectorReg(val_reg, IR::U32{prev});
}
}
} // namespace Shader::Gcn