mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-12-12 14:48:52 +00:00
shader_recompiler: Reorganize data share operations and implement GDS bit (#3222)
* shader_recompiler: Reorganize data share operations and implement GDS bit * Review comments
This commit is contained in:
@@ -291,78 +291,137 @@ void IREmitter::SetPatch(Patch patch, const F32& value) {
|
||||
Inst(Opcode::SetPatch, patch, value);
|
||||
}
|
||||
|
||||
// Emits a shared-memory load of the requested width.
//   bit_size  - width of the load in bits; must be 16, 32 or 64.
//   is_signed - accepted for interface compatibility; not consulted here.
//   offset    - offset operand forwarded to the load instruction.
//   is_gds    - attached to the emitted instruction as its flags so later
//               passes can tell global-data-share accesses apart.
// Any other bit size is a programming error and hits UNREACHABLE_MSG.
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset, bool is_gds) {
    if (bit_size == 16) {
        return Inst<U16>(Opcode::LoadSharedU16, Flags{is_gds}, offset);
    }
    if (bit_size == 32) {
        return Inst<U32>(Opcode::LoadSharedU32, Flags{is_gds}, offset);
    }
    if (bit_size == 64) {
        return Inst<U64>(Opcode::LoadSharedU64, Flags{is_gds}, offset);
    }
    UNREACHABLE_MSG("Invalid bit size {}", bit_size);
}
|
||||
|
||||
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) {
|
||||
void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset, bool is_gds) {
|
||||
switch (bit_size) {
|
||||
case 16:
|
||||
Inst(Opcode::WriteSharedU16, offset, value);
|
||||
Inst(Opcode::WriteSharedU16, Flags{is_gds}, offset, value);
|
||||
break;
|
||||
case 32:
|
||||
Inst(Opcode::WriteSharedU32, offset, value);
|
||||
Inst(Opcode::WriteSharedU32, Flags{is_gds}, offset, value);
|
||||
break;
|
||||
case 64:
|
||||
Inst(Opcode::WriteSharedU64, offset, value);
|
||||
Inst(Opcode::WriteSharedU64, Flags{is_gds}, offset, value);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid bit size {}", bit_size);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a shared-memory atomic integer add, choosing the 32- or 64-bit opcode
// from the type of `data`. `is_gds` is attached to the instruction as its flags.
// Operand types other than U32/U64 are rejected via ThrowInvalidType.
U32U64 IREmitter::SharedAtomicIAdd(const U32& address, const U32U64& data, bool is_gds) {
    const Type data_type = data.Type();
    if (data_type == Type::U32) {
        return Inst<U32>(Opcode::SharedAtomicIAdd32, Flags{is_gds}, address, data);
    }
    if (data_type == Type::U64) {
        return Inst<U64>(Opcode::SharedAtomicIAdd64, Flags{is_gds}, address, data);
    }
    ThrowInvalidType(data_type);
}
|
||||
|
||||
U32 IREmitter::SharedAtomicIMin(const U32& address, const U32& data, bool is_signed) {
|
||||
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMin32, address, data)
|
||||
: Inst<U32>(Opcode::SharedAtomicUMin32, address, data);
|
||||
U32U64 IREmitter::SharedAtomicIMin(const U32& address, const U32U64& data, bool is_signed,
|
||||
bool is_gds) {
|
||||
switch (data.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(is_signed ? Opcode::SharedAtomicSMin32 : Opcode::SharedAtomicUMin32,
|
||||
Flags{is_gds}, address, data);
|
||||
case Type::U64:
|
||||
return Inst<U64>(is_signed ? Opcode::SharedAtomicSMin64 : Opcode::SharedAtomicUMin64,
|
||||
Flags{is_gds}, address, data);
|
||||
default:
|
||||
ThrowInvalidType(data.Type());
|
||||
}
|
||||
}
|
||||
|
||||
U32 IREmitter::SharedAtomicIMax(const U32& address, const U32& data, bool is_signed) {
|
||||
return is_signed ? Inst<U32>(Opcode::SharedAtomicSMax32, address, data)
|
||||
: Inst<U32>(Opcode::SharedAtomicUMax32, address, data);
|
||||
U32U64 IREmitter::SharedAtomicIMax(const U32& address, const U32U64& data, bool is_signed,
|
||||
bool is_gds) {
|
||||
switch (data.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(is_signed ? Opcode::SharedAtomicSMax32 : Opcode::SharedAtomicUMax32,
|
||||
Flags{is_gds}, address, data);
|
||||
case Type::U64:
|
||||
return Inst<U64>(is_signed ? Opcode::SharedAtomicSMax64 : Opcode::SharedAtomicUMax64,
|
||||
Flags{is_gds}, address, data);
|
||||
default:
|
||||
ThrowInvalidType(data.Type());
|
||||
}
|
||||
}
|
||||
|
||||
U32 IREmitter::SharedAtomicAnd(const U32& address, const U32& data) {
|
||||
return Inst<U32>(Opcode::SharedAtomicAnd32, address, data);
|
||||
U32U64 IREmitter::SharedAtomicAnd(const U32& address, const U32U64& data, bool is_gds) {
|
||||
switch (data.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(Opcode::SharedAtomicAnd32, Flags{is_gds}, address, data);
|
||||
case Type::U64:
|
||||
return Inst<U64>(Opcode::SharedAtomicAnd64, Flags{is_gds}, address, data);
|
||||
default:
|
||||
ThrowInvalidType(data.Type());
|
||||
}
|
||||
}
|
||||
|
||||
U32 IREmitter::SharedAtomicOr(const U32& address, const U32& data) {
|
||||
U32U64 IREmitter::SharedAtomicOr(const U32& address, const U32U64& data, bool is_gds) {
|
||||
switch (data.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(Opcode::SharedAtomicAnd32, Flags{is_gds}, address, data);
|
||||
case Type::U64:
|
||||
return Inst<U64>(Opcode::SharedAtomicAnd64, Flags{is_gds}, address, data);
|
||||
default:
|
||||
ThrowInvalidType(data.Type());
|
||||
}
|
||||
return Inst<U32>(Opcode::SharedAtomicOr32, address, data);
|
||||
}
|
||||
|
||||
U32 IREmitter::SharedAtomicXor(const U32& address, const U32& data) {
|
||||
return Inst<U32>(Opcode::SharedAtomicXor32, address, data);
|
||||
U32U64 IREmitter::SharedAtomicXor(const U32& address, const U32U64& data, bool is_gds) {
|
||||
switch (data.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(Opcode::SharedAtomicXor32, Flags{is_gds}, address, data);
|
||||
case Type::U64:
|
||||
return Inst<U64>(Opcode::SharedAtomicXor64, Flags{is_gds}, address, data);
|
||||
default:
|
||||
ThrowInvalidType(data.Type());
|
||||
}
|
||||
}
|
||||
|
||||
U32 IREmitter::SharedAtomicInc(const U32& address) {
|
||||
return Inst<U32>(Opcode::SharedAtomicInc32, address);
|
||||
U32U64 IREmitter::SharedAtomicISub(const U32& address, const U32U64& data, bool is_gds) {
|
||||
switch (data.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<U32>(Opcode::SharedAtomicISub32, Flags{is_gds}, address, data);
|
||||
case Type::U64:
|
||||
return Inst<U64>(Opcode::SharedAtomicISub64, Flags{is_gds}, address, data);
|
||||
default:
|
||||
ThrowInvalidType(data.Type());
|
||||
}
|
||||
}
|
||||
|
||||
// 32-bit specialization: emits SharedAtomicInc32 on `address`, with `is_gds`
// attached to the instruction as its flags.
template <>
U32 IREmitter::SharedAtomicInc(const U32& address, bool is_gds) {
    const Opcode op = Opcode::SharedAtomicInc32;
    return Inst<U32>(op, Flags{is_gds}, address);
}
|
||||
|
||||
U32 IREmitter::SharedAtomicISub(const U32& address, const U32& data) {
|
||||
return Inst<U32>(Opcode::SharedAtomicISub32, address, data);
|
||||
template <>
|
||||
U64 IREmitter::SharedAtomicInc(const U32& address, bool is_gds) {
|
||||
return Inst<U64>(Opcode::SharedAtomicInc64, Flags{is_gds}, address);
|
||||
}
|
||||
|
||||
template <>
|
||||
U32 IREmitter::SharedAtomicDec(const U32& address, bool is_gds) {
|
||||
return Inst<U32>(Opcode::SharedAtomicDec32, Flags{is_gds}, address);
|
||||
}
|
||||
|
||||
template <>
|
||||
U64 IREmitter::SharedAtomicDec(const U32& address, bool is_gds) {
|
||||
return Inst<U64>(Opcode::SharedAtomicDec64, Flags{is_gds}, address);
|
||||
}
|
||||
|
||||
U32 IREmitter::ReadConst(const Value& base, const U32& offset) {
|
||||
|
||||
@@ -96,18 +96,24 @@ public:
|
||||
[[nodiscard]] F32 GetPatch(Patch patch);
|
||||
void SetPatch(Patch patch, const F32& value);
|
||||
|
||||
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
|
||||
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
||||
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset,
|
||||
bool is_gds = false);
|
||||
void WriteShared(int bit_size, const Value& value, const U32& offset, bool is_gds = false);
|
||||
|
||||
[[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data);
|
||||
[[nodiscard]] U32 SharedAtomicISub(const U32& address, const U32& data);
|
||||
[[nodiscard]] U32 SharedAtomicIMin(const U32& address, const U32& data, bool is_signed);
|
||||
[[nodiscard]] U32 SharedAtomicIMax(const U32& address, const U32& data, bool is_signed);
|
||||
[[nodiscard]] U32 SharedAtomicInc(const U32& address);
|
||||
[[nodiscard]] U32 SharedAtomicDec(const U32& address);
|
||||
[[nodiscard]] U32 SharedAtomicAnd(const U32& address, const U32& data);
|
||||
[[nodiscard]] U32 SharedAtomicOr(const U32& address, const U32& data);
|
||||
[[nodiscard]] U32 SharedAtomicXor(const U32& address, const U32& data);
|
||||
[[nodiscard]] U32U64 SharedAtomicIAdd(const U32& address, const U32U64& data, bool is_gds);
|
||||
[[nodiscard]] U32U64 SharedAtomicISub(const U32& address, const U32U64& data, bool is_gds);
|
||||
[[nodiscard]] U32U64 SharedAtomicIMin(const U32& address, const U32U64& data, bool is_signed,
|
||||
bool is_gds);
|
||||
[[nodiscard]] U32U64 SharedAtomicIMax(const U32& address, const U32U64& data, bool is_signed,
|
||||
bool is_gds);
|
||||
[[nodiscard]] U32U64 SharedAtomicAnd(const U32& address, const U32U64& data, bool is_gds);
|
||||
[[nodiscard]] U32U64 SharedAtomicOr(const U32& address, const U32U64& data, bool is_gds);
|
||||
[[nodiscard]] U32U64 SharedAtomicXor(const U32& address, const U32U64& data, bool is_gds);
|
||||
|
||||
template <typename T = U32>
|
||||
[[nodiscard]] T SharedAtomicInc(const U32& address, bool is_gds);
|
||||
template <typename T = U32>
|
||||
[[nodiscard]] T SharedAtomicDec(const U32& address, bool is_gds);
|
||||
|
||||
[[nodiscard]] U32 ReadConst(const Value& base, const U32& offset);
|
||||
[[nodiscard]] U32 ReadConstBuffer(const Value& handle, const U32& index);
|
||||
|
||||
@@ -92,7 +92,6 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
||||
case Opcode::WriteSharedU32:
|
||||
case Opcode::WriteSharedU64:
|
||||
case Opcode::SharedAtomicIAdd32:
|
||||
case Opcode::SharedAtomicIAdd64:
|
||||
case Opcode::SharedAtomicISub32:
|
||||
case Opcode::SharedAtomicSMin32:
|
||||
case Opcode::SharedAtomicUMin32:
|
||||
@@ -103,6 +102,17 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
||||
case Opcode::SharedAtomicAnd32:
|
||||
case Opcode::SharedAtomicOr32:
|
||||
case Opcode::SharedAtomicXor32:
|
||||
case Opcode::SharedAtomicIAdd64:
|
||||
case Opcode::SharedAtomicISub64:
|
||||
case Opcode::SharedAtomicSMin64:
|
||||
case Opcode::SharedAtomicUMin64:
|
||||
case Opcode::SharedAtomicSMax64:
|
||||
case Opcode::SharedAtomicUMax64:
|
||||
case Opcode::SharedAtomicInc64:
|
||||
case Opcode::SharedAtomicDec64:
|
||||
case Opcode::SharedAtomicAnd64:
|
||||
case Opcode::SharedAtomicOr64:
|
||||
case Opcode::SharedAtomicXor64:
|
||||
case Opcode::ImageWrite:
|
||||
case Opcode::ImageAtomicIAdd32:
|
||||
case Opcode::ImageAtomicSMin32:
|
||||
|
||||
@@ -41,15 +41,25 @@ OPCODE(WriteSharedU64, Void, U32,
|
||||
OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicIAdd64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicISub32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicISub64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicSMin32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicSMin64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicUMin32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicUMin64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicSMax32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicSMax64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicUMax32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicUMax64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicInc32, U32, U32, )
|
||||
OPCODE(SharedAtomicInc64, U64, U32, )
|
||||
OPCODE(SharedAtomicDec32, U32, U32, )
|
||||
OPCODE(SharedAtomicDec64, U64, U32, )
|
||||
OPCODE(SharedAtomicAnd32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicAnd64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicOr32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicOr64, U64, U32, U64, )
|
||||
OPCODE(SharedAtomicXor32, U32, U32, U32, )
|
||||
OPCODE(SharedAtomicXor64, U64, U32, U64, )
|
||||
|
||||
// Context getters/setters
|
||||
OPCODE(GetUserData, U32, ScalarReg, )
|
||||
|
||||
@@ -84,8 +84,42 @@ bool IsBufferInstruction(const IR::Inst& inst) {
|
||||
}
|
||||
|
||||
// Reports whether `inst` should be handled by the data-ring / global-data-share
// patching path: DataAppend and DataConsume always qualify; shared-memory
// loads, stores and atomics qualify only when their boolean instruction flag
// (is_gds) is set. Every other opcode yields false.
bool IsDataRingInstruction(const IR::Inst& inst) {
    const IR::Opcode opcode = inst.GetOpcode();
    if (opcode == IR::Opcode::DataAppend || opcode == IR::Opcode::DataConsume) {
        return true;
    }
    switch (opcode) {
    // Plain loads and stores.
    case IR::Opcode::LoadSharedU16:
    case IR::Opcode::LoadSharedU32:
    case IR::Opcode::LoadSharedU64:
    case IR::Opcode::WriteSharedU16:
    case IR::Opcode::WriteSharedU32:
    case IR::Opcode::WriteSharedU64:
    // 32-bit atomics.
    case IR::Opcode::SharedAtomicIAdd32:
    case IR::Opcode::SharedAtomicISub32:
    case IR::Opcode::SharedAtomicUMin32:
    case IR::Opcode::SharedAtomicSMin32:
    case IR::Opcode::SharedAtomicUMax32:
    case IR::Opcode::SharedAtomicSMax32:
    case IR::Opcode::SharedAtomicAnd32:
    case IR::Opcode::SharedAtomicOr32:
    case IR::Opcode::SharedAtomicXor32:
    case IR::Opcode::SharedAtomicInc32:
    case IR::Opcode::SharedAtomicDec32:
    // 64-bit atomics.
    case IR::Opcode::SharedAtomicIAdd64:
    case IR::Opcode::SharedAtomicISub64:
    case IR::Opcode::SharedAtomicUMin64:
    case IR::Opcode::SharedAtomicSMin64:
    case IR::Opcode::SharedAtomicUMax64:
    case IR::Opcode::SharedAtomicSMax64:
    case IR::Opcode::SharedAtomicAnd64:
    case IR::Opcode::SharedAtomicOr64:
    case IR::Opcode::SharedAtomicXor64:
    case IR::Opcode::SharedAtomicInc64:
    case IR::Opcode::SharedAtomicDec64:
        return inst.Flags<bool>(); // is_gds
    default:
        return false;
    }
}
|
||||
|
||||
IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
|
||||
@@ -507,7 +541,8 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
|
||||
}
|
||||
}
|
||||
|
||||
void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||
void PatchGlobalDataShareAccess(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
Descriptors& descriptors) {
|
||||
const u32 binding = descriptors.Add(BufferResource{
|
||||
.used_types = IR::Type::U32,
|
||||
.inline_cbuf = AmdGpu::Buffer::Null(),
|
||||
@@ -515,37 +550,111 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto
|
||||
.is_written = true,
|
||||
});
|
||||
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
// Attempt to deduce the GDS address of counter at compile time.
|
||||
u32 gds_addr = 0;
|
||||
const IR::Value& gds_offset = inst.Arg(0);
|
||||
if (gds_offset.IsImmediate()) {
|
||||
// Nothing to do, offset is known.
|
||||
gds_addr = gds_offset.U32() & 0xFFFF;
|
||||
} else {
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to track M0 source");
|
||||
|
||||
// M0 must be set by some user data register.
|
||||
const IR::Inst* prod = gds_offset.InstRecursive();
|
||||
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
|
||||
u32 m0_val = info.user_data[ud_reg] >> 16;
|
||||
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
m0_val += prod->Arg(1).U32();
|
||||
}
|
||||
gds_addr = m0_val & 0xFFFF;
|
||||
}
|
||||
|
||||
// Patch instruction.
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
|
||||
inst.SetArg(1, ir.Imm32(binding));
|
||||
|
||||
// For data append/consume operations attempt to deduce the GDS address.
|
||||
if (inst.GetOpcode() == IR::Opcode::DataAppend || inst.GetOpcode() == IR::Opcode::DataConsume) {
|
||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||
return inst;
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
u32 gds_addr = 0;
|
||||
const IR::Value& gds_offset = inst.Arg(0);
|
||||
if (gds_offset.IsImmediate()) {
|
||||
// Nothing to do, offset is known.
|
||||
gds_addr = gds_offset.U32() & 0xFFFF;
|
||||
} else {
|
||||
const auto result = IR::BreadthFirstSearch(&inst, pred);
|
||||
ASSERT_MSG(result, "Unable to track M0 source");
|
||||
|
||||
// M0 must be set by some user data register.
|
||||
const IR::Inst* prod = gds_offset.InstRecursive();
|
||||
const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
|
||||
u32 m0_val = info.user_data[ud_reg] >> 16;
|
||||
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
m0_val += prod->Arg(1).U32();
|
||||
}
|
||||
gds_addr = m0_val & 0xFFFF;
|
||||
}
|
||||
|
||||
// Patch instruction.
|
||||
inst.SetArg(0, ir.Imm32(gds_addr >> 2));
|
||||
inst.SetArg(1, ir.Imm32(binding));
|
||||
} else {
|
||||
// Convert shared memory opcode to storage buffer atomic to GDS buffer.
|
||||
const IR::U32 offset = IR::U32{inst.Arg(0)};
|
||||
const IR::U32 address_words = ir.ShiftRightLogical(offset, ir.Imm32(1));
|
||||
const IR::U32 address_dwords = ir.ShiftRightLogical(offset, ir.Imm32(2));
|
||||
const IR::U32 address_qwords = ir.ShiftRightLogical(offset, ir.Imm32(3));
|
||||
const IR::U32 handle = ir.Imm32(binding);
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::SharedAtomicIAdd32:
|
||||
inst.ReplaceUsesWith(ir.BufferAtomicIAdd(handle, address_dwords, inst.Arg(1), {}));
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
inst.ReplaceUsesWith(
|
||||
ir.BufferAtomicIAdd(handle, address_qwords, IR::U64{inst.Arg(1)}, {}));
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicISub32:
|
||||
inst.ReplaceUsesWith(ir.BufferAtomicISub(handle, address_dwords, inst.Arg(1), {}));
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicSMin32:
|
||||
case IR::Opcode::SharedAtomicUMin32: {
|
||||
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32;
|
||||
inst.ReplaceUsesWith(
|
||||
ir.BufferAtomicIMin(handle, address_dwords, inst.Arg(1), is_signed, {}));
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::SharedAtomicSMax32:
|
||||
case IR::Opcode::SharedAtomicUMax32: {
|
||||
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32;
|
||||
inst.ReplaceUsesWith(
|
||||
ir.BufferAtomicIMax(handle, address_dwords, inst.Arg(1), is_signed, {}));
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::SharedAtomicInc32:
|
||||
inst.ReplaceUsesWith(ir.BufferAtomicInc(handle, address_dwords, {}));
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicDec32:
|
||||
inst.ReplaceUsesWith(ir.BufferAtomicDec(handle, address_dwords, {}));
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicAnd32:
|
||||
inst.ReplaceUsesWith(ir.BufferAtomicAnd(handle, address_dwords, inst.Arg(1), {}));
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicOr32:
|
||||
inst.ReplaceUsesWith(ir.BufferAtomicOr(handle, address_dwords, inst.Arg(1), {}));
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicXor32:
|
||||
inst.ReplaceUsesWith(ir.BufferAtomicXor(handle, address_dwords, inst.Arg(1), {}));
|
||||
break;
|
||||
case IR::Opcode::LoadSharedU16:
|
||||
inst.ReplaceUsesWith(ir.LoadBufferU16(handle, address_words, {}));
|
||||
break;
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
inst.ReplaceUsesWith(ir.LoadBufferU32(1, handle, address_dwords, {}));
|
||||
break;
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
inst.ReplaceUsesWith(ir.LoadBufferU64(handle, address_qwords, {}));
|
||||
break;
|
||||
case IR::Opcode::WriteSharedU16:
|
||||
ir.StoreBufferU16(handle, address_words, IR::U16{inst.Arg(1)}, {});
|
||||
inst.Invalidate();
|
||||
break;
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
ir.StoreBufferU32(1, handle, address_dwords, inst.Arg(1), {});
|
||||
inst.Invalidate();
|
||||
break;
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
ir.StoreBufferU64(handle, address_qwords, IR::U64{inst.Arg(1)}, {});
|
||||
inst.Invalidate();
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info,
|
||||
@@ -916,8 +1025,6 @@ void ResourceTrackingPass(IR::Program& program) {
|
||||
PatchBufferSharp(*block, inst, info, descriptors);
|
||||
} else if (IsImageInstruction(inst)) {
|
||||
PatchImageSharp(*block, inst, info, descriptors);
|
||||
} else if (IsDataRingInstruction(inst)) {
|
||||
PatchDataRingAccess(*block, inst, info, descriptors);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -929,6 +1036,8 @@ void ResourceTrackingPass(IR::Program& program) {
|
||||
PatchBufferArgs(*block, inst, info);
|
||||
} else if (IsImageInstruction(inst)) {
|
||||
PatchImageArgs(*block, inst, info);
|
||||
} else if (IsDataRingInstruction(inst)) {
|
||||
PatchGlobalDataShareAccess(*block, inst, info, descriptors);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -55,6 +55,16 @@ void Visit(Info& info, const IR::Inst& inst) {
|
||||
info.shared_types |= IR::Type::U32;
|
||||
break;
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
case IR::Opcode::SharedAtomicISub64:
|
||||
case IR::Opcode::SharedAtomicSMin64:
|
||||
case IR::Opcode::SharedAtomicUMin64:
|
||||
case IR::Opcode::SharedAtomicSMax64:
|
||||
case IR::Opcode::SharedAtomicUMax64:
|
||||
case IR::Opcode::SharedAtomicInc64:
|
||||
case IR::Opcode::SharedAtomicDec64:
|
||||
case IR::Opcode::SharedAtomicAnd64:
|
||||
case IR::Opcode::SharedAtomicOr64:
|
||||
case IR::Opcode::SharedAtomicXor64:
|
||||
info.uses_shared_int64_atomics = true;
|
||||
[[fallthrough]];
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
|
||||
@@ -15,6 +15,16 @@ static bool Requires16BitSharedAtomic(const IR::Inst& inst) {
|
||||
static bool Requires64BitSharedAtomic(const IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
case IR::Opcode::SharedAtomicISub64:
|
||||
case IR::Opcode::SharedAtomicSMin64:
|
||||
case IR::Opcode::SharedAtomicUMin64:
|
||||
case IR::Opcode::SharedAtomicSMax64:
|
||||
case IR::Opcode::SharedAtomicUMax64:
|
||||
case IR::Opcode::SharedAtomicInc64:
|
||||
case IR::Opcode::SharedAtomicDec64:
|
||||
case IR::Opcode::SharedAtomicAnd64:
|
||||
case IR::Opcode::SharedAtomicOr64:
|
||||
case IR::Opcode::SharedAtomicXor64:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
|
||||
@@ -17,7 +17,6 @@ static bool IsSharedAccess(const IR::Inst& inst) {
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
case IR::Opcode::SharedAtomicIAdd32:
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
case IR::Opcode::SharedAtomicISub32:
|
||||
case IR::Opcode::SharedAtomicSMin32:
|
||||
case IR::Opcode::SharedAtomicUMin32:
|
||||
@@ -28,6 +27,17 @@ static bool IsSharedAccess(const IR::Inst& inst) {
|
||||
case IR::Opcode::SharedAtomicAnd32:
|
||||
case IR::Opcode::SharedAtomicOr32:
|
||||
case IR::Opcode::SharedAtomicXor32:
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
case IR::Opcode::SharedAtomicISub64:
|
||||
case IR::Opcode::SharedAtomicSMin64:
|
||||
case IR::Opcode::SharedAtomicUMin64:
|
||||
case IR::Opcode::SharedAtomicSMax64:
|
||||
case IR::Opcode::SharedAtomicUMax64:
|
||||
case IR::Opcode::SharedAtomicInc64:
|
||||
case IR::Opcode::SharedAtomicDec64:
|
||||
case IR::Opcode::SharedAtomicAnd64:
|
||||
case IR::Opcode::SharedAtomicOr64:
|
||||
case IR::Opcode::SharedAtomicXor64:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
@@ -64,6 +74,16 @@ IR::Type CalculateSharedMemoryTypes(IR::Program& program) {
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
case IR::Opcode::WriteSharedU64:
|
||||
case IR::Opcode::SharedAtomicIAdd64:
|
||||
case IR::Opcode::SharedAtomicISub64:
|
||||
case IR::Opcode::SharedAtomicSMin64:
|
||||
case IR::Opcode::SharedAtomicUMin64:
|
||||
case IR::Opcode::SharedAtomicSMax64:
|
||||
case IR::Opcode::SharedAtomicUMax64:
|
||||
case IR::Opcode::SharedAtomicInc64:
|
||||
case IR::Opcode::SharedAtomicDec64:
|
||||
case IR::Opcode::SharedAtomicAnd64:
|
||||
case IR::Opcode::SharedAtomicOr64:
|
||||
case IR::Opcode::SharedAtomicXor64:
|
||||
used_types |= IR::Type::U64;
|
||||
break;
|
||||
default:
|
||||
@@ -119,19 +139,26 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
||||
ir.BufferAtomicIAdd(handle, address, inst.Arg(1), {}));
|
||||
continue;
|
||||
case IR::Opcode::SharedAtomicISub32:
|
||||
case IR::Opcode::SharedAtomicISub64:
|
||||
inst.ReplaceUsesWithAndRemove(
|
||||
ir.BufferAtomicISub(handle, address, inst.Arg(1), {}));
|
||||
continue;
|
||||
case IR::Opcode::SharedAtomicSMin32:
|
||||
case IR::Opcode::SharedAtomicUMin32: {
|
||||
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32;
|
||||
case IR::Opcode::SharedAtomicUMin32:
|
||||
case IR::Opcode::SharedAtomicSMin64:
|
||||
case IR::Opcode::SharedAtomicUMin64: {
|
||||
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMin32 ||
|
||||
inst.GetOpcode() == IR::Opcode::SharedAtomicSMin64;
|
||||
inst.ReplaceUsesWithAndRemove(
|
||||
ir.BufferAtomicIMin(handle, address, inst.Arg(1), is_signed, {}));
|
||||
continue;
|
||||
}
|
||||
case IR::Opcode::SharedAtomicSMax32:
|
||||
case IR::Opcode::SharedAtomicUMax32: {
|
||||
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32;
|
||||
case IR::Opcode::SharedAtomicUMax32:
|
||||
case IR::Opcode::SharedAtomicSMax64:
|
||||
case IR::Opcode::SharedAtomicUMax64: {
|
||||
const bool is_signed = inst.GetOpcode() == IR::Opcode::SharedAtomicSMax32 ||
|
||||
inst.GetOpcode() == IR::Opcode::SharedAtomicSMax64;
|
||||
inst.ReplaceUsesWithAndRemove(
|
||||
ir.BufferAtomicIMax(handle, address, inst.Arg(1), is_signed, {}));
|
||||
continue;
|
||||
@@ -143,12 +170,15 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
||||
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicDec(handle, address, {}));
|
||||
continue;
|
||||
case IR::Opcode::SharedAtomicAnd32:
|
||||
case IR::Opcode::SharedAtomicAnd64:
|
||||
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicAnd(handle, address, inst.Arg(1), {}));
|
||||
continue;
|
||||
case IR::Opcode::SharedAtomicOr32:
|
||||
case IR::Opcode::SharedAtomicOr64:
|
||||
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicOr(handle, address, inst.Arg(1), {}));
|
||||
continue;
|
||||
case IR::Opcode::SharedAtomicXor32:
|
||||
case IR::Opcode::SharedAtomicXor64:
|
||||
inst.ReplaceUsesWithAndRemove(ir.BufferAtomicXor(handle, address, inst.Arg(1), {}));
|
||||
continue;
|
||||
case IR::Opcode::LoadSharedU16:
|
||||
@@ -173,7 +203,7 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
|
||||
inst.Invalidate();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user