Implement MUBUF instructions for shorts/bytes (#2856)

* implement load/store instructions for types smaller than dwords

* initialize s16/s8 types

* set profile for int8/16/64

* also need to zero extend u8/u16 to u32 result

* document unrelated bugs with atomic fmin/max

* remove profile checks and simple emit for added opcodes

---------

Co-authored-by: georgemoralis <giorgosmrls@gmail.com>
This commit is contained in:
baggins183
2025-07-18 02:04:50 -07:00
committed by GitHub
parent 76f003d388
commit 3019bfb978
11 changed files with 120 additions and 58 deletions

View File

@@ -1979,6 +1979,24 @@ U8U16U32U64 IREmitter::UConvert(size_t result_bitsize, const U8U16U32U64& value)
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
}
// Emits a signed integer conversion of `value` to a `result_bitsize`-bit result.
//
// Supported combinations (all produce a U32 instruction result):
//   - Type::U8  source, 32-bit result -> Opcode::ConvertS32S8
//   - Type::U16 source, 32-bit result -> Opcode::ConvertS32S16
//
// Any other source type / result width throws NotImplementedException.
U8U16U32U64 IR::IREmitter::SConvert(size_t result_bitsize, const U8U16U32U64& value) {
    if (result_bitsize == 32) {
        const auto source_type = value.Type();
        if (source_type == Type::U8) {
            return Inst<U32>(Opcode::ConvertS32S8, value);
        }
        if (source_type == Type::U16) {
            return Inst<U32>(Opcode::ConvertS32S16, value);
        }
    }
    // No opcode exists for this width/type pairing.
    throw NotImplementedException("Signed Conversion from {} to {} bits", value.Type(),
                                  result_bitsize);
}
F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
switch (result_bitsize) {
case 16:

View File

@@ -325,6 +325,7 @@ public:
const Value& value);
// Integer conversion of `value` to `result_bitsize` bits.
// NOTE(review): presumably the unsigned counterpart of SConvert — confirm against the definition.
[[nodiscard]] U8U16U32U64 UConvert(size_t result_bitsize, const U8U16U32U64& value);
// Signed integer conversion of `value` to `result_bitsize` bits. The definition handles
// U8 and U16 sources with a 32-bit result and throws NotImplementedException otherwise.
[[nodiscard]] U8U16U32U64 SConvert(size_t result_bitsize, const U8U16U32U64& value);
// Floating-point conversion of `value` to `result_bitsize` bits.
// NOTE(review): supported width pairs are not visible from here — check the definition.
[[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
[[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,

View File

@@ -432,6 +432,8 @@ OPCODE(ConvertU16U32, U16, U32,
// Integer width conversions. Columns appear to be: opcode name, result type, operand type
// (TODO confirm against the OPCODE macro definition).
OPCODE(ConvertU32U16, U32, U16, )
OPCODE(ConvertU8U32, U8, U32, )
OPCODE(ConvertU32U8, U32, U8, )
// Signed conversions emitted by IREmitter::SConvert: a U8/U16 operand producing a U32 result.
OPCODE(ConvertS32S8, U32, U8, )
OPCODE(ConvertS32S16, U32, U16, )
// Image operations
OPCODE(ImageSampleRaw, F32x4, Opaque, F32x4, F32x4, F32x4, F32, Opaque, )