diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index b9707224c..28d3d2626 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -120,8 +120,10 @@ Id EmitUndefU8(EmitContext& ctx); Id EmitUndefU16(EmitContext& ctx); Id EmitUndefU32(EmitContext& ctx); Id EmitUndefU64(EmitContext& ctx); +Id EmitLoadSharedU16(EmitContext& ctx, Id offset); Id EmitLoadSharedU32(EmitContext& ctx, Id offset); Id EmitLoadSharedU64(EmitContext& ctx, Id offset); +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp index 8b1610d61..a75986ee0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp @@ -6,6 +6,13 @@ namespace Shader::Backend::SPIRV { +Id EmitLoadSharedU16(EmitContext& ctx, Id offset) { + const Id shift_id{ctx.ConstU32(1U)}; + const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; + const Id pointer = ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, index); + return ctx.OpLoad(ctx.U16, pointer); +} + Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { const Id shift_id{ctx.ConstU32(2U)}; const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; @@ -23,6 +30,13 @@ Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { ctx.OpLoad(ctx.U32[1], rhs_pointer)); } +void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) { + const Id shift{ctx.ConstU32(1U)}; + const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; + const Id pointer = ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, word_offset); + ctx.OpStore(pointer, value); +} + void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { const Id shift{ctx.ConstU32(2U)}; const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 9e51f8e60..245ac7eb2 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -979,12 +979,17 @@ void EmitContext::DefineSharedMemory() { } ASSERT(info.stage == Stage::Compute); const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size; - const u32 num_elements{Common::DivCeil(shared_memory_size, 4U)}; - const Id type{TypeArray(U32[1], ConstU32(num_elements))}; - shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); + const Id type16{TypeArray(U16, ConstU32(Common::DivCeil(shared_memory_size, 2U)))}; + const Id type32{TypeArray(U32[1], ConstU32(Common::DivCeil(shared_memory_size, 4U)))}; + shared_memory_u16_type = TypePointer(spv::StorageClass::Workgroup, type16); + shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type32); + shared_u16 = TypePointer(spv::StorageClass::Workgroup, U16); shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); + shared_memory_u16 = AddGlobalVariable(shared_memory_u16_type, spv::StorageClass::Workgroup); shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); - Name(shared_memory_u32, "shared_mem"); + Name(shared_memory_u16, "shared_mem_u16"); + Name(shared_memory_u32, "shared_mem_u32"); + interfaces.push_back(shared_memory_u16); interfaces.push_back(shared_memory_u32); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 20d936cf0..358481ed9 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -291,6 +291,7 @@ public: Id shared_memory_u32x2{}; Id shared_memory_u32x4{}; + Id shared_memory_u16_type{}; Id shared_memory_u32_type{}; Id bary_coord_persp_id{}; diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp index c29497ada..ee90ca611 100644 --- a/src/shader_recompiler/frontend/translate/data_share.cpp +++ b/src/shader_recompiler/frontend/translate/data_share.cpp @@ -61,10 +61,14 @@ void Translator::EmitDataShare(const GcnInst& inst) { return DS_READ(32, false, true, false, inst); case Opcode::DS_READ2ST64_B32: return DS_READ(32, false, true, true, inst); + case Opcode::DS_READ_U16: + return DS_READ(16, false, false, false, inst); case Opcode::DS_CONSUME: return DS_CONSUME(inst); case Opcode::DS_APPEND: return DS_APPEND(inst); + case Opcode::DS_WRITE_B16: + return DS_WRITE(16, false, false, false, inst); case Opcode::DS_WRITE_B64: return DS_WRITE(64, false, false, false, inst); case Opcode::DS_WRITE2_B64: @@ -305,6 +309,10 @@ void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0); ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)}); ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)}); + } else if (bit_size == 16) { + const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset)); + const IR::U16 data = IR::U16{ir.LoadShared(bit_size, is_signed, addr0)}; + ir.SetVectorReg(dst_reg, ir.UConvert(32, data)); } else { const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset)); const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)}; diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index e6cc32829..33a67b033 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -293,6 +293,8 @@ void IREmitter::SetPatch(Patch patch, const F32& value) { Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { switch (bit_size) { + case 16: + return Inst(Opcode::LoadSharedU16, offset); case 32: return Inst(Opcode::LoadSharedU32, offset); case 64: @@ -304,6 +306,9 @@ Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) { void IREmitter::WriteShared(int bit_size, const Value& value, const U32& offset) { switch (bit_size) { + case 16: + Inst(Opcode::WriteSharedU16, offset, value); + break; case 32: Inst(Opcode::WriteSharedU32, offset, value); break; diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 5b3216be6..852c09319 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -30,8 +30,10 @@ OPCODE(EmitVertex, Void, OPCODE(EmitPrimitive, Void, ) // Shared memory operations +OPCODE(LoadSharedU16, U16, U32, ) OPCODE(LoadSharedU32, U32, U32, ) OPCODE(LoadSharedU64, U32x2, U32, ) +OPCODE(WriteSharedU16, Void, U32, U16, ) OPCODE(WriteSharedU32, Void, U32, U32, ) OPCODE(WriteSharedU64, Void, U32, U32x2, )