diff --git a/CMakeLists.txt b/CMakeLists.txt index dac3b19ab..89cc3cc25 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -694,6 +694,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/post_order.h src/shader_recompiler/ir/program.cpp src/shader_recompiler/ir/program.h + src/shader_recompiler/ir/reinterpret.h src/shader_recompiler/ir/reg.h src/shader_recompiler/ir/type.cpp src/shader_recompiler/ir/type.h diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 74e736cf6..469a672d7 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp @@ -42,6 +42,20 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); } +Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.U32[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.U32[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.U32[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); } @@ -78,6 +92,20 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); } +Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F16[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F16[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F16[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); } @@ -114,6 +142,20 @@ Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); } +Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F32[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F32[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F32[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + void EmitCompositeConstructF64x2(EmitContext&) { UNREACHABLE_MSG("SPIR-V Instruction"); } @@ -150,4 +192,18 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); } +Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) { + return ctx.OpVectorShuffle(ctx.F64[2], composite1, composite2, comp0, comp1); +} + +Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2) { + return ctx.OpVectorShuffle(ctx.F64[3], composite1, composite2, comp0, comp1, comp2); +} + +Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3) { + return ctx.OpVectorShuffle(ctx.F64[4], composite1, composite2, comp0, comp1, comp2, comp3); +} + } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 2f606eb45..334860e4e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -127,6 +127,11 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); @@ -136,6 +141,11 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); @@ -145,6 +155,11 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx); @@ -154,6 +169,11 @@ void EmitCompositeExtractF64x4(EmitContext& ctx); Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); +Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1); +Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2); +Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1, + u32 comp2, u32 comp3); Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 05c3b1169..28fd703b2 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -10,6 +10,7 @@ #include "shader_recompiler/info.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/reg.h" +#include "shader_recompiler/ir/reinterpret.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" #include "video_core/amdgpu/types.h" @@ -475,11 +476,12 @@ void Translator::EmitFetch(const GcnInst& inst) { // Read the V# of the attribute to figure out component number and type. const auto buffer = info.ReadUdReg(attrib.sgpr_base, attrib.dword_offset); - const std::array components = buffer.DstSelect().Apply( - [&](const u32 index) { return ir.GetAttribute(attr, index); }, - [&](const u32 imm) { return ir.Imm32(float(imm)); }); - for (u32 i = 0; i < components.size(); i++) { - ir.SetVectorReg(dst_reg++, components[i]); + const auto values = + ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1), + ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3)); + const auto swizzled = ApplySwizzle(ir, values, buffer.DstSelect()); + for (u32 i = 0; i < 4; i++) { + ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)}); } // In case of programmable step rates we need to fallback to instance data pulling in diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index c241ec984..0762f070d 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -657,6 +657,86 @@ Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_ } } +Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1) { + if (vector1.Type() != vector2.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type()); + } + if (comp0 >= 4 || comp1 >= 4) { + UNREACHABLE_MSG("One or more out of bounds elements {}, {}", comp0, comp1); + } + const auto shuffle{[&](Opcode opcode) -> Value { + return Inst(opcode, vector1, vector2, Value{static_cast(comp0)}, + Value{static_cast(comp1)}); + }}; + switch (vector1.Type()) { + case Type::U32x4: + return shuffle(Opcode::CompositeShuffleU32x2); + case Type::F16x4: + return shuffle(Opcode::CompositeShuffleF16x2); + case Type::F32x4: + return shuffle(Opcode::CompositeShuffleF32x2); + case Type::F64x4: + return shuffle(Opcode::CompositeShuffleF64x2); + default: + ThrowInvalidType(vector1.Type()); + } +} + +Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2) { + if (vector1.Type() != vector2.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type()); + } + if (comp0 >= 6 || comp1 >= 6 || comp2 >= 6) { + UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}", comp0, comp1, comp2); + } + const auto shuffle{[&](Opcode opcode) -> Value { + return Inst(opcode, vector1, vector2, Value{static_cast(comp0)}, + Value{static_cast(comp1)}, Value{static_cast(comp2)}); + }}; + switch (vector1.Type()) { + case Type::U32x4: + return shuffle(Opcode::CompositeShuffleU32x3); + case Type::F16x4: + return shuffle(Opcode::CompositeShuffleF16x3); + case Type::F32x4: + return shuffle(Opcode::CompositeShuffleF32x3); + case Type::F64x4: + return shuffle(Opcode::CompositeShuffleF64x3); + default: + ThrowInvalidType(vector1.Type()); + } +} + +Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2, size_t comp3) { + if (vector1.Type() != vector2.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type()); + } + if (comp0 >= 8 || comp1 >= 8 || comp2 >= 8 || comp3 >= 8) { + UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}, {}", comp0, comp1, comp2, + comp3); + } + const auto shuffle{[&](Opcode opcode) -> Value { + return Inst(opcode, vector1, vector2, Value{static_cast(comp0)}, + Value{static_cast(comp1)}, Value{static_cast(comp2)}, + Value{static_cast(comp3)}); + }}; + switch (vector1.Type()) { + case Type::U32x4: + return shuffle(Opcode::CompositeShuffleU32x4); + case Type::F16x4: + return shuffle(Opcode::CompositeShuffleF16x4); + case Type::F32x4: + return shuffle(Opcode::CompositeShuffleF32x4); + case Type::F64x4: + return shuffle(Opcode::CompositeShuffleF64x4); + default: + ThrowInvalidType(vector1.Type()); + } +} + Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { if (true_value.Type() != false_value.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", true_value.Type(), false_value.Type()); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 4cf44107e..15f68129c 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -152,6 +152,13 @@ public: [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); + [[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1); + [[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2); + [[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0, + size_t comp1, size_t comp2, size_t comp3); + [[nodiscard]] Value Select(const U1& condition, const Value& true_value, const Value& false_value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index aafd43ea8..7f3c52642 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -122,6 +122,9 @@ OPCODE(CompositeExtractU32x4, U32, U32x OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) +OPCODE(CompositeShuffleU32x2, U32x2, U32x2, U32x2, U32, U32, ) +OPCODE(CompositeShuffleU32x3, U32x3, U32x3, U32x3, U32, U32, U32, ) +OPCODE(CompositeShuffleU32x4, U32x4, U32x4, U32x4, U32, U32, U32, U32, ) OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) @@ -131,6 +134,9 @@ OPCODE(CompositeExtractF16x4, F16, F16x OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) +OPCODE(CompositeShuffleF16x2, F16x2, F16x2, F16x2, U32, U32, ) +OPCODE(CompositeShuffleF16x3, F16x3, F16x3, F16x3, U32, U32, U32, ) +OPCODE(CompositeShuffleF16x4, F16x4, F16x4, F16x4, U32, U32, U32, U32, ) OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) @@ -140,6 +146,9 @@ OPCODE(CompositeExtractF32x4, F32, F32x OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) +OPCODE(CompositeShuffleF32x2, F32x2, F32x2, F32x2, U32, U32, ) +OPCODE(CompositeShuffleF32x3, F32x3, F32x3, F32x3, U32, U32, U32, ) +OPCODE(CompositeShuffleF32x4, F32x4, F32x4, F32x4, U32, U32, U32, U32, ) OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) @@ -149,6 +158,9 @@ OPCODE(CompositeExtractF64x4, F64, F64x OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) +OPCODE(CompositeShuffleF64x2, F64x2, F64x2, F64x2, U32, U32, ) +OPCODE(CompositeShuffleF64x3, F64x3, F64x3, F64x3, U32, U32, U32, ) +OPCODE(CompositeShuffleF64x4, F64x4, F64x4, F64x4, U32, U32, U32, U32, ) // Select operations OPCODE(SelectU1, U1, U1, U1, U1, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 537907bc1..e537185fa 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/ir/breadth_first_search.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/program.h" +#include "shader_recompiler/ir/reinterpret.h" #include "video_core/amdgpu/resource.h" namespace Shader::Optimization { @@ -128,13 +129,6 @@ bool IsImageInstruction(const IR::Inst& inst) { } } -IR::Value SwizzleVector(IR::IREmitter& ir, auto sharp, IR::Value texel) { - const std::array components = sharp.DstSelect().template Apply( - [&](const u32 index) { return ir.CompositeExtract(texel, index); }, - [&](const u32 imm) { return ir.Imm32(float(imm)); }); - return ir.CompositeConstruct(components[0], components[1], components[2], components[3]); -}; - class Descriptors { public: explicit Descriptors(Info& info_) @@ -387,15 +381,6 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info, IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; inst.SetArg(0, ir.Imm32(binding)); ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable); - - // Apply dst_sel swizzle on formatted buffer instructions - if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) { - inst.SetArg(2, SwizzleVector(ir, buffer, inst.Arg(2))); - } else { - const auto inst_info = inst.Flags(); - const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info); - inst.ReplaceUsesWith(SwizzleVector(ir, buffer, texel)); - } } IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t, @@ -743,10 +728,6 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip }(); inst.SetArg(1, coords); - if (inst.GetOpcode() == IR::Opcode::ImageWrite) { - inst.SetArg(4, SwizzleVector(ir, image, inst.Arg(4))); - } - if (inst_info.has_lod) { ASSERT(inst.GetOpcode() == IR::Opcode::ImageRead || inst.GetOpcode() == IR::Opcode::ImageWrite); @@ -761,6 +742,47 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip } } +void PatchTextureBufferInterpretation(IR::Block& block, IR::Inst& inst, Info& info) { + const auto binding = inst.Arg(0).U32(); + const auto buffer_res = info.texture_buffers[binding]; + const auto buffer = buffer_res.GetSharp(info); + if (!buffer.Valid()) { + // Don't need to swizzle invalid buffer. + return; + } + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) { + inst.SetArg(2, ApplySwizzle(ir, inst.Arg(2), buffer.DstSelect())); + } else if (inst.GetOpcode() == IR::Opcode::LoadBufferFormatF32) { + const auto inst_info = inst.Flags(); + const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info); + const auto swizzled = ApplySwizzle(ir, texel, buffer.DstSelect()); + inst.ReplaceUsesWith(swizzled); + } +} + +void PatchImageInterpretation(IR::Block& block, IR::Inst& inst, Info& info) { + const auto binding = inst.Arg(0).U32(); + const auto image_res = info.images[binding & 0xFFFF]; + const auto image = image_res.GetSharp(info); + if (!image.Valid() || !image_res.IsStorage(image)) { + // Don't need to swizzle invalid or non-storage image. + return; + } + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + if (inst.GetOpcode() == IR::Opcode::ImageWrite) { + inst.SetArg(4, ApplySwizzle(ir, inst.Arg(4), image.DstSelect())); + } else if (inst.GetOpcode() == IR::Opcode::ImageRead) { + const auto inst_info = inst.Flags(); + const auto texel = ir.ImageRead(inst.Arg(0), inst.Arg(1), IR::U32{inst.Arg(2)}, + IR::U32{inst.Arg(3)}, inst_info); + const auto swizzled = ApplySwizzle(ir, texel, image.DstSelect()); + inst.ReplaceUsesWith(swizzled); + } +} + void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { // Insert gds binding in the shader if it doesn't exist already. @@ -830,6 +852,19 @@ void ResourceTrackingPass(IR::Program& program) { } } } + // Second pass to reinterpret format read/write where needed, since we now know + // the bindings and their properties. + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (IsTextureBufferInstruction(inst)) { + PatchTextureBufferInterpretation(*block, inst, info); + continue; + } + if (IsImageInstruction(inst)) { + PatchImageInterpretation(*block, inst, info); + } + } + } } } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/reinterpret.h b/src/shader_recompiler/ir/reinterpret.h new file mode 100644 index 000000000..73d587a56 --- /dev/null +++ b/src/shader_recompiler/ir/reinterpret.h @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "shader_recompiler/ir/ir_emitter.h" +#include "video_core/amdgpu/resource.h" + +namespace Shader::IR { + +/// Applies a component swizzle to a vec4. +inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::CompMapping& swizzle) { + // Constants are indexed as 0 and 1, and components are 4-7. Thus we can apply a swizzle + // using two vectors and a shuffle, using one vector of constants and one of the components. + const auto zero = ir.Imm32(0.f); + const auto one = ir.Imm32(1.f); + const auto constants_vec = ir.CompositeConstruct(zero, one, zero, zero); + const auto swizzled = + ir.CompositeShuffle(constants_vec, vector, size_t(swizzle.r), size_t(swizzle.g), + size_t(swizzle.b), size_t(swizzle.a)); + return swizzled; +} + +} // namespace Shader::IR diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index c876e9277..f1607f03e 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -912,24 +912,24 @@ struct Liverpool { }}, // Alternate std::array{{ - {.r = CompSwizzle::Zero, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, - {.r = CompSwizzle::Red, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Green}, - {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Zero, .a = CompSwizzle::Blue}, + {.r = CompSwizzle::Green, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Alpha, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Red, .g = CompSwizzle::Green, .b = CompSwizzle::Alpha, .a = CompSwizzle::Zero}, {.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Alpha}, }}, // StandardReverse std::array{{ - {CompSwizzle::Zero, CompSwizzle::Zero, CompSwizzle::Red, CompSwizzle::Zero}, - {CompSwizzle::Green, CompSwizzle::Red, CompSwizzle::Zero, CompSwizzle::Zero}, - {CompSwizzle::Blue, CompSwizzle::Green, CompSwizzle::Red, CompSwizzle::Zero}, - {CompSwizzle::Alpha, CompSwizzle::Blue, CompSwizzle::Green, CompSwizzle::Red}, + {.r = CompSwizzle::Blue, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Green, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Blue, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Blue, .b = CompSwizzle::Green, .a = CompSwizzle::Red}, }}, // AlternateReverse std::array{{ - {CompSwizzle::Zero, CompSwizzle::Zero, CompSwizzle::Zero, CompSwizzle::Red}, - {CompSwizzle::Green, CompSwizzle::Zero, CompSwizzle::Zero, CompSwizzle::Red}, - {CompSwizzle::Blue, CompSwizzle::Green, CompSwizzle::Zero, CompSwizzle::Red}, - {CompSwizzle::Alpha, CompSwizzle::Red, CompSwizzle::Green, CompSwizzle::Blue}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Zero, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Zero, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Green, .b = CompSwizzle::Red, .a = CompSwizzle::Zero}, + {.r = CompSwizzle::Alpha, .g = CompSwizzle::Red, .b = CompSwizzle::Green, .a = CompSwizzle::Blue}, }}, }}; // clang-format on diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 3a206f458..bf72f4f94 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -3,7 +3,6 @@ #pragma once -#include #include "common/alignment.h" #include "common/assert.h" #include "common/bit_field.h" @@ -31,38 +30,30 @@ struct CompMapping { template [[nodiscard]] std::array Apply(const std::array& data) const { - return Apply([&data](const u32 index) { return data[index]; }, - [](const u32 imm) { return T(imm); }); - } - - template - [[nodiscard]] std::array Apply(const std::function& get_value, - const std::function& get_immediate) const { return { - ApplySingle(get_value, get_immediate, r), - ApplySingle(get_value, get_immediate, g), - ApplySingle(get_value, get_immediate, b), - ApplySingle(get_value, get_immediate, a), + ApplySingle(data, r), + ApplySingle(data, g), + ApplySingle(data, b), + ApplySingle(data, a), }; } private: template - T ApplySingle(const std::function& get_value, - const std::function& get_immediate, const CompSwizzle swizzle) const { + T ApplySingle(const std::array& data, const CompSwizzle swizzle) const { switch (swizzle) { case CompSwizzle::Zero: - return get_immediate(0); + return T(0); case CompSwizzle::One: - return get_immediate(1); + return T(1); case CompSwizzle::Red: - return get_value(0); + return data[0]; case CompSwizzle::Green: - return get_value(1); + return data[1]; case CompSwizzle::Blue: - return get_value(2); + return data[2]; case CompSwizzle::Alpha: - return get_value(3); + return data[3]; default: UNREACHABLE(); }