diff --git a/src/core/libraries/libs.cpp b/src/core/libraries/libs.cpp
index 86f9fcf9d..fb771bc22 100644
--- a/src/core/libraries/libs.cpp
+++ b/src/core/libraries/libs.cpp
@@ -78,7 +78,6 @@ void InitHLELibs(Core::Loader::SymbolsResolver* sym) {
     Libraries::ImeDialog::RegisterlibSceImeDialog(sym);
     Libraries::AvPlayer::RegisterlibSceAvPlayer(sym);
     Libraries::Audio3d::RegisterlibSceAudio3d(sym);
-    Libraries::Fiber::RegisterlibSceFiber(sym);
 }
 
 } // namespace Libraries
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 8f062d6e7..fc99b8925 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -59,19 +59,22 @@ struct ImageOperands {
         }
     }
 
-    void AddDerivatives(EmitContext& ctx, Id derivatives) {
-        if (!Sirit::ValidId(derivatives)) {
+    void AddDerivatives(EmitContext& ctx, Id derivatives_dx, Id derivatives_dy) {
+        if (!Sirit::ValidId(derivatives_dx) || !Sirit::ValidId(derivatives_dy)) {
             return;
         }
-        const Id dx{ctx.OpVectorShuffle(ctx.F32[2], derivatives, derivatives, 0, 1)};
-        const Id dy{ctx.OpVectorShuffle(ctx.F32[2], derivatives, derivatives, 2, 3)};
-        Add(spv::ImageOperandsMask::Grad, dx, dy);
+        Add(spv::ImageOperandsMask::Grad, derivatives_dx, derivatives_dy);
     }
 
     spv::ImageOperandsMask mask{};
     boost::container::static_vector<Id, 4> operands;
 };
 
+Id EmitImageSampleRaw(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address1, Id address2,
+                      Id address3, Id address4) {
+    UNREACHABLE_MSG("Unreachable instruction");
+}
+
 Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias,
                               const IR::Value& offset) {
     const auto& texture = ctx.images[handle & 0xFFFF];
@@ -114,7 +117,9 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle,
     operands.AddOffset(ctx, offset);
     const Id sample = ctx.OpImageSampleDrefImplicitLod(result_type, sampled_image, coords, dref,
                                                        operands.mask, operands.operands);
-    return texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample;
+    const Id sample_typed = texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample;
+    return ctx.OpCompositeConstruct(ctx.F32[4], sample_typed, ctx.f32_zero_value,
+                                    ctx.f32_zero_value, ctx.f32_zero_value);
 }
 
 Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
@@ -129,7 +134,9 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle,
     operands.Add(spv::ImageOperandsMask::Lod, lod);
     const Id sample = ctx.OpImageSampleDrefExplicitLod(result_type, sampled_image, coords, dref,
                                                        operands.mask, operands.operands);
-    return texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample;
+    const Id sample_typed = texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample;
+    return ctx.OpCompositeConstruct(ctx.F32[4], sample_typed, ctx.f32_zero_value,
+                                    ctx.f32_zero_value, ctx.f32_zero_value);
 }
 
 Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
@@ -212,15 +219,15 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords) {
     return ctx.OpImageQueryLod(ctx.F32[2], sampled_image, coords);
 }
 
-Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives,
-                     const IR::Value& offset, Id lod_clamp) {
+Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives_dx,
+                     Id derivatives_dy, const IR::Value& offset, const IR::Value& lod_clamp) {
     const auto& texture = ctx.images[handle & 0xFFFF];
     const Id image = ctx.OpLoad(texture.image_type, texture.id);
     const Id result_type = texture.data_types->Get(4);
     const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
     const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
     ImageOperands operands;
-    operands.AddDerivatives(ctx, derivatives);
+    operands.AddDerivatives(ctx, derivatives_dx, derivatives_dy);
     operands.AddOffset(ctx, offset);
     const Id sample = ctx.OpImageSampleExplicitLod(result_type, sampled_image, coords,
                                                    operands.mask, operands.operands);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 6ae1ef24a..02b98b343 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -368,6 +368,8 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value);
 Id EmitConvertU16U32(EmitContext& ctx, Id value);
 Id EmitConvertU32U16(EmitContext& ctx, Id value);
 
+Id EmitImageSampleRaw(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address1, Id address2,
+                      Id address3, Id address4);
 Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias,
                               const IR::Value& offset);
 Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
@@ -384,8 +386,8 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const
                   Id lod, Id ms);
 Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
 Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords);
-Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives,
-                     const IR::Value& offset, Id lod_clamp);
+Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives_dx,
+                     Id derivatives_dy, const IR::Value& offset, const IR::Value& lod_clamp);
 Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
 void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color);
diff --git a/src/shader_recompiler/frontend/decode.cpp b/src/shader_recompiler/frontend/decode.cpp
index 98f97dd12..796bed127 100644
--- a/src/shader_recompiler/frontend/decode.cpp
+++ b/src/shader_recompiler/frontend/decode.cpp
@@ -654,7 +654,7 @@ void GcnDecodeContext::decodeInstructionVOP3(uint64_t hexInstruction) {
     OpcodeVOP3 vop3Op = static_cast<OpcodeVOP3>(op);
 
     if (IsVop3BEncoding(m_instruction.opcode)) {
-        m_instruction.dst[1].field = OperandField::ScalarGPR;
+        m_instruction.dst[1].field = getOperandField(sdst);
         m_instruction.dst[1].type = ScalarType::Uint64;
         m_instruction.dst[1].code = sdst;
     } else {
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index c77588280..b70d4b829 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -155,6 +155,8 @@ public:
     void V_SUB_I32(const GcnInst& inst);
     void V_SUBREV_I32(const GcnInst& inst);
     void V_ADDC_U32(const GcnInst& inst);
+    void V_SUBB_U32(const GcnInst& inst);
+    void V_SUBBREV_U32(const GcnInst& inst);
     void V_LDEXP_F32(const GcnInst& inst);
     void V_CVT_PKNORM_U16_F32(const GcnInst& inst);
     void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
@@ -273,7 +275,9 @@ private:
     void SetDst(const InstOperand& operand, const IR::U32F32& value);
     void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw);
 
-    // Vector ALU Helprers
+    // Vector ALU Helpers
+    IR::U32 GetCarryIn(const GcnInst& inst);
+    void SetCarryOut(const GcnInst& inst, const IR::U1& carry);
     IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0);
     void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0);
 
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index b061d3b78..279695461 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -87,6 +87,10 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
         return V_SUBREV_I32(inst);
     case Opcode::V_ADDC_U32:
         return V_ADDC_U32(inst);
+    case Opcode::V_SUBB_U32:
+        return V_SUBB_U32(inst);
+    case Opcode::V_SUBBREV_U32:
+        return V_SUBBREV_U32(inst);
     case Opcode::V_LDEXP_F32:
         return V_LDEXP_F32(inst);
     case Opcode::V_CVT_PKNORM_U16_F32:
@@ -546,51 +550,71 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
 }
 
 void Translator::V_ADD_I32(const GcnInst& inst) {
+    // Signed or unsigned components
     const IR::U32 src0{GetSrc(inst.src[0])};
     const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
-    SetDst(inst.dst[0], ir.IAdd(src0, src1));
-    // TODO: Carry
+    const IR::U32 result{ir.IAdd(src0, src1)};
+    SetDst(inst.dst[0], result);
+
+    // TODO: Carry-out with signed or unsigned components
 }
 
 void Translator::V_SUB_I32(const GcnInst& inst) {
+    // Unsigned components
     const IR::U32 src0{GetSrc(inst.src[0])};
     const IR::U32 src1{GetSrc(inst.src[1])};
-    SetDst(inst.dst[0], ir.ISub(src0, src1));
+    const IR::U32 result{ir.ISub(src0, src1)};
+    SetDst(inst.dst[0], result);
+
+    const IR::U1 did_underflow{ir.IGreaterThan(src1, src0, false)};
+    SetCarryOut(inst, did_underflow);
 }
 
 void Translator::V_SUBREV_I32(const GcnInst& inst) {
+    // Unsigned components
     const IR::U32 src0{GetSrc(inst.src[0])};
     const IR::U32 src1{GetSrc(inst.src[1])};
-    SetDst(inst.dst[0], ir.ISub(src1, src0));
-    // TODO: Carry-out
+    const IR::U32 result{ir.ISub(src1, src0)};
+    SetDst(inst.dst[0], result);
+
+    const IR::U1 did_underflow{ir.IGreaterThan(src0, src1, false)};
+    SetCarryOut(inst, did_underflow);
 }
 
 void Translator::V_ADDC_U32(const GcnInst& inst) {
-    const auto src0 = GetSrc(inst.src[0]);
-    const auto src1 = GetSrc(inst.src[1]);
-
-    IR::U1 carry;
-    if (inst.src_count == 3) { // VOP3
-        if (inst.src[2].field == OperandField::VccLo) {
-            carry = ir.GetVcc();
-        } else if (inst.src[2].field == OperandField::ScalarGPR) {
-            carry = ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code));
-        } else {
-            UNREACHABLE();
-        }
-    } else { // VOP2
-        carry = ir.GetVcc();
-    }
-
-    const IR::U32 scarry = IR::U32{ir.Select(carry, ir.Imm32(1), ir.Imm32(0))};
-    const IR::U32 result = ir.IAdd(ir.IAdd(src0, src1), scarry);
-
+    // Unsigned components
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::U32 src1{GetSrc(inst.src[1])};
+    const IR::U32 carry{GetCarryIn(inst)};
+    const IR::U32 result{ir.IAdd(ir.IAdd(src0, src1), carry)};
     SetDst(inst.dst[0], result);
 
-    const IR::U1 less_src0 = ir.ILessThan(result, src0, false);
-    const IR::U1 less_src1 = ir.ILessThan(result, src1, false);
-    const IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1);
-    ir.SetVcc(did_overflow);
+    const IR::U1 less_src0{ir.ILessThan(result, src0, false)};
+    const IR::U1 less_src1{ir.ILessThan(result, src1, false)};
+    const IR::U1 did_overflow{ir.LogicalOr(less_src0, less_src1)};
+    SetCarryOut(inst, did_overflow);
+}
+
+void Translator::V_SUBB_U32(const GcnInst& inst) {
+    // Signed or unsigned components
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::U32 src1{GetSrc(inst.src[1])};
+    const IR::U32 carry{GetCarryIn(inst)};
+    const IR::U32 result{ir.ISub(ir.ISub(src0, src1), carry)};
+    SetDst(inst.dst[0], result);
+
+    // TODO: Carry-out with signed or unsigned components
+}
+
+void Translator::V_SUBBREV_U32(const GcnInst& inst) {
+    // Signed or unsigned components
+    const IR::U32 src0{GetSrc(inst.src[0])};
+    const IR::U32 src1{GetSrc(inst.src[1])};
+    const IR::U32 carry{GetCarryIn(inst)};
+    const IR::U32 result{ir.ISub(ir.ISub(src1, src0), carry)};
+    SetDst(inst.dst[0], result);
+
+    // TODO: Carry-out with signed or unsigned components
 }
 
 void Translator::V_LDEXP_F32(const GcnInst& inst) {
@@ -1152,6 +1176,37 @@ void Translator::V_MAD_U64_U32(const GcnInst& inst) {
     ir.SetVcc(did_overflow);
 }
 
+IR::U32 Translator::GetCarryIn(const GcnInst& inst) {
+    IR::U1 carry;
+    if (inst.src_count == 3) { // VOP3
+        if (inst.src[2].field == OperandField::VccLo) {
+            carry = ir.GetVcc();
+        } else if (inst.src[2].field == OperandField::ScalarGPR) {
+            carry = ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code));
+        } else {
+            UNREACHABLE();
+        }
+    } else { // VOP2
+        carry = ir.GetVcc();
+    }
+
+    return IR::U32{ir.Select(carry, ir.Imm32(1), ir.Imm32(0))};
+}
+
+void Translator::SetCarryOut(const GcnInst& inst, const IR::U1& carry) {
+    if (inst.dst_count == 2) { // VOP3
+        if (inst.dst[1].field == OperandField::VccLo) {
+            ir.SetVcc(carry);
+        } else if (inst.dst[1].field == OperandField::ScalarGPR) {
+            ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), carry);
+        } else {
+            UNREACHABLE();
+        }
+    } else { // VOP2
+        ir.SetVcc(carry);
+    }
+}
+
 // TODO: add range analysis pass to hopefully put an upper bound on m0, and only select one of
 // [src_vgprno, src_vgprno + max_m0]. Same for dst regs we may write back to
diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp
index e76ba6d8a..b7ad3b36b 100644
--- a/src/shader_recompiler/frontend/translate/vector_memory.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp
@@ -411,7 +411,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
                                              ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
 
     IR::TextureInstInfo info{};
-    info.explicit_lod.Assign(has_mip);
+    info.has_lod.Assign(has_mip);
 
     const IR::Value texel = ir.ImageFetch(handle, body, {}, {}, {}, info);
     for (u32 i = 0; i < 4; i++) {
@@ -513,6 +513,76 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
     }
 }
 
+IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::ScalarReg tsharp_reg,
+                          const IR::ScalarReg sampler_reg, const IR::VectorReg addr_reg,
+                          bool gather) {
+    const auto& mimg = inst.control.mimg;
+    const auto flags = MimgModifierFlags(mimg.mod);
+
+    IR::TextureInstInfo info{};
+    info.is_depth.Assign(flags.test(MimgModifier::Pcf));
+    info.has_bias.Assign(flags.test(MimgModifier::LodBias));
+    info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
+    info.force_level0.Assign(flags.test(MimgModifier::Level0));
+    info.has_offset.Assign(flags.test(MimgModifier::Offset));
+    info.has_lod.Assign(flags.any(MimgModifier::Lod));
+    info.is_array.Assign(mimg.da);
+
+    if (gather) {
+        info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
+        info.is_gather.Assign(true);
+    } else {
+        info.has_derivatives.Assign(flags.test(MimgModifier::Derivative));
+    }
+
+    // Load first dword of T# and S#. We will use them as the handle that will guide resource
+    // tracking pass where to read the sharps. This will later also get patched to the SPIRV texture
+    // binding index.
+    const IR::Value handle =
+        ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(sampler_reg));
+
+    // Determine how many address registers need to be passed.
+    // The image type is unknown, so add all 4 possible base registers and resolve later.
+    int num_addr_regs = 4;
+    if (info.has_offset) {
+        ++num_addr_regs;
+    }
+    if (info.has_bias) {
+        ++num_addr_regs;
+    }
+    if (info.is_depth) {
+        ++num_addr_regs;
+    }
+    if (info.has_derivatives) {
+        // The image type is unknown, so add all 6 possible derivative registers and resolve later.
+        num_addr_regs += 6;
+    }
+
+    // Fetch all the address registers to pass in the IR instruction. There can be up to 13
+    // registers.
+    const auto get_addr_reg = [&](int index) -> IR::F32 {
+        if (index >= num_addr_regs) {
+            return ir.Imm32(0.f);
+        }
+        return ir.GetVectorReg<IR::F32>(addr_reg + index);
+    };
+
+    const IR::Value address1 =
+        ir.CompositeConstruct(get_addr_reg(0), get_addr_reg(1), get_addr_reg(2), get_addr_reg(3));
+    const IR::Value address2 =
+        ir.CompositeConstruct(get_addr_reg(4), get_addr_reg(5), get_addr_reg(6), get_addr_reg(7));
+    const IR::Value address3 =
+        ir.CompositeConstruct(get_addr_reg(8), get_addr_reg(9), get_addr_reg(10), get_addr_reg(11));
+    const IR::Value address4 = get_addr_reg(12);
+
+    // Issue the placeholder IR instruction.
+    IR::Value texel = ir.ImageSampleRaw(handle, address1, address2, address3, address4, info);
+    if (info.is_depth && !gather) {
+        // For non-gather depth sampling, only return a single value.
+        texel = ir.CompositeExtract(texel, 0);
+    }
+    return texel;
+}
+
 void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
     const auto& mimg = inst.control.mimg;
     IR::VectorReg addr_reg{inst.src[0].code};
@@ -521,72 +591,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
     const IR::ScalarReg sampler_reg{inst.src[3].code * 4};
     const auto flags = MimgModifierFlags(mimg.mod);
 
-    // Load first dword of T# and S#. We will use them as the handle that will guide resource
-    // tracking pass where to read the sharps. This will later also get patched to the SPIRV texture
-    // binding index.
-    const IR::Value handle =
-        ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(sampler_reg));
-
-    // Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction
-    // Set Architecture
-    const IR::U32 offset =
-        flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::U32{};
-    const IR::F32 bias =
-        flags.test(MimgModifier::LodBias) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
-    const IR::F32 dref =
-        flags.test(MimgModifier::Pcf) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
-    const IR::Value derivatives = [&] -> IR::Value {
-        if (!flags.test(MimgModifier::Derivative)) {
-            return {};
-        }
-        addr_reg = addr_reg + 4;
-        return ir.CompositeConstruct(
-            ir.GetVectorReg<IR::F32>(addr_reg - 4), ir.GetVectorReg<IR::F32>(addr_reg - 3),
-            ir.GetVectorReg<IR::F32>(addr_reg - 2), ir.GetVectorReg<IR::F32>(addr_reg - 1));
-    }();
-
-    // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
-    // Since these are at most 4 dwords, we load them into a single uvec4 and place them
-    // in coords field of the instruction. Then the resource tracking pass will patch the
-    // IR instruction to fill in lod_clamp field.
-    const IR::Value body = ir.CompositeConstruct(
-        ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
-        ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
-
-    // Derivatives are tricky because their number depends on the texture type which is located in
-    // T#. We don't have access to T# though until resource tracking pass. For now assume if
-    // derivatives are present, that a 2D image is bound.
-    const bool has_derivatives = flags.test(MimgModifier::Derivative);
-    const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
-
-    IR::TextureInstInfo info{};
-    info.is_depth.Assign(flags.test(MimgModifier::Pcf));
-    info.has_bias.Assign(flags.test(MimgModifier::LodBias));
-    info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
-    info.force_level0.Assign(flags.test(MimgModifier::Level0));
-    info.has_offset.Assign(flags.test(MimgModifier::Offset));
-    info.explicit_lod.Assign(explicit_lod);
-    info.has_derivatives.Assign(has_derivatives);
-    info.is_array.Assign(mimg.da);
-
-    // Issue IR instruction, leaving unknown fields blank to patch later.
-    const IR::Value texel = [&]() -> IR::Value {
-        if (has_derivatives) {
-            return ir.ImageGradient(handle, body, derivatives, offset, {}, info);
-        }
-        if (!flags.test(MimgModifier::Pcf)) {
-            if (explicit_lod) {
-                return ir.ImageSampleExplicitLod(handle, body, offset, info);
-            } else {
-                return ir.ImageSampleImplicitLod(handle, body, bias, offset, info);
-            }
-        }
-        if (explicit_lod) {
-            return ir.ImageSampleDrefExplicitLod(handle, body, dref, offset, info);
-        }
-        return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, info);
-    }();
-
+    const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, false);
     for (u32 i = 0; i < 4; i++) {
         if (((mimg.dmask >> i) & 1) == 0) {
             continue;
@@ -609,60 +614,13 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
     const IR::ScalarReg sampler_reg{inst.src[3].code * 4};
     const auto flags = MimgModifierFlags(mimg.mod);
 
-    // Load first dword of T# and S#. We will use them as the handle that will guide resource
-    // tracking pass where to read the sharps. This will later also get patched to the SPIRV texture
-    // binding index.
-    const IR::Value handle =
-        ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(sampler_reg));
-
-    // Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction
-    // Set Architecture
-    const IR::Value offset =
-        flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::Value{};
-    const IR::F32 bias =
-        flags.test(MimgModifier::LodBias) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
-    const IR::F32 dref =
-        flags.test(MimgModifier::Pcf) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
-
-    // Derivatives are tricky because their number depends on the texture type which is located in
-    // T#. We don't have access to T# though until resource tracking pass. For now assume no
-    // derivatives are present, otherwise we don't know where coordinates are placed in the address
-    // stream.
-    ASSERT_MSG(!flags.test(MimgModifier::Derivative), "Derivative image instruction");
-
-    // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
-    // Since these are at most 4 dwords, we load them into a single uvec4 and place them
-    // in coords field of the instruction. Then the resource tracking pass will patch the
-    // IR instruction to fill in lod_clamp field.
-    const IR::Value body = ir.CompositeConstruct(
-        ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
-        ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
-
-    const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
-
-    IR::TextureInstInfo info{};
-    info.is_depth.Assign(flags.test(MimgModifier::Pcf));
-    info.has_bias.Assign(flags.test(MimgModifier::LodBias));
-    info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
-    info.force_level0.Assign(flags.test(MimgModifier::Level0));
-    info.has_offset.Assign(flags.test(MimgModifier::Offset));
-    // info.explicit_lod.Assign(explicit_lod);
-    info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
-    info.is_array.Assign(mimg.da);
-
-    // Issue IR instruction, leaving unknown fields blank to patch later.
-    const IR::Value texel = [&]() -> IR::Value {
-        const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{};
-        if (!flags.test(MimgModifier::Pcf)) {
-            return ir.ImageGather(handle, body, offset, info);
-        }
-        ASSERT(mimg.dmask & 1); // should be always 1st (R) component
-        return ir.ImageGatherDref(handle, body, offset, dref, info);
-    }();
-
     // For gather4 instructions dmask selects which component to read and must have
     // only one bit set to 1
     ASSERT_MSG(std::popcount(mimg.dmask) == 1, "Unexpected bits in gather dmask");
+    // should be always 1st (R) component for depth
+    ASSERT(!flags.test(MimgModifier::Pcf) || mimg.dmask & 1);
+
+    const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, true);
     for (u32 i = 0; i < 4; i++) {
         const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
         ir.SetVectorReg(dest_reg++, value);
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index 4f5eb5c33..cfd044f9e 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -130,19 +130,23 @@ void IREmitter::DeviceMemoryBarrier() {
 }
 
 U32 IREmitter::GetUserData(IR::ScalarReg reg) {
+    ASSERT(static_cast<u32>(reg) < IR::NumScalarRegs);
     return Inst<U32>(Opcode::GetUserData, reg);
 }
 
 U1 IREmitter::GetThreadBitScalarReg(IR::ScalarReg reg) {
+    ASSERT(static_cast<u32>(reg) < IR::NumScalarRegs);
     return Inst<U1>(Opcode::GetThreadBitScalarReg, reg);
 }
 
 void IREmitter::SetThreadBitScalarReg(IR::ScalarReg reg, const U1& value) {
+    ASSERT(static_cast<u32>(reg) < IR::NumScalarRegs);
     Inst(Opcode::SetThreadBitScalarReg, reg, value);
 }
 
 template <>
 U32 IREmitter::GetScalarReg(IR::ScalarReg reg) {
+    ASSERT(static_cast<u32>(reg) < IR::NumScalarRegs);
     return Inst<U32>(Opcode::GetScalarRegister, reg);
 }
 
@@ -153,6 +157,7 @@ F32 IREmitter::GetScalarReg(IR::ScalarReg reg) {
 
 template <>
 U32 IREmitter::GetVectorReg(IR::VectorReg reg) {
+    ASSERT(static_cast<u32>(reg) < IR::NumVectorRegs);
     return Inst<U32>(Opcode::GetVectorRegister, reg);
 }
 
@@ -162,11 +167,13 @@ F32 IREmitter::GetVectorReg(IR::VectorReg reg) {
 }
 
 void IREmitter::SetScalarReg(IR::ScalarReg reg, const U32F32& value) {
+    ASSERT(static_cast<u32>(reg) < IR::NumScalarRegs);
     const U32 value_typed = value.Type() == Type::F32 ? BitCast<U32>(F32{value}) : U32{value};
     Inst(Opcode::SetScalarRegister, reg, value_typed);
 }
 
 void IREmitter::SetVectorReg(IR::VectorReg reg, const U32F32& value) {
+    ASSERT(static_cast<u32>(reg) < IR::NumVectorRegs);
     const U32 value_typed = value.Type() == Type::F32 ? BitCast<U32>(F32{value}) : U32{value};
     Inst(Opcode::SetVectorRegister, reg, value_typed);
 }
@@ -1492,27 +1499,34 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c
     return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value);
 }
 
-Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& body, const F32& bias,
-                                        const U32& offset, TextureInstInfo info) {
-    return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, body, bias, offset);
+Value IREmitter::ImageSampleRaw(const Value& handle, const Value& address1, const Value& address2,
+                                const Value& address3, const Value& address4,
+                                TextureInstInfo info) {
+    return Inst(Opcode::ImageSampleRaw, Flags{info}, handle, address1, address2, address3,
+                address4);
 }
 
-Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& body, const U32& offset,
-                                        TextureInstInfo info) {
-    return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, body, IR::F32{}, offset);
+Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
+                                        const Value& offset, TextureInstInfo info) {
+    return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, coords, bias, offset);
 }
 
-F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& body, const F32& dref,
-                                          const F32& bias, const U32& offset,
-                                          TextureInstInfo info) {
-    return Inst<F32>(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, body, dref, bias,
-                     offset);
+Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod,
+                                        const Value& offset, TextureInstInfo info) {
+    return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, coords, lod, offset);
 }
 
-F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& body, const F32& dref,
-                                          const U32& offset, TextureInstInfo info) {
-    return Inst<F32>(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, body, dref, IR::F32{},
-                     offset);
+Value IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
+                                            const F32& dref, const F32& bias, const Value& offset,
+                                            TextureInstInfo info) {
+    return Inst(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, coords, dref, bias,
+                offset);
+}
+
+Value IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
+                                            const F32& dref, const F32& lod, const Value& offset,
+                                            TextureInstInfo info) {
+    return Inst(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, coords, dref, lod, offset);
 }
 
@@ -1544,9 +1558,11 @@ Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, Texture
     return Inst(Opcode::ImageQueryLod, Flags{info}, handle, coords);
 }
 
-Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivatives,
+Value IREmitter::ImageGradient(const Value& handle, const Value& coords,
+                               const Value& derivatives_dx, const Value& derivatives_dy,
                                const Value& offset, const F32& lod_clamp, TextureInstInfo info) {
-    return Inst(Opcode::ImageGradient, Flags{info}, handle, coords, derivatives, offset, lod_clamp);
+    return Inst(Opcode::ImageGradient, Flags{info}, handle, coords, derivatives_dx, derivatives_dy,
+                offset, lod_clamp);
 }
 
 Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) {
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index 2ebac037e..b3f513085 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -277,20 +277,25 @@ public:
     [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
                                             const Value& value, TextureInstInfo info);
 
+    [[nodiscard]] Value ImageSampleRaw(const Value& handle, const Value& address1,
+                                       const Value& address2, const Value& address3,
+                                       const Value& address4, TextureInstInfo info);
+
     [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body,
-                                               const F32& bias, const U32& offset,
+                                               const F32& bias, const Value& offset,
                                                TextureInstInfo info);
 
     [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& body,
-                                               const U32& offset, TextureInstInfo info);
+                                               const F32& lod, const Value& offset,
+                                               TextureInstInfo info);
 
-    [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& body,
-                                                 const F32& dref, const F32& bias,
-                                                 const U32& offset, TextureInstInfo info);
+    [[nodiscard]] Value ImageSampleDrefImplicitLod(const Value& handle, const Value& body,
+                                                   const F32& dref, const F32& bias,
+                                                   const Value& offset, TextureInstInfo info);
 
-    [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& body,
-                                                 const F32& dref, const U32& offset,
-                                                 TextureInstInfo info);
+    [[nodiscard]] Value ImageSampleDrefExplicitLod(const Value& handle, const Value& body,
+                                                   const F32& dref, const F32& lod,
+                                                   const Value& offset, TextureInstInfo info);
 
     [[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod,
                                             const U1& skip_mips);
@@ -306,8 +311,9 @@ public:
     [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
                                    const U32& lod, const U32& multisampling, TextureInstInfo info);
     [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
-                                      const Value& derivatives, const Value& offset,
-                                      const F32& lod_clamp, TextureInstInfo info);
+                                      const Value& derivatives_dx, const Value& derivatives_dy,
+                                      const Value& offset, const F32& lod_clamp,
+                                      TextureInstInfo info);
     [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
     void ImageWrite(const Value& handle, const Value& coords, const Value& color,
                     TextureInstInfo info);
diff --git a/src/shader_recompiler/ir/opcodes.h b/src/shader_recompiler/ir/opcodes.h
index 2cea70090..200d7f421 100644
--- a/src/shader_recompiler/ir/opcodes.h
+++ b/src/shader_recompiler/ir/opcodes.h
@@ -21,7 +21,7 @@ namespace Detail {
 struct OpcodeMeta {
     std::string_view name;
     Type type;
-    std::array<Type, 5> arg_types;
+    std::array<Type, 6> arg_types;
 };
 
 // using enum Type;
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 41e94ab13..51e10fb38 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -317,16 +317,17 @@ OPCODE(ConvertU16U32, U16, U32,
 OPCODE(ConvertU32U16, U32, U16, )
 
 // Image operations
-OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, F32, Opaque, )
-OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, U32, Opaque, )
-OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, Opaque, F32, Opaque, )
-OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, Opaque, U32, Opaque, )
+OPCODE(ImageSampleRaw, F32x4, Opaque, F32x4, F32x4, F32x4, F32, )
+OPCODE(ImageSampleImplicitLod, F32x4, Opaque, F32x4, F32, Opaque, )
+OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, F32, Opaque, )
+OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, )
+OPCODE(ImageSampleDrefExplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, )
 OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, )
 OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, )
 OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
 OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, )
 OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
-OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, )
+OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, )
 OPCODE(ImageRead, U32x4, Opaque, Opaque, )
 OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
 
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index db0d75f0c..0d91badda 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -132,38 +132,16 @@ bool IsImageStorageInstruction(const IR::Inst& inst) {
 
 bool IsImageInstruction(const IR::Inst& inst) {
     switch (inst.GetOpcode()) {
-    case IR::Opcode::ImageSampleExplicitLod:
-    case IR::Opcode::ImageSampleImplicitLod:
-    case IR::Opcode::ImageSampleDrefExplicitLod:
-    case IR::Opcode::ImageSampleDrefImplicitLod:
     case IR::Opcode::ImageFetch:
-    case IR::Opcode::ImageGather:
-    case IR::Opcode::ImageGatherDref:
     case IR::Opcode::ImageQueryDimensions:
    case IR::Opcode::ImageQueryLod:
-    case IR::Opcode::ImageGradient:
+    case IR::Opcode::ImageSampleRaw:
        return true;
     default:
         return IsImageStorageInstruction(inst);
     }
 }
 
-u32 ImageOffsetArgumentPosition(const IR::Inst& inst) {
-    switch (inst.GetOpcode()) {
-    case IR::Opcode::ImageGather:
-    case IR::Opcode::ImageGatherDref:
-        return 2;
-    case IR::Opcode::ImageSampleExplicitLod:
-    case IR::Opcode::ImageSampleImplicitLod:
-        return 3;
-    case IR::Opcode::ImageSampleDrefExplicitLod:
-    case IR::Opcode::ImageSampleDrefImplicitLod:
-        return 4;
-    default:
-        UNREACHABLE();
-    }
-}
-
 class Descriptors {
 public:
     explicit Descriptors(Info& info_)
@@ -467,6 +445,191 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
     }
 }
 
+void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
+                                 Descriptors& descriptors, const IR::Inst* producer,
+                                 const u32 image_binding, const AmdGpu::Image& image) {
+    // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
+    const u32 sampler_binding = [&] {
+        ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
+        const IR::Value& handle = producer->Arg(1);
+        // Inline sampler resource.
+        if (handle.IsImmediate()) {
+            LOG_WARNING(Render_Vulkan, "Inline sampler detected");
+            return descriptors.Add(SamplerResource{
+                .sgpr_base = std::numeric_limits<u32>::max(),
+                .dword_offset = 0,
+                .inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
+            });
+        }
+        // Normal sampler resource.
+        const auto ssharp_handle = handle.InstRecursive();
+        const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
+        const auto ssharp = TrackSharp(ssharp_ud);
+        return descriptors.Add(SamplerResource{
+            .sgpr_base = ssharp.sgpr_base,
+            .dword_offset = ssharp.dword_offset,
+            .associated_image = image_binding,
+            .disable_aniso = disable_aniso,
+        });
+    }();
+
+    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+
+    const auto inst_info = inst.Flags<IR::TextureInstInfo>();
+    const IR::U32 handle = ir.Imm32(image_binding | sampler_binding << 16);
+
+    IR::Inst* body1 = inst.Arg(1).InstRecursive();
+    IR::Inst* body2 = inst.Arg(2).InstRecursive();
+    IR::Inst* body3 = inst.Arg(3).InstRecursive();
+    IR::F32 body4 = IR::F32{inst.Arg(4)};
+    const auto get_addr_reg = [&](u32 index) -> IR::F32 {
+        if (index <= 3) {
+            return IR::F32{body1->Arg(index)};
+        }
+        if (index >= 4 && index <= 7) {
+            return IR::F32{body2->Arg(index - 4)};
+        }
+        if (index >= 8 && index <= 11) {
+            return IR::F32{body3->Arg(index - 8)};
+        }
+        if (index == 12) {
+            return body4;
+        }
+        UNREACHABLE();
+    };
+    u32 addr_reg = 0;
+
+    // Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction
+    // Set Architecture
+    const IR::Value offset = [&] -> IR::Value {
+        if (!inst_info.has_offset) {
+            return IR::U32{};
+        }
+
+        // The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16].
+        IR::Value arg = get_addr_reg(addr_reg++);
+        if (const IR::Inst* offset_inst = arg.TryInstRecursive()) {
+            ASSERT(offset_inst->GetOpcode() == IR::Opcode::BitCastF32U32);
+            arg = offset_inst->Arg(0);
+        }
+
+        const auto read = [&](u32 off) -> IR::U32 {
+            if (arg.IsImmediate()) {
+                const u32 imm =
+                    arg.Type() == IR::Type::F32 ? std::bit_cast<u32>(arg.F32()) : arg.U32();
+                const u16 comp = (imm >> off) & 0x3F;
+                return ir.Imm32(s32(comp << 26) >> 26);
+            }
+            return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(off), ir.Imm32(6), true);
+        };
+
+        switch (image.GetType()) {
+        case AmdGpu::ImageType::Color1D:
+        case AmdGpu::ImageType::Color1DArray:
+            return read(0);
+        case AmdGpu::ImageType::Color2D:
+        case AmdGpu::ImageType::Color2DArray:
+        case AmdGpu::ImageType::Color2DMsaa:
+            return ir.CompositeConstruct(read(0), read(8));
+        case AmdGpu::ImageType::Color3D:
+        case AmdGpu::ImageType::Cube:
+            return ir.CompositeConstruct(read(0), read(8), read(16));
+        default:
+            UNREACHABLE();
+        }
+    }();
+    const IR::F32 bias = inst_info.has_bias ? get_addr_reg(addr_reg++) : IR::F32{};
+    const IR::F32 dref = inst_info.is_depth ? get_addr_reg(addr_reg++) : IR::F32{};
+    const auto [derivatives_dx, derivatives_dy] = [&] -> std::pair<IR::Value, IR::Value> {
+        if (!inst_info.has_derivatives) {
+            return {};
+        }
+        switch (image.GetType()) {
+        case AmdGpu::ImageType::Color1D:
+        case AmdGpu::ImageType::Color1DArray:
+            // du/dx, du/dy
+            addr_reg = addr_reg + 2;
+            return {get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1)};
+        case AmdGpu::ImageType::Color2D:
+        case AmdGpu::ImageType::Color2DArray:
+        case AmdGpu::ImageType::Color2DMsaa:
+            // (du/dx, dv/dx), (du/dy, dv/dy)
+            addr_reg = addr_reg + 4;
+            return {ir.CompositeConstruct(get_addr_reg(addr_reg - 4), get_addr_reg(addr_reg - 3)),
+                    ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1))};
+        case AmdGpu::ImageType::Color3D:
+        case AmdGpu::ImageType::Cube:
+            // (du/dx, dv/dx, dw/dx), (du/dy, dv/dy, dw/dy)
+            addr_reg = addr_reg + 6;
+            return {ir.CompositeConstruct(get_addr_reg(addr_reg - 6), get_addr_reg(addr_reg - 5),
+                                          get_addr_reg(addr_reg - 4)),
+                    ir.CompositeConstruct(get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
+                                          get_addr_reg(addr_reg - 1))};
+        default:
+            UNREACHABLE();
+        }
+    }();
+
+    // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
+    const IR::Value coords = [&] -> IR::Value {
+        switch (image.GetType()) {
+        case AmdGpu::ImageType::Color1D: // x
+            addr_reg = addr_reg + 1;
+            return get_addr_reg(addr_reg - 1);
+        case AmdGpu::ImageType::Color1DArray: // x, slice
+            [[fallthrough]];
+        case AmdGpu::ImageType::Color2D: // x, y
+            addr_reg = addr_reg + 2;
+            return ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1));
+        case AmdGpu::ImageType::Color2DArray: // x, y, slice
+            [[fallthrough]];
+        case AmdGpu::ImageType::Color2DMsaa: // x, y, frag
+            [[fallthrough]];
+        case AmdGpu::ImageType::Color3D: // x, y, z
+            addr_reg = addr_reg + 3;
+            return ir.CompositeConstruct(get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
+                                         get_addr_reg(addr_reg - 1));
+        case AmdGpu::ImageType::Cube: // x, y, face
+            addr_reg = addr_reg + 3;
+            return PatchCubeCoord(ir, get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
+                                  get_addr_reg(addr_reg - 1), false, inst_info.is_array);
+        default:
+            UNREACHABLE();
+        }
+    }();
+
+    ASSERT(!inst_info.has_lod || !inst_info.has_lod_clamp);
+    const bool explicit_lod = inst_info.has_lod || inst_info.force_level0;
+    const IR::F32 lod = inst_info.has_lod        ? get_addr_reg(addr_reg++)
+                        : inst_info.force_level0 ? ir.Imm32(0.0f)
+                                                 : IR::F32{};
+    const IR::F32 lod_clamp = inst_info.has_lod_clamp ? get_addr_reg(addr_reg++) : IR::F32{};
+
+    const auto new_inst = [&] -> IR::Value {
+        if (inst_info.is_gather) {
+            if (inst_info.is_depth) {
+                return ir.ImageGatherDref(handle, coords, offset, dref, inst_info);
+            }
+            return ir.ImageGather(handle, coords, offset, inst_info);
+        }
+        if (inst_info.has_derivatives) {
+            return ir.ImageGradient(handle, coords, derivatives_dx, derivatives_dy, offset,
+                                    lod_clamp, inst_info);
+        }
+        if (inst_info.is_depth) {
+            if (explicit_lod) {
+                return ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, inst_info);
+            }
+            return ir.ImageSampleDrefImplicitLod(handle, coords, dref, bias, offset, inst_info);
+        }
+        if (explicit_lod) {
+            return ir.ImageSampleExplicitLod(handle, coords, lod, offset, inst_info);
+        }
+        return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info);
+    }();
+    inst.ReplaceUsesWith(new_inst);
+}
+
 void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
     const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
         const auto opcode = inst->GetOpcode();
@@ -498,40 +661,18 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
         .sgpr_base = tsharp.sgpr_base,
         .dword_offset = tsharp.dword_offset,
         .type = type,
-        .nfmt = static_cast<AmdGpu::NumberFormat>(image.GetNumberFmt()),
+        .nfmt = image.GetNumberFmt(),
         .is_storage = is_storage,
         .is_depth = bool(inst_info.is_depth),
         .is_atomic = IsImageAtomicInstruction(inst),
         .is_array = bool(inst_info.is_array),
     });
 
-    // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
-    const u32 sampler_binding = [&] {
-        if (!has_sampler) {
-            return 0U;
-        }
-        const IR::Value& handle = producer->Arg(1);
-        // Inline sampler resource.
-        if (handle.IsImmediate()) {
-            LOG_WARNING(Render_Vulkan, "Inline sampler detected");
-            return descriptors.Add(SamplerResource{
-                .sgpr_base = std::numeric_limits<u32>::max(),
-                .dword_offset = 0,
-                .inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
-            });
-        }
-        // Normal sampler resource.
-        const auto ssharp_handle = handle.InstRecursive();
-        const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
-        const auto ssharp = TrackSharp(ssharp_ud);
-        return descriptors.Add(SamplerResource{
-            .sgpr_base = ssharp.sgpr_base,
-            .dword_offset = ssharp.dword_offset,
-            .associated_image = image_binding,
-            .disable_aniso = disable_aniso,
-        });
-    }();
-    image_binding |= (sampler_binding << 16);
+    // Sample instructions must be resolved into a new instruction using address register data.
+    if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
+        PatchImageSampleInstruction(block, inst, info, descriptors, producer, image_binding, image);
+        return;
+    }
 
     // Patch image handle
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
@@ -568,62 +709,9 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
     }();
     inst.SetArg(1, coords);
 
-    if (inst_info.has_offset) {
-        // The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16].
-        const u32 arg_pos = ImageOffsetArgumentPosition(inst);
-        const IR::Value arg = inst.Arg(arg_pos);
-        ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type");
-
-        const auto read = [&](u32 offset) -> IR::U32 {
-            if (arg.IsImmediate()) {
-                const u16 comp = (arg.U32() >> offset) & 0x3F;
-                return ir.Imm32(s32(comp << 26) >> 26);
-            }
-            return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true);
-        };
-
-        switch (image.GetType()) {
-        case AmdGpu::ImageType::Color1D:
-        case AmdGpu::ImageType::Color1DArray:
-            inst.SetArg(arg_pos, read(0));
-            break;
-        case AmdGpu::ImageType::Color2D:
-        case AmdGpu::ImageType::Color2DArray:
-            inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8)));
-            break;
-        case AmdGpu::ImageType::Color3D:
-            inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8), read(16)));
-            break;
-        default:
-            UNREACHABLE();
-        }
-    }
-    if (inst_info.has_derivatives) {
-        ASSERT_MSG(image.GetType() == AmdGpu::ImageType::Color2D ||
-                       image.GetType() == AmdGpu::ImageType::Color2DArray,
-                   "User derivatives only supported for 2D images");
-    }
-    if (inst_info.has_lod_clamp) {
-        const u32 arg_pos = [&]() -> u32 {
-            switch (inst.GetOpcode()) {
-            case IR::Opcode::ImageSampleImplicitLod:
-                return 2;
-            case IR::Opcode::ImageSampleDrefImplicitLod:
-                return 3;
-            default:
-                break;
-            }
-            return inst_info.is_depth ? 5 : 4;
-        }();
-        inst.SetArg(arg_pos, arg);
-    }
-    if (inst_info.explicit_lod) {
-        ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch ||
-               inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod ||
-               inst.GetOpcode() == IR::Opcode::ImageSampleDrefExplicitLod);
-        const u32 pos = inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod ? 2 : 3;
-        const IR::Value value = inst_info.force_level0 ? ir.Imm32(0.f) : arg;
-        inst.SetArg(pos, value);
+    if (inst_info.has_lod) {
+        ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch);
+        inst.SetArg(3, arg);
     }
 }
diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h
index 5facaf5c7..d7c0b1db5 100644
--- a/src/shader_recompiler/ir/reg.h
+++ b/src/shader_recompiler/ir/reg.h
@@ -33,11 +33,12 @@ union TextureInstInfo {
     BitField<1, 1, u32> has_bias;
     BitField<2, 1, u32> has_lod_clamp;
     BitField<3, 1, u32> force_level0;
-    BitField<4, 1, u32> explicit_lod;
+    BitField<4, 1, u32> has_lod;
     BitField<5, 1, u32> has_offset;
     BitField<6, 2, u32> gather_comp;
     BitField<8, 1, u32> has_derivatives;
     BitField<9, 1, u32> is_array;
+    BitField<10, 1, u32> is_gather;
 };
 
 union BufferInstInfo {
diff --git a/src/shader_recompiler/ir/value.h b/src/shader_recompiler/ir/value.h
index 060b9d2bb..a282b9168 100644
--- a/src/shader_recompiler/ir/value.h
+++ b/src/shader_recompiler/ir/value.h
@@ -209,7 +209,7 @@ private:
     union {
         NonTriviallyDummy dummy{};
         boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args;
-        std::array<Value, 5> args;
+        std::array<Value, 6> args;
     };
 };
 static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index 4d15c2072..03936f3a8 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -153,7 +153,10 @@ struct RuntimeInfo {
         ComputeRuntimeInfo cs_info;
     };
 
-    RuntimeInfo(Stage stage_) : stage{stage_} {}
+    RuntimeInfo(Stage stage_) {
+        memset(this, 0, sizeof(*this));
+        stage = stage_;
+    }
 
     bool operator==(const RuntimeInfo& other) const noexcept {
         switch (stage) {
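
Reviewer notes (illustrative sketches, not part of the patch):

The carry plumbing in vector_alu.cpp now funnels through the new `GetCarryIn`/`SetCarryOut` helpers: VOP3 encodings name the carry SGPR (or VCC) explicitly, while VOP2 always uses VCC. The borrow-out for `V_SUBB_U32`/`V_SUBBREV_U32` is deliberately left as a TODO in the hunk above. Below is a minimal host-side reference for the subtract-with-borrow arithmetic the translator emits; the carry-out rule shown is the conventional one and is an assumption, and `SubbU32`/`SubbResult` are invented names, not code from this repository.

```cpp
#include <cassert>
#include <cstdint>

// Reference model for GCN V_SUBB_U32: D = S0 - S1 - carry_in.
// The carry-out (borrow when S1 + carry_in exceeds S0) is assumed here;
// the patch intentionally leaves that part unimplemented for now.
struct SubbResult {
    uint32_t value;
    bool carry_out; // would land in VCC or the VOP3 SGPR destination
};

SubbResult SubbU32(uint32_t src0, uint32_t src1, bool carry_in) {
    const uint64_t subtrahend = uint64_t(src1) + uint64_t(carry_in);
    return {
        .value = static_cast<uint32_t>(src0 - src1 - uint32_t(carry_in)),
        .carry_out = uint64_t(src0) < subtrahend,
    };
}

int main() {
    assert(SubbU32(5, 3, false).value == 2);
    const auto r = SubbU32(0, 0, true); // 0 - 0 - 1 borrows
    assert(r.value == 0xFFFFFFFFu && r.carry_out);
    return 0;
}
```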
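The six-bit signed offset decode that moved into `PatchImageSampleInstruction` can be checked in isolation: the field is extracted with `(imm >> off) & 0x3F`, then sign-extended by shifting up to the sign bit and arithmetically shifting back down, exactly as `ir.Imm32(s32(comp << 26) >> 26)` does on the immediate path. `DecodeImageOffset` below is a hypothetical standalone mirror of that expression, not a helper from the patch.

```cpp
#include <cassert>
#include <cstdint>

// Mirrors the immediate path of read() in PatchImageSampleInstruction:
// X=[5:0], Y=[13:8], Z=[21:16], each a 6-bit signed integer.
int32_t DecodeImageOffset(uint32_t packed, uint32_t shift) {
    const uint32_t comp = (packed >> shift) & 0x3F; // extract the 6-bit field
    // Shift the field up to bit 31, then arithmetic-shift back down to
    // sign-extend from bit 5 (right shift of signed values is well-defined
    // as arithmetic in C++20).
    return static_cast<int32_t>(comp << 26) >> 26;
}

int main() {
    const uint32_t packed = (0x3Fu << 8) | 0x01u; // Y = -1, X = +1, Z = 0
    assert(DecodeImageOffset(packed, 0) == 1);
    assert(DecodeImageOffset(packed, 8) == -1);
    assert(DecodeImageOffset(packed, 16) == 0);
    return 0;
}
```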
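Once `PatchImageSampleInstruction` has resolved both sharps, it re-emits the sample with a combined handle, `image_binding | sampler_binding << 16`, which the SPIR-V backend splits again as `ctx.images[handle & 0xFFFF]` and `ctx.samplers[handle >> 16]`. Keeping both indices in one scalar lets `ImageSampleRaw` carry a single opaque handle through the IR until the pass patches it. A sketch of that packing contract; `PackHandle` is a made-up helper, and the 16-bit-per-binding limit is implied by the shifts rather than asserted anywhere in the patch.

```cpp
#include <cassert>
#include <cstdint>

// Pack an image binding and a sampler binding into the single 32-bit
// handle used by the patched IR, low half = image, high half = sampler.
uint32_t PackHandle(uint32_t image_binding, uint32_t sampler_binding) {
    assert(image_binding <= 0xFFFF && sampler_binding <= 0xFFFF);
    return image_binding | (sampler_binding << 16);
}

int main() {
    const uint32_t handle = PackHandle(3, 7);
    assert((handle & 0xFFFF) == 3); // image index: ctx.images[handle & 0xFFFF]
    assert((handle >> 16) == 7);    // sampler index: ctx.samplers[handle >> 16]
    return 0;
}
```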