diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
index 7c8f73ba9..efb2c550a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
@@ -155,127 +155,33 @@ Id EmitUnpackSint4x8(EmitContext& ctx, Id value) {
     return ctx.OpBitcast(ctx.F32[4], unpacked);
 }
 
-Id Float32ToUfloatM5(EmitContext& ctx, Id value, u32 mantissa_bits) {
-    const auto raw_value{ctx.OpBitcast(ctx.U32[1], value)};
-    const auto raw_exponent{
-        ctx.OpBitFieldUExtract(ctx.U32[1], raw_value, ctx.ConstU32(23U), ctx.ConstU32(8U))};
-    const auto sign{
-        ctx.OpBitFieldUExtract(ctx.U32[1], raw_value, ctx.ConstU32(31U), ctx.ConstU32(1U))};
-
-    const auto exponent{
-        ctx.OpFSub(ctx.F32[1], ctx.OpConvertUToF(ctx.F32[1], raw_exponent), ctx.ConstF32(127.f))};
-
-    const auto is_zero{ctx.OpLogicalOr(ctx.U1[1],
-                                       ctx.OpIEqual(ctx.U1[1], raw_value, ctx.ConstU32(0U)),
-                                       ctx.OpIEqual(ctx.U1[1], sign, ctx.ConstU32(1U)))};
-    const auto is_nan{ctx.OpIsNan(ctx.U1[1], value)};
-    const auto is_inf{ctx.OpIsInf(ctx.U1[1], value)};
-    const auto is_denorm{ctx.OpFOrdLessThanEqual(ctx.U1[1], exponent, ctx.ConstF32(-15.f))};
-
-    const auto denorm_mantissa{ctx.OpConvertFToU(
-        ctx.U32[1],
-        ctx.OpRoundEven(ctx.F32[1],
-                        ctx.OpFMul(ctx.F32[1], value,
-                                   ctx.ConstF32(static_cast(1 << (mantissa_bits + 14))))))};
-    const auto denorm_overflow{ctx.OpINotEqual(
-        ctx.U1[1],
-        ctx.OpShiftRightLogical(ctx.U32[1], denorm_mantissa, ctx.ConstU32(mantissa_bits)),
-        ctx.ConstU32(0U))};
-    const auto denorm{ctx.OpSelect(ctx.U32[1], denorm_overflow, ctx.ConstU32(1U << mantissa_bits),
-                                   denorm_mantissa)};
-
-    const auto norm_mantissa{ctx.OpConvertFToU(
-        ctx.U32[1],
-        ctx.OpRoundEven(
-            ctx.F32[1],
-            ctx.OpFMul(
-                ctx.F32[1], value,
-                ctx.OpExp2(ctx.F32[1],
-                           ctx.OpFSub(ctx.F32[1], ctx.ConstF32(static_cast(mantissa_bits)),
-                                      exponent)))))};
-    const auto norm_overflow{
-        ctx.OpUGreaterThanEqual(ctx.U1[1], norm_mantissa, ctx.ConstU32(2U << mantissa_bits))};
-    const auto norm_final_mantissa{ctx.OpBitwiseAnd(
-        ctx.U32[1],
-        ctx.OpSelect(ctx.U32[1], norm_overflow,
-                     ctx.OpShiftRightLogical(ctx.U32[1], norm_mantissa, ctx.ConstU32(1U)),
-                     norm_mantissa),
-        ctx.ConstU32((1U << mantissa_bits) - 1))};
-    const auto norm_final_exponent{ctx.OpConvertFToU(
-        ctx.U32[1],
-        ctx.OpFAdd(ctx.F32[1],
-                   ctx.OpSelect(ctx.F32[1], norm_overflow,
-                                ctx.OpFAdd(ctx.F32[1], exponent, ctx.ConstF32(1.f)), exponent),
-                   ctx.ConstF32(15.f)))};
-    const auto norm{ctx.OpBitFieldInsert(ctx.U32[1], norm_final_mantissa, norm_final_exponent,
-                                         ctx.ConstU32(mantissa_bits), ctx.ConstU32(5U))};
-
-    return ctx.OpSelect(
-        ctx.U32[1], is_zero, ctx.ConstU32(0U),
-        ctx.OpSelect(ctx.U32[1], is_nan, ctx.ConstU32(31u << mantissa_bits | 1U),
-                     ctx.OpSelect(ctx.U32[1], is_inf, ctx.ConstU32(31U << mantissa_bits),
-                                  ctx.OpSelect(ctx.U32[1], is_denorm, denorm, norm))));
-}
-
 Id EmitPackUfloat10_11_11(EmitContext& ctx, Id value) {
     // No SPIR-V instruction for this, do it manually.
     const auto x{ctx.OpCompositeExtract(ctx.F32[1], value, 0)};
     const auto y{ctx.OpCompositeExtract(ctx.F32[1], value, 1)};
     const auto z{ctx.OpCompositeExtract(ctx.F32[1], value, 2)};
 
-    auto result = Float32ToUfloatM5(ctx, x, 6U);
-    result = ctx.OpBitFieldInsert(ctx.U32[1], result, Float32ToUfloatM5(ctx, y, 6U),
-                                  ctx.ConstU32(11U), ctx.ConstU32(11U));
-    result = ctx.OpBitFieldInsert(ctx.U32[1], result, Float32ToUfloatM5(ctx, z, 5U),
-                                  ctx.ConstU32(22U), ctx.ConstU32(10U));
+    const auto cvt_x{ctx.OpFunctionCall(ctx.U32[1], ctx.f32_to_uf11, x)}; // 11-bit component
+    const auto cvt_y{ctx.OpFunctionCall(ctx.U32[1], ctx.f32_to_uf11, y)}; // 11-bit component
+    const auto cvt_z{ctx.OpFunctionCall(ctx.U32[1], ctx.f32_to_uf10, z)}; // 10-bit component
+
+    auto result = cvt_x; // x stays in the low bits; y goes to bits 11-21, z to bits 22-31
+    result = ctx.OpBitFieldInsert(ctx.U32[1], result, cvt_y, ctx.ConstU32(11U), ctx.ConstU32(11U));
+    result = ctx.OpBitFieldInsert(ctx.U32[1], result, cvt_z, ctx.ConstU32(22U), ctx.ConstU32(10U));
     return result;
 }
 
-Id UfloatM5ToFloat32(EmitContext& ctx, Id value, u32 mantissa_bits) {
-    const auto raw_mantissa{
-        ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(0U), ctx.ConstU32(mantissa_bits))};
-    const auto raw_exponent{
-        ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(mantissa_bits), ctx.ConstU32(5U))};
-
-    const auto is_exp_max{ctx.OpIEqual(ctx.U1[1], raw_exponent, ctx.ConstU32(31u))};
-    const auto is_exp_min{ctx.OpIEqual(ctx.U1[1], raw_exponent, ctx.ConstU32(0u))};
-
-    const auto is_zero{ctx.OpIEqual(ctx.U1[1], value, ctx.ConstU32(0u))};
-    const auto is_nan{ctx.OpLogicalAnd(ctx.U1[1], is_exp_max,
-                                       ctx.OpINotEqual(ctx.U1[1], raw_mantissa, ctx.ConstU32(0u)))};
-    const auto is_inf{ctx.OpLogicalAnd(ctx.U1[1], is_exp_max,
-                                       ctx.OpIEqual(ctx.U1[1], raw_mantissa, ctx.ConstU32(0u)))};
-    const auto is_denorm{ctx.OpLogicalAnd(
-        ctx.U1[1], is_exp_min, ctx.OpINotEqual(ctx.U1[1], raw_mantissa, ctx.ConstU32(0u)))};
-
-    const auto mantissa{ctx.OpConvertUToF(ctx.F32[1], raw_mantissa)};
-    const auto exponent{
-        ctx.OpFSub(ctx.F32[1], ctx.OpConvertUToF(ctx.F32[1], raw_exponent), ctx.ConstF32(15.f))};
-
-    const auto denorm{ctx.OpFMul(ctx.F32[1], mantissa, ctx.ConstF32(1.f / (1 << 20)))};
-    const auto norm{ctx.OpFMul(
-        ctx.F32[1],
-        ctx.OpFAdd(ctx.F32[1],
-                   ctx.OpFMul(ctx.F32[1], mantissa,
-                              ctx.ConstF32(1.f / static_cast(1 << mantissa_bits))),
-                   ctx.ConstF32(1.f)),
-        ctx.OpExp2(ctx.F32[1], exponent))};
-
-    return ctx.OpSelect(
-        ctx.F32[1], is_zero, ctx.ConstF32(0.f),
-        ctx.OpSelect(ctx.F32[1], is_nan, ctx.ConstF32(NAN),
-                     ctx.OpSelect(ctx.F32[1], is_inf, ctx.ConstF32(INFINITY),
-                                  ctx.OpSelect(ctx.F32[1], is_denorm, denorm, norm))));
-}
-
 Id EmitUnpackUfloat10_11_11(EmitContext& ctx, Id value) {
     // No SPIR-V instruction for this, do it manually.
     const auto x{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(0U), ctx.ConstU32(11U))};
     const auto y{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(11U), ctx.ConstU32(11U))};
     const auto z{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(22U), ctx.ConstU32(10U))};
 
-    return ctx.OpCompositeConstruct(ctx.F32[3], UfloatM5ToFloat32(ctx, x, 6U),
-                                    UfloatM5ToFloat32(ctx, y, 6U), UfloatM5ToFloat32(ctx, z, 5U));
+    const auto cvt_x{ctx.OpFunctionCall(ctx.F32[1], ctx.uf11_to_f32, x)}; // 11-bit component
+    const auto cvt_y{ctx.OpFunctionCall(ctx.F32[1], ctx.uf11_to_f32, y)}; // 11-bit component
+    const auto cvt_z{ctx.OpFunctionCall(ctx.F32[1], ctx.uf10_to_f32, z)}; // 10-bit component
+
+    return ctx.OpCompositeConstruct(ctx.F32[3], cvt_x, cvt_y, cvt_z);
 }
 
 Id EmitPackUnorm2_10_10_10(EmitContext& ctx, Id value) {
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 22c420d7d..13d727c72 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -75,6 +75,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
     DefineSharedMemory();
     DefineBuffers();
     DefineImagesAndSamplers();
+    DefineFunctions(); // small-float conversion helpers, if the shader uses them
 }
 
EmitContext::~EmitContext() = default;
@@ -853,4 +854,142 @@ void EmitContext::DefineSharedMemory() {
     }
 }
 
+// Emits a SPIR-V function `u32 name(f32)` that packs a 32-bit float into an unsigned small float
+// with a 5-bit exponent (bias 15) and `mantissa_bits` mantissa bits: 6 for the 11-bit format and
+// 5 for the 10-bit format. Negative inputs pack to zero. Returns the Id of the new function.
+Id EmitContext::DefineFloat32ToUfloatM5(u32 mantissa_bits, const std::string_view name) {
+    // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/util/format_r11g11b10f.h
+    const auto func_type{TypeFunction(U32[1], F32[1])};
+    const auto func{OpFunction(U32[1], spv::FunctionControlMask::MaskNone, func_type)};
+    const auto value{OpFunctionParameter(F32[1])};
+    Name(func, name);
+    AddLabel();
+
+    const auto raw_value{OpBitcast(U32[1], value)};
+    // The binary32 exponent field is biased by 127, it is not two's complement. Extract it
+    // unsigned and subtract the bias; a sign-extending extract would turn 1.0f (field 0x7F)
+    // into exponent 127 and every input >= 2.0f (field >= 0x80) into a negative exponent.
+    const auto raw_exponent{OpBitFieldUExtract(U32[1], raw_value, ConstU32(23U), ConstU32(8U))};
+    const auto exponent{OpISub(S32[1], OpBitcast(S32[1], raw_exponent), ConstS32(127))};
+    const auto sign{OpBitFieldUExtract(U32[1], raw_value, ConstU32(31U), ConstU32(1U))};
+
+    // The packed format has no sign bit, so +0.0 and every negative input encode as zero.
+    const auto is_zero{OpLogicalOr(U1[1], OpIEqual(U1[1], raw_value, ConstU32(0U)),
+                                   OpIEqual(U1[1], sign, ConstU32(1U)))};
+    const auto is_nan{OpIsNan(U1[1], value)};
+    const auto is_inf{OpIsInf(U1[1], value)};
+    // Inputs below 2^-14 are too small for a normal encoding and use the denormal path.
+    const auto is_denorm{OpSLessThanEqual(U1[1], exponent, ConstS32(-15))};
+
+    // Denormal path: mantissa = round(value * 2^(mantissa_bits + 14)). If rounding carries out
+    // of the mantissa field the result is the smallest normal number (exponent field 1).
+    const auto denorm_mantissa{OpConvertFToU(
+        U32[1],
+        OpRoundEven(F32[1], OpFMul(F32[1], value,
+                                   ConstF32(static_cast<float>(1 << (mantissa_bits + 14))))))};
+    const auto denorm_overflow{
+        OpINotEqual(U1[1], OpShiftRightLogical(U32[1], denorm_mantissa, ConstU32(mantissa_bits)),
+                    ConstU32(0U))};
+    const auto denorm{
+        OpSelect(U32[1], denorm_overflow, ConstU32(1U << mantissa_bits), denorm_mantissa)};
+
+    // Normal path: scale the value so its leading one lands on bit `mantissa_bits`, round, and
+    // bump the exponent when rounding carries into the next power of two.
+    // NOTE(review): finite inputs too large for the format are not clamped; the exponent sum
+    // below can reach 31 or more and is truncated by the 5-bit insert - confirm this is intended.
+    const auto norm_mantissa{OpConvertFToU(
+        U32[1],
+        OpRoundEven(F32[1],
+                    OpLdexp(F32[1], value,
+                            OpISub(S32[1], ConstS32(static_cast<s32>(mantissa_bits)), exponent))))};
+    const auto norm_overflow{
+        OpUGreaterThanEqual(U1[1], norm_mantissa, ConstU32(2U << mantissa_bits))};
+    const auto norm_final_mantissa{OpBitwiseAnd(
+        U32[1],
+        OpSelect(U32[1], norm_overflow, OpShiftRightLogical(U32[1], norm_mantissa, ConstU32(1U)),
+                 norm_mantissa),
+        ConstU32((1U << mantissa_bits) - 1))};
+    const auto norm_final_exponent{OpBitcast(
+        U32[1],
+        OpIAdd(S32[1],
+               OpSelect(S32[1], norm_overflow, OpIAdd(S32[1], exponent, ConstS32(1)), exponent),
+               ConstS32(15)))};
+    const auto norm{OpBitFieldInsert(U32[1], norm_final_mantissa, norm_final_exponent,
+                                     ConstU32(mantissa_bits), ConstU32(5U))};
+
+    // Special cases win in this order: zero/negative, NaN, infinity, denormal.
+    const auto result{OpSelect(U32[1], is_zero, ConstU32(0U),
+                               OpSelect(U32[1], is_nan, ConstU32(31u << mantissa_bits | 1U),
+                                        OpSelect(U32[1], is_inf, ConstU32(31U << mantissa_bits),
+                                                 OpSelect(U32[1], is_denorm, denorm, norm))))};
+
+    OpReturnValue(result);
+    OpFunctionEnd();
+    return func;
+}
+
+// Emits a SPIR-V function `f32 name(u32)` that expands an unsigned small float with a 5-bit
+// exponent (bias 15) and `mantissa_bits` mantissa bits into a 32-bit float. Returns the Id of
+// the new function.
+Id EmitContext::DefineUfloatM5ToFloat32(u32 mantissa_bits, const std::string_view name) {
+    // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/util/format_r11g11b10f.h
+    const auto func_type{TypeFunction(F32[1], U32[1])};
+    const auto func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type)};
+    const auto value{OpFunctionParameter(U32[1])};
+    Name(func, name);
+    AddLabel();
+
+    const auto raw_mantissa{
+        OpBitFieldUExtract(U32[1], value, ConstU32(0U), ConstU32(mantissa_bits))};
+    const auto mantissa{OpConvertUToF(F32[1], raw_mantissa)};
+    // The exponent field is biased by 15: read it unsigned, then subtract the bias.
+    const auto raw_exponent{
+        OpBitFieldUExtract(U32[1], value, ConstU32(mantissa_bits), ConstU32(5U))};
+    const auto exponent{OpISub(S32[1], OpBitcast(S32[1], raw_exponent), ConstS32(15))};
+
+    const auto is_exp_max{OpIEqual(U1[1], raw_exponent, ConstU32(31U))};
+    const auto is_exp_min{OpIEqual(U1[1], raw_exponent, ConstU32(0U))};
+
+    const auto is_zero{OpIEqual(U1[1], value, ConstU32(0u))};
+    const auto is_nan{
+        OpLogicalAnd(U1[1], is_exp_max, OpINotEqual(U1[1], raw_mantissa, ConstU32(0u)))};
+    const auto is_inf{
+        OpLogicalAnd(U1[1], is_exp_max, OpIEqual(U1[1], raw_mantissa, ConstU32(0u)))};
+    const auto is_denorm{
+        OpLogicalAnd(U1[1], is_exp_min, OpINotEqual(U1[1], raw_mantissa, ConstU32(0u)))};
+
+    // A denormal is mantissa * 2^-(mantissa_bits + 14): 2^-20 for the 11-bit format but 2^-19
+    // for the 10-bit one, so the scale must follow the mantissa width.
+    const auto denorm{OpFMul(
+        F32[1], mantissa, ConstF32(1.f / static_cast<float>(1 << (mantissa_bits + 14))))};
+    const auto norm{OpLdexp(
+        F32[1],
+        OpFAdd(F32[1],
+               OpFMul(F32[1], mantissa, ConstF32(1.f / static_cast<float>(1 << mantissa_bits))),
+               ConstF32(1.f)),
+        exponent)};
+
+    // Special cases win in this order: zero, NaN, infinity, denormal.
+    const auto result{OpSelect(F32[1], is_zero, ConstF32(0.f),
+                               OpSelect(F32[1], is_nan, ConstF32(NAN),
+                                        OpSelect(F32[1], is_inf, ConstF32(INFINITY),
+                                                 OpSelect(F32[1], is_denorm, denorm, norm))))};
+
+    OpReturnValue(result);
+    OpFunctionEnd();
+    return func;
+}
+
+// Emits the small-float conversion helpers that the shader info reports as used.
+void EmitContext::DefineFunctions() {
+    if (info.uses_pack_10_11_11) {
+        f32_to_uf11 = DefineFloat32ToUfloatM5(6, "f32_to_uf11");
+        f32_to_uf10 = DefineFloat32ToUfloatM5(5, "f32_to_uf10");
+    }
+    if (info.uses_unpack_10_11_11) {
+        uf11_to_f32 = DefineUfloatM5ToFloat32(6, "uf11_to_f32");
+        uf10_to_f32 = DefineUfloatM5ToFloat32(5, "uf10_to_f32");
+    }
+}
+
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index c20d5510c..23fca4212 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -260,6 +260,11 @@ public:
     std::array output_params{};
     std::array frag_outputs{};
 
+    Id uf11_to_f32{};
+    Id f32_to_uf11{};
+    Id uf10_to_f32{};
+    Id f32_to_uf10{};
+
 private:
     void DefineArithmeticTypes();
     void DefineInterfaces();
@@ -269,9 +274,13 @@ private:
     void DefineBuffers();
     void DefineImagesAndSamplers();
     void DefineSharedMemory();
+    void DefineFunctions();
 
     SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, u32 num_components,
                                     bool output);
+
+    Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name);
+    Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name);
 };
 
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index c1f2fe27d..498752607 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -189,6 +189,8 @@ struct Info {
     bool uses_shared{};
     bool uses_fp16{};
     bool uses_fp64{};
+    bool uses_pack_10_11_11{};
+    bool uses_unpack_10_11_11{};
     bool stores_tess_level_outer{};
     bool stores_tess_level_inner{};
     bool
translation_failed{}; // indicates that shader has unsupported instructions
diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
index 3b93be8bc..f3a1fc9a8 100644
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@@ -76,6 +76,12 @@ void Visit(Info& info, const IR::Inst& inst) {
     case IR::Opcode::ReadConst:
         info.has_readconst = true;
         break;
+    case IR::Opcode::PackUfloat10_11_11:
+        info.uses_pack_10_11_11 = true; // requests the f32_to_uf11/f32_to_uf10 helpers
+        break;
+    case IR::Opcode::UnpackUfloat10_11_11:
+        info.uses_unpack_10_11_11 = true; // requests the uf11_to_f32/uf10_to_f32 helpers
+        break;
     default:
         break;
     }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index eefb1c446..6f979a734 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -435,28 +435,6 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
     if (pipeline->IsCompute()) {
         const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute);
 
-        // Most of the time when a metadata is updated with a shader it gets cleared. It means
-        // we can skip the whole dispatch and update the tracked state instead. Also, it is not
-        // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
-        // will need its full emulation anyways. For cases of metadata read a warning will be
-        // logged.
-        const auto IsMetaUpdate = [&](const auto& desc) {
-            const auto sharp = desc.GetSharp(info);
-            const VAddr address = sharp.base_address;
-            if (desc.is_written) {
-                // Assume all slices were updates
-                if (texture_cache.ClearMeta(address)) {
-                    LOG_TRACE(Render_Vulkan, "Metadata update skipped");
-                    return true;
-                }
-            } else {
-                if (texture_cache.IsMeta(address)) {
-                    LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
-                }
-            }
-            return false;
-        };
-
         // Assume if a shader reads and writes metas at the same time, it is a copy shader.
         bool meta_read = false;
         for (const auto& desc : info.buffers) {
@@ -469,10 +447,26 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
             }
         }
 
+        // Most of the time when metadata is updated by a shader, it gets cleared. This means we
+        // can skip the whole dispatch and update the tracked state instead. Metadata is also not
+        // intended to be consumed, and in the rare cases where it is (e.g. HTile introspection,
+        // CRAA) we will need its full emulation anyway. A metadata read only triggers a warning
+        // here.
         if (!meta_read) {
             for (const auto& desc : info.buffers) {
-                if (IsMetaUpdate(desc)) {
-                    return false;
+                const auto sharp = desc.GetSharp(info);
+                const VAddr address = sharp.base_address;
+                if (desc.is_written) {
+                    // Assume all slices were updated
+                    if (texture_cache.ClearMeta(address)) {
+                        LOG_TRACE(Render_Vulkan, "Metadata update skipped");
+                        return false;
+                    }
+                } else {
+                    if (texture_cache.IsMeta(address)) {
+                        LOG_WARNING(Render_Vulkan,
+                                    "Unexpected metadata read by a CS shader (buffer)");
+                    }
                 }
             }
         }