shader_recompiler: Move 10/11-bit float conversion to functions and address some comments.

This commit is contained in:
squidbus 2025-02-06 16:12:39 -08:00
parent 504037cdc6
commit 4f8a6ea81a
6 changed files with 161 additions and 130 deletions

View File

@ -155,127 +155,33 @@ Id EmitUnpackSint4x8(EmitContext& ctx, Id value) {
return ctx.OpBitcast(ctx.F32[4], unpacked);
}
// Emits SPIR-V that converts the 32-bit float `value` into an unsigned small float made of
// `mantissa_bits` mantissa bits in the low bits and a 5-bit exponent above them.
// Returns the id of the resulting U32 value.
// Special cases are resolved by the select chain at the end, in this priority order:
// zero or negative input -> 0, NaN, infinity, denormal result, normalized result.
Id Float32ToUfloatM5(EmitContext& ctx, Id value, u32 mantissa_bits) {
const auto raw_value{ctx.OpBitcast(ctx.U32[1], value)};
// Biased exponent field (8 bits starting at bit 23) of the float32 bit pattern.
const auto raw_exponent{
ctx.OpBitFieldUExtract(ctx.U32[1], raw_value, ctx.ConstU32(23U), ctx.ConstU32(8U))};
// Sign bit (bit 31).
const auto sign{
ctx.OpBitFieldUExtract(ctx.U32[1], raw_value, ctx.ConstU32(31U), ctx.ConstU32(1U))};
// Unbiased exponent, kept as a float: raw_exponent - 127.
const auto exponent{
ctx.OpFSub(ctx.F32[1], ctx.OpConvertUToF(ctx.F32[1], raw_exponent), ctx.ConstF32(127.f))};
// The output has no sign bit: an all-zero bit pattern or any input with the sign bit set
// produces 0.
const auto is_zero{ctx.OpLogicalOr(ctx.U1[1],
ctx.OpIEqual(ctx.U1[1], raw_value, ctx.ConstU32(0U)),
ctx.OpIEqual(ctx.U1[1], sign, ctx.ConstU32(1U)))};
const auto is_nan{ctx.OpIsNan(ctx.U1[1], value)};
const auto is_inf{ctx.OpIsInf(ctx.U1[1], value)};
// Inputs with an unbiased exponent of -15 or less take the denormal path.
const auto is_denorm{ctx.OpFOrdLessThanEqual(ctx.U1[1], exponent, ctx.ConstF32(-15.f))};
// Denormal mantissa: value * 2^(mantissa_bits + 14), rounded to nearest even.
const auto denorm_mantissa{ctx.OpConvertFToU(
ctx.U32[1],
ctx.OpRoundEven(ctx.F32[1],
ctx.OpFMul(ctx.F32[1], value,
ctx.ConstF32(static_cast<float>(1 << (mantissa_bits + 14))))))};
// Rounding may carry out of the mantissa field; in that case the result is the bit pattern
// 1 << mantissa_bits (exponent field 1, mantissa 0).
const auto denorm_overflow{ctx.OpINotEqual(
ctx.U1[1],
ctx.OpShiftRightLogical(ctx.U32[1], denorm_mantissa, ctx.ConstU32(mantissa_bits)),
ctx.ConstU32(0U))};
const auto denorm{ctx.OpSelect(ctx.U32[1], denorm_overflow, ctx.ConstU32(1U << mantissa_bits),
denorm_mantissa)};
// Normalized mantissa: value * 2^(mantissa_bits - exponent), rounded to nearest even.
const auto norm_mantissa{ctx.OpConvertFToU(
ctx.U32[1],
ctx.OpRoundEven(
ctx.F32[1],
ctx.OpFMul(
ctx.F32[1], value,
ctx.OpExp2(ctx.F32[1],
ctx.OpFSub(ctx.F32[1], ctx.ConstF32(static_cast<float>(mantissa_bits)),
exponent)))))};
// If rounding reached 2 << mantissa_bits, halve the mantissa and bump the exponent by one.
const auto norm_overflow{
ctx.OpUGreaterThanEqual(ctx.U1[1], norm_mantissa, ctx.ConstU32(2U << mantissa_bits))};
// Keep only the low mantissa_bits bits of the (possibly shifted) mantissa.
const auto norm_final_mantissa{ctx.OpBitwiseAnd(
ctx.U32[1],
ctx.OpSelect(ctx.U32[1], norm_overflow,
ctx.OpShiftRightLogical(ctx.U32[1], norm_mantissa, ctx.ConstU32(1U)),
norm_mantissa),
ctx.ConstU32((1U << mantissa_bits) - 1))};
// Re-bias the exponent with the output bias of 15.
const auto norm_final_exponent{ctx.OpConvertFToU(
ctx.U32[1],
ctx.OpFAdd(ctx.F32[1],
ctx.OpSelect(ctx.F32[1], norm_overflow,
ctx.OpFAdd(ctx.F32[1], exponent, ctx.ConstF32(1.f)), exponent),
ctx.ConstF32(15.f)))};
// Place the 5-bit exponent directly above the mantissa.
const auto norm{ctx.OpBitFieldInsert(ctx.U32[1], norm_final_mantissa, norm_final_exponent,
ctx.ConstU32(mantissa_bits), ctx.ConstU32(5U))};
// NaN is encoded as exponent 31 with mantissa 1, infinity as exponent 31 with mantissa 0.
return ctx.OpSelect(
ctx.U32[1], is_zero, ctx.ConstU32(0U),
ctx.OpSelect(ctx.U32[1], is_nan, ctx.ConstU32(31u << mantissa_bits | 1U),
ctx.OpSelect(ctx.U32[1], is_inf, ctx.ConstU32(31U << mantissa_bits),
ctx.OpSelect(ctx.U32[1], is_denorm, denorm, norm))));
}
// Packs the first three float components of `value` into a single 32-bit word:
// x occupies bits 0..10 and y bits 11..21 (both converted with ctx.f32_to_uf11),
// z occupies bits 22..31 (converted with ctx.f32_to_uf10).
// Returns the id of the packed U32 value.
Id EmitPackUfloat10_11_11(EmitContext& ctx, Id value) {
    // No SPIR-V instruction for this, do it manually.
    const auto x{ctx.OpCompositeExtract(ctx.F32[1], value, 0)};
    const auto y{ctx.OpCompositeExtract(ctx.F32[1], value, 1)};
    const auto z{ctx.OpCompositeExtract(ctx.F32[1], value, 2)};
    // Each component is converted by calling the shared conversion function rather than
    // expanding the conversion inline at every use.
    const auto cvt_x{ctx.OpFunctionCall(ctx.U32[1], ctx.f32_to_uf11, x)};
    const auto cvt_y{ctx.OpFunctionCall(ctx.U32[1], ctx.f32_to_uf11, y)};
    const auto cvt_z{ctx.OpFunctionCall(ctx.U32[1], ctx.f32_to_uf10, z)};
    // The converted x seeds the word; y and z are inserted above it.
    auto result = cvt_x;
    result = ctx.OpBitFieldInsert(ctx.U32[1], result, cvt_y, ctx.ConstU32(11U), ctx.ConstU32(11U));
    result = ctx.OpBitFieldInsert(ctx.U32[1], result, cvt_z, ctx.ConstU32(22U), ctx.ConstU32(10U));
    return result;
}
// Emits SPIR-V that converts an unsigned small float (low `mantissa_bits` bits of mantissa with
// a 5-bit exponent above them, bias 15) held in the U32 `value` into a 32-bit float.
// Returns the id of the resulting F32 value.
// Special cases are resolved by the final select chain in this priority order:
// zero, NaN, infinity, denormal, normalized.
Id UfloatM5ToFloat32(EmitContext& ctx, Id value, u32 mantissa_bits) {
    const auto raw_mantissa{
        ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(0U), ctx.ConstU32(mantissa_bits))};
    const auto raw_exponent{
        ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(mantissa_bits), ctx.ConstU32(5U))};

    const auto is_exp_max{ctx.OpIEqual(ctx.U1[1], raw_exponent, ctx.ConstU32(31u))};
    const auto is_exp_min{ctx.OpIEqual(ctx.U1[1], raw_exponent, ctx.ConstU32(0u))};

    const auto is_zero{ctx.OpIEqual(ctx.U1[1], value, ctx.ConstU32(0u))};
    // Exponent 31 encodes NaN (non-zero mantissa) or infinity (zero mantissa).
    const auto is_nan{ctx.OpLogicalAnd(ctx.U1[1], is_exp_max,
                                       ctx.OpINotEqual(ctx.U1[1], raw_mantissa, ctx.ConstU32(0u)))};
    const auto is_inf{ctx.OpLogicalAnd(ctx.U1[1], is_exp_max,
                                       ctx.OpIEqual(ctx.U1[1], raw_mantissa, ctx.ConstU32(0u)))};
    // Exponent 0 with a non-zero mantissa is a denormal.
    const auto is_denorm{ctx.OpLogicalAnd(
        ctx.U1[1], is_exp_min, ctx.OpINotEqual(ctx.U1[1], raw_mantissa, ctx.ConstU32(0u)))};

    const auto mantissa{ctx.OpConvertUToF(ctx.F32[1], raw_mantissa)};
    const auto exponent{
        ctx.OpFSub(ctx.F32[1], ctx.OpConvertUToF(ctx.F32[1], raw_exponent), ctx.ConstF32(15.f))};

    // Denormal value is 2^-14 * mantissa / 2^mantissa_bits, so the scale depends on the
    // mantissa width (2^-20 for 6 bits, 2^-19 for 5 bits) and must not be hard-coded.
    const auto denorm{ctx.OpFMul(
        ctx.F32[1], mantissa,
        ctx.ConstF32(1.f / static_cast<float>(1 << (mantissa_bits + 14))))};
    // Normalized value is (1 + mantissa / 2^mantissa_bits) * 2^exponent.
    const auto norm{ctx.OpFMul(
        ctx.F32[1],
        ctx.OpFAdd(ctx.F32[1],
                   ctx.OpFMul(ctx.F32[1], mantissa,
                              ctx.ConstF32(1.f / static_cast<float>(1 << mantissa_bits))),
                   ctx.ConstF32(1.f)),
        ctx.OpExp2(ctx.F32[1], exponent))};

    return ctx.OpSelect(
        ctx.F32[1], is_zero, ctx.ConstF32(0.f),
        ctx.OpSelect(ctx.F32[1], is_nan, ctx.ConstF32(NAN),
                     ctx.OpSelect(ctx.F32[1], is_inf, ctx.ConstF32(INFINITY),
                                  ctx.OpSelect(ctx.F32[1], is_denorm, denorm, norm))));
}
// Unpacks a 32-bit word into a three-component float vector:
// bits 0..10 -> x and bits 11..21 -> y (both converted with ctx.uf11_to_f32),
// bits 22..31 -> z (converted with ctx.uf10_to_f32).
// Returns the id of the resulting F32[3] composite.
Id EmitUnpackUfloat10_11_11(EmitContext& ctx, Id value) {
    // No SPIR-V instruction for this, do it manually.
    const auto x{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(0U), ctx.ConstU32(11U))};
    const auto y{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(11U), ctx.ConstU32(11U))};
    const auto z{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(22U), ctx.ConstU32(10U))};
    // Each field is converted by calling the shared conversion function rather than
    // expanding the conversion inline at every use.
    const auto cvt_x{ctx.OpFunctionCall(ctx.F32[1], ctx.uf11_to_f32, x)};
    const auto cvt_y{ctx.OpFunctionCall(ctx.F32[1], ctx.uf11_to_f32, y)};
    const auto cvt_z{ctx.OpFunctionCall(ctx.F32[1], ctx.uf10_to_f32, z)};
    return ctx.OpCompositeConstruct(ctx.F32[3], cvt_x, cvt_y, cvt_z);
}
Id EmitPackUnorm2_10_10_10(EmitContext& ctx, Id value) {

View File

@ -75,6 +75,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
DefineSharedMemory();
DefineBuffers();
DefineImagesAndSamplers();
DefineFunctions();
}
EmitContext::~EmitContext() = default;
@ -853,4 +854,117 @@ void EmitContext::DefineSharedMemory() {
}
}
// Defines a SPIR-V function named `name` that takes one F32 and returns a U32 holding an
// unsigned small float: `mantissa_bits` mantissa bits in the low bits with a 5-bit exponent
// (bias 15) above them. Returns the id of the defined function.
// Special cases are resolved by the final select chain in this priority order:
// zero or negative input -> 0, NaN, infinity, denormal result, normalized result.
Id EmitContext::DefineFloat32ToUfloatM5(u32 mantissa_bits, const std::string_view name) {
    // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/util/format_r11g11b10f.h
    const auto func_type{TypeFunction(U32[1], F32[1])};
    const auto func{OpFunction(U32[1], spv::FunctionControlMask::MaskNone, func_type)};
    const auto value{OpFunctionParameter(F32[1])};
    Name(func, name);
    AddLabel();

    const auto raw_value{OpBitcast(U32[1], value)};
    // The float32 exponent field is stored with a bias of 127. Sign-extending the 8-bit field
    // does not remove that bias (1.0f would yield 127 instead of 0), so extract it unsigned
    // and subtract the bias explicitly.
    const auto raw_exponent{OpBitFieldUExtract(U32[1], raw_value, ConstU32(23U), ConstU32(8U))};
    const auto exponent{OpISub(S32[1], OpBitcast(S32[1], raw_exponent), ConstS32(127))};
    const auto sign{OpBitFieldUExtract(U32[1], raw_value, ConstU32(31U), ConstU32(1U))};

    // The output has no sign bit: an all-zero bit pattern or any input with the sign bit set
    // produces 0.
    const auto is_zero{OpLogicalOr(U1[1], OpIEqual(U1[1], raw_value, ConstU32(0U)),
                                   OpIEqual(U1[1], sign, ConstU32(1U)))};
    const auto is_nan{OpIsNan(U1[1], value)};
    const auto is_inf{OpIsInf(U1[1], value)};
    // Inputs with an unbiased exponent of -15 or less take the denormal path.
    const auto is_denorm{OpSLessThanEqual(U1[1], exponent, ConstS32(-15))};

    // Denormal mantissa: value * 2^(mantissa_bits + 14), rounded to nearest even.
    const auto denorm_mantissa{OpConvertFToU(
        U32[1],
        OpRoundEven(F32[1], OpFMul(F32[1], value,
                                   ConstF32(static_cast<float>(1 << (mantissa_bits + 14))))))};
    // Rounding may carry out of the mantissa field; the result is then the bit pattern
    // 1 << mantissa_bits (exponent field 1, mantissa 0).
    const auto denorm_overflow{
        OpINotEqual(U1[1], OpShiftRightLogical(U32[1], denorm_mantissa, ConstU32(mantissa_bits)),
                    ConstU32(0U))};
    const auto denorm{
        OpSelect(U32[1], denorm_overflow, ConstU32(1U << mantissa_bits), denorm_mantissa)};

    // Normalized mantissa: value * 2^(mantissa_bits - exponent), rounded to nearest even.
    const auto norm_mantissa{OpConvertFToU(
        U32[1],
        OpRoundEven(F32[1],
                    OpLdexp(F32[1], value,
                            OpISub(S32[1], ConstS32(static_cast<int>(mantissa_bits)), exponent))))};
    // If rounding reached 2 << mantissa_bits, halve the mantissa and bump the exponent by one.
    const auto norm_overflow{
        OpUGreaterThanEqual(U1[1], norm_mantissa, ConstU32(2U << mantissa_bits))};
    const auto norm_final_mantissa{OpBitwiseAnd(
        U32[1],
        OpSelect(U32[1], norm_overflow, OpShiftRightLogical(U32[1], norm_mantissa, ConstU32(1U)),
                 norm_mantissa),
        ConstU32((1U << mantissa_bits) - 1))};
    // Re-bias the exponent with the output bias of 15.
    // NOTE(review): exponents above the 5-bit range are not clamped before insertion - confirm
    // whether inputs larger than the format's maximum finite value need explicit handling.
    const auto norm_final_exponent{OpBitcast(
        U32[1],
        OpIAdd(S32[1],
               OpSelect(S32[1], norm_overflow, OpIAdd(S32[1], exponent, ConstS32(1)), exponent),
               ConstS32(15)))};
    const auto norm{OpBitFieldInsert(U32[1], norm_final_mantissa, norm_final_exponent,
                                     ConstU32(mantissa_bits), ConstU32(5U))};

    // NaN is encoded as exponent 31 with mantissa 1, infinity as exponent 31 with mantissa 0.
    const auto result{OpSelect(U32[1], is_zero, ConstU32(0U),
                               OpSelect(U32[1], is_nan, ConstU32(31u << mantissa_bits | 1U),
                                        OpSelect(U32[1], is_inf, ConstU32(31U << mantissa_bits),
                                                 OpSelect(U32[1], is_denorm, denorm, norm))))};
    OpReturnValue(result);
    OpFunctionEnd();
    return func;
}
// Defines a SPIR-V function named `name` that takes a U32 holding an unsigned small float
// (`mantissa_bits` mantissa bits in the low bits with a 5-bit exponent, bias 15, above them)
// and returns the equivalent F32. Returns the id of the defined function.
// Special cases are resolved by the final select chain in this priority order:
// zero, NaN, infinity, denormal, normalized.
Id EmitContext::DefineUfloatM5ToFloat32(u32 mantissa_bits, const std::string_view name) {
    // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/util/format_r11g11b10f.h
    const auto func_type{TypeFunction(F32[1], U32[1])};
    const auto func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type)};
    const auto value{OpFunctionParameter(U32[1])};
    Name(func, name);
    AddLabel();

    const auto raw_mantissa{
        OpBitFieldUExtract(U32[1], value, ConstU32(0U), ConstU32(mantissa_bits))};
    const auto mantissa{OpConvertUToF(F32[1], raw_mantissa)};
    // The exponent field is stored with a bias of 15. Sign-extending the 5-bit field does not
    // remove that bias (an encoded 1.0 would yield 15 instead of 0), so extract it unsigned and
    // subtract the bias explicitly.
    const auto raw_exponent{
        OpBitFieldUExtract(U32[1], value, ConstU32(mantissa_bits), ConstU32(5U))};
    const auto exponent{OpISub(S32[1], OpBitcast(S32[1], raw_exponent), ConstS32(15))};

    const auto is_exp_max{OpIEqual(U1[1], raw_exponent, ConstU32(31u))};
    const auto is_exp_min{OpIEqual(U1[1], raw_exponent, ConstU32(0u))};

    const auto is_zero{OpIEqual(U1[1], value, ConstU32(0u))};
    // Exponent 31 encodes NaN (non-zero mantissa) or infinity (zero mantissa).
    const auto is_nan{
        OpLogicalAnd(U1[1], is_exp_max, OpINotEqual(U1[1], raw_mantissa, ConstU32(0u)))};
    const auto is_inf{
        OpLogicalAnd(U1[1], is_exp_max, OpIEqual(U1[1], raw_mantissa, ConstU32(0u)))};
    // Exponent 0 with a non-zero mantissa is a denormal.
    const auto is_denorm{
        OpLogicalAnd(U1[1], is_exp_min, OpINotEqual(U1[1], raw_mantissa, ConstU32(0u)))};

    // Denormal value is 2^-14 * mantissa / 2^mantissa_bits, so the scale depends on the
    // mantissa width (2^-20 for 6 bits, 2^-19 for 5 bits) and must not be hard-coded.
    const auto denorm{OpFMul(
        F32[1], mantissa, ConstF32(1.f / static_cast<float>(1 << (mantissa_bits + 14))))};
    // Normalized value is (1 + mantissa / 2^mantissa_bits) * 2^exponent.
    const auto norm{OpLdexp(
        F32[1],
        OpFAdd(F32[1],
               OpFMul(F32[1], mantissa, ConstF32(1.f / static_cast<float>(1 << mantissa_bits))),
               ConstF32(1.f)),
        exponent)};

    const auto result{OpSelect(F32[1], is_zero, ConstF32(0.f),
                               OpSelect(F32[1], is_nan, ConstF32(NAN),
                                        OpSelect(F32[1], is_inf, ConstF32(INFINITY),
                                                 OpSelect(F32[1], is_denorm, denorm, norm))))};
    OpReturnValue(result);
    OpFunctionEnd();
    return func;
}
// Defines the float conversion helper functions, but only the ones the shader's usage flags
// in `info` ask for, and stores their ids in the matching members.
void EmitContext::DefineFunctions() {
    // Mantissa widths passed to the converters for the 11-bit and 10-bit variants.
    constexpr unsigned uf11_mantissa_bits = 6;
    constexpr unsigned uf10_mantissa_bits = 5;

    const bool needs_pack = info.uses_pack_10_11_11;
    const bool needs_unpack = info.uses_unpack_10_11_11;

    // Float32 -> small float converters, used when packing.
    if (needs_pack) {
        f32_to_uf11 = DefineFloat32ToUfloatM5(uf11_mantissa_bits, "f32_to_uf11");
        f32_to_uf10 = DefineFloat32ToUfloatM5(uf10_mantissa_bits, "f32_to_uf10");
    }

    // Small float -> float32 converters, used when unpacking.
    if (needs_unpack) {
        uf11_to_f32 = DefineUfloatM5ToFloat32(uf11_mantissa_bits, "uf11_to_f32");
        uf10_to_f32 = DefineUfloatM5ToFloat32(uf10_mantissa_bits, "uf10_to_f32");
    }
}
} // namespace Shader::Backend::SPIRV

View File

@ -260,6 +260,11 @@ public:
std::array<SpirvAttribute, IR::NumParams> output_params{};
std::array<SpirvAttribute, IR::NumRenderTargets> frag_outputs{};
Id uf11_to_f32{};
Id f32_to_uf11{};
Id uf10_to_f32{};
Id f32_to_uf10{};
private:
void DefineArithmeticTypes();
void DefineInterfaces();
@ -269,9 +274,13 @@ private:
void DefineBuffers();
void DefineImagesAndSamplers();
void DefineSharedMemory();
void DefineFunctions();
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, u32 num_components,
bool output);
Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name);
Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name);
};
} // namespace Shader::Backend::SPIRV

View File

@ -189,6 +189,8 @@ struct Info {
bool uses_shared{};
bool uses_fp16{};
bool uses_fp64{};
bool uses_pack_10_11_11{};
bool uses_unpack_10_11_11{};
bool stores_tess_level_outer{};
bool stores_tess_level_inner{};
bool translation_failed{}; // indicates that shader has unsupported instructions

View File

@ -76,6 +76,12 @@ void Visit(Info& info, const IR::Inst& inst) {
case IR::Opcode::ReadConst:
info.has_readconst = true;
break;
case IR::Opcode::PackUfloat10_11_11:
info.uses_pack_10_11_11 = true;
break;
case IR::Opcode::UnpackUfloat10_11_11:
info.uses_unpack_10_11_11 = true;
break;
default:
break;
}

View File

@ -435,28 +435,6 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
if (pipeline->IsCompute()) {
const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute);
// Most of the time when a metadata is updated with a shader it gets cleared. It means
// we can skip the whole dispatch and update the tracked state instead. Also, it is not
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
// will need its full emulation anyways. For cases of metadata read a warning will be
// logged.
const auto IsMetaUpdate = [&](const auto& desc) {
const auto sharp = desc.GetSharp(info);
const VAddr address = sharp.base_address;
if (desc.is_written) {
// Assume all slices were updated
if (texture_cache.ClearMeta(address)) {
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
return true;
}
} else {
if (texture_cache.IsMeta(address)) {
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
}
}
return false;
};
// Assume if a shader reads and writes metas at the same time, it is a copy shader.
bool meta_read = false;
for (const auto& desc : info.buffers) {
@ -469,10 +447,26 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
}
}
// Most of the time when a metadata is updated with a shader it gets cleared. It means
// we can skip the whole dispatch and update the tracked state instead. Also, it is not
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
// will need its full emulation anyways. For cases of metadata read a warning will be
// logged.
if (!meta_read) {
for (const auto& desc : info.buffers) {
if (IsMetaUpdate(desc)) {
return false;
const auto sharp = desc.GetSharp(info);
const VAddr address = sharp.base_address;
if (desc.is_written) {
// Assume all slices were updated
if (texture_cache.ClearMeta(address)) {
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
return false;
}
} else {
if (texture_cache.IsMeta(address)) {
LOG_WARNING(Render_Vulkan,
"Unexpected metadata read by a CS shader (buffer)");
}
}
}
}