mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-04 16:32:39 +00:00
shader_recompiler: Move 10/11-bit float conversion to functions and address some comments.
This commit is contained in:
parent
504037cdc6
commit
4f8a6ea81a
@ -155,127 +155,33 @@ Id EmitUnpackSint4x8(EmitContext& ctx, Id value) {
|
||||
return ctx.OpBitcast(ctx.F32[4], unpacked);
|
||||
}
|
||||
|
||||
// Emits inline SPIR-V that converts a 32-bit float into an unsigned small float made of a 5-bit
// exponent (bias 15) above `mantissa_bits` mantissa bits (6 for UF11, 5 for UF10).
//  - Zero and any value with the sign bit set encode as 0.
//  - NaN encodes as an all-ones exponent with mantissa 1, infinity as an all-ones exponent with
//    mantissa 0.
//  - Values with an unbiased exponent of -15 or less are encoded as denormals.
//  - Finite values too large for the format saturate to the largest finite encoding.
// Returns the id of the resulting U32 value.
Id Float32ToUfloatM5(EmitContext& ctx, Id value, u32 mantissa_bits) {
    const Id f32_type{ctx.F32[1]};
    const Id u32_type{ctx.U32[1]};
    const Id bool_type{ctx.U1[1]};
    const Id mantissa_width{ctx.ConstU32(mantissa_bits)};

    const Id bits{ctx.OpBitcast(u32_type, value)};
    const Id biased_exponent{
        ctx.OpBitFieldUExtract(u32_type, bits, ctx.ConstU32(23U), ctx.ConstU32(8U))};
    const Id sign_bit{ctx.OpBitFieldUExtract(u32_type, bits, ctx.ConstU32(31U), ctx.ConstU32(1U))};

    // Remove the IEEE-754 single precision bias; the exponent is kept as a float because the
    // scaling below is done with floating point arithmetic.
    const Id exponent{
        ctx.OpFSub(f32_type, ctx.OpConvertUToF(f32_type, biased_exponent), ctx.ConstF32(127.f))};

    const Id is_zero{ctx.OpLogicalOr(bool_type, ctx.OpIEqual(bool_type, bits, ctx.ConstU32(0U)),
                                     ctx.OpIEqual(bool_type, sign_bit, ctx.ConstU32(1U)))};
    const Id is_nan{ctx.OpIsNan(bool_type, value)};
    const Id is_inf{ctx.OpIsInf(bool_type, value)};
    const Id is_denorm{ctx.OpFOrdLessThanEqual(bool_type, exponent, ctx.ConstF32(-15.f))};

    // Denormal path: the mantissa is value * 2^(mantissa_bits + 14), rounded to nearest even.
    const Id denorm_scaled{ctx.OpFMul(
        f32_type, value, ctx.ConstF32(static_cast<float>(1 << (mantissa_bits + 14))))};
    const Id denorm_mantissa{
        ctx.OpConvertFToU(u32_type, ctx.OpRoundEven(f32_type, denorm_scaled))};
    // Rounding may carry out of the mantissa; that is exactly the smallest normal number.
    const Id denorm_overflow{ctx.OpINotEqual(
        bool_type, ctx.OpShiftRightLogical(u32_type, denorm_mantissa, mantissa_width),
        ctx.ConstU32(0U))};
    const Id denorm{ctx.OpSelect(u32_type, denorm_overflow, ctx.ConstU32(1U << mantissa_bits),
                                 denorm_mantissa)};

    // Normal path: scale so that the implicit leading one sits at bit `mantissa_bits`.
    const Id norm_scale{ctx.OpExp2(
        f32_type,
        ctx.OpFSub(f32_type, ctx.ConstF32(static_cast<float>(mantissa_bits)), exponent))};
    const Id norm_mantissa{ctx.OpConvertFToU(
        u32_type, ctx.OpRoundEven(f32_type, ctx.OpFMul(f32_type, value, norm_scale)))};
    // Rounding up to 2.0 requires renormalising: halve the mantissa and bump the exponent.
    const Id norm_overflow{
        ctx.OpUGreaterThanEqual(bool_type, norm_mantissa, ctx.ConstU32(2U << mantissa_bits))};
    const Id norm_shifted_mantissa{ctx.OpSelect(
        u32_type, norm_overflow,
        ctx.OpShiftRightLogical(u32_type, norm_mantissa, ctx.ConstU32(1U)), norm_mantissa)};
    const Id norm_final_mantissa{ctx.OpBitwiseAnd(u32_type, norm_shifted_mantissa,
                                                  ctx.ConstU32((1U << mantissa_bits) - 1))};
    const Id norm_adjusted_exponent{ctx.OpSelect(
        f32_type, norm_overflow, ctx.OpFAdd(f32_type, exponent, ctx.ConstF32(1.f)), exponent)};
    const Id norm_final_exponent{ctx.OpConvertFToU(
        u32_type, ctx.OpFAdd(f32_type, norm_adjusted_exponent, ctx.ConstF32(15.f)))};
    const Id norm_packed{ctx.OpBitFieldInsert(u32_type, norm_final_mantissa, norm_final_exponent,
                                              mantissa_width, ctx.ConstU32(5U))};

    // A biased exponent of 31 or more does not fit a finite encoding; inserting it into the 5-bit
    // field would silently wrap. Saturate to the largest finite value (exponent 30, mantissa all
    // ones) instead.
    const Id too_large{
        ctx.OpUGreaterThanEqual(bool_type, norm_final_exponent, ctx.ConstU32(31U))};
    const Id norm{ctx.OpSelect(u32_type, too_large, ctx.ConstU32((31U << mantissa_bits) - 1U),
                               norm_packed)};

    const Id finite{ctx.OpSelect(u32_type, is_denorm, denorm, norm)};
    const Id inf_or_finite{
        ctx.OpSelect(u32_type, is_inf, ctx.ConstU32(31U << mantissa_bits), finite)};
    const Id nan_or_rest{
        ctx.OpSelect(u32_type, is_nan, ctx.ConstU32(31u << mantissa_bits | 1U), inf_or_finite)};
    return ctx.OpSelect(u32_type, is_zero, ctx.ConstU32(0U), nan_or_rest);
}
|
||||
|
||||
// Packs the first three components of `value` into one 32-bit word: x as an 11-bit unsigned
// float in bits [0, 11), y as an 11-bit unsigned float in bits [11, 22) and z as a 10-bit
// unsigned float in bits [22, 32). Returns the id of the packed U32.
Id EmitPackUfloat10_11_11(EmitContext& ctx, Id value) {
    // No SPIR-V instruction for this, do it manually.
    const auto x{ctx.OpCompositeExtract(ctx.F32[1], value, 0)};
    const auto y{ctx.OpCompositeExtract(ctx.F32[1], value, 1)};
    const auto z{ctx.OpCompositeExtract(ctx.F32[1], value, 2)};

    // Each component is converted by calling the helper functions referenced through
    // ctx.f32_to_uf11 / ctx.f32_to_uf10 rather than by expanding the conversion inline.
    const auto cvt_x{ctx.OpFunctionCall(ctx.U32[1], ctx.f32_to_uf11, x)};
    const auto cvt_y{ctx.OpFunctionCall(ctx.U32[1], ctx.f32_to_uf11, y)};
    const auto cvt_z{ctx.OpFunctionCall(ctx.U32[1], ctx.f32_to_uf10, z)};

    // x already occupies the low bits; insert y and z above it.
    auto result = cvt_x;
    result = ctx.OpBitFieldInsert(ctx.U32[1], result, cvt_y, ctx.ConstU32(11U), ctx.ConstU32(11U));
    result = ctx.OpBitFieldInsert(ctx.U32[1], result, cvt_z, ctx.ConstU32(22U), ctx.ConstU32(10U));
    return result;
}
|
||||
|
||||
// Emits inline SPIR-V that converts an unsigned small float (5-bit exponent with bias 15 above
// `mantissa_bits` mantissa bits) held in the low bits of `value` into a 32-bit float.
//  - An all-zero input decodes to 0.0.
//  - Exponent 31 decodes to NaN when the mantissa is non-zero and to +infinity otherwise.
//  - Exponent 0 with a non-zero mantissa decodes as a denormal.
// Returns the id of the resulting F32 value.
Id UfloatM5ToFloat32(EmitContext& ctx, Id value, u32 mantissa_bits) {
    const Id f32_type{ctx.F32[1]};
    const Id u32_type{ctx.U32[1]};
    const Id bool_type{ctx.U1[1]};
    const Id zero{ctx.ConstU32(0u)};

    const Id raw_mantissa{
        ctx.OpBitFieldUExtract(u32_type, value, ctx.ConstU32(0U), ctx.ConstU32(mantissa_bits))};
    const Id raw_exponent{
        ctx.OpBitFieldUExtract(u32_type, value, ctx.ConstU32(mantissa_bits), ctx.ConstU32(5U))};

    const Id is_exp_max{ctx.OpIEqual(bool_type, raw_exponent, ctx.ConstU32(31u))};
    const Id is_exp_min{ctx.OpIEqual(bool_type, raw_exponent, zero)};
    const Id has_mantissa{ctx.OpINotEqual(bool_type, raw_mantissa, zero)};
    const Id no_mantissa{ctx.OpIEqual(bool_type, raw_mantissa, zero)};

    const Id is_zero{ctx.OpIEqual(bool_type, value, zero)};
    const Id is_nan{ctx.OpLogicalAnd(bool_type, is_exp_max, has_mantissa)};
    const Id is_inf{ctx.OpLogicalAnd(bool_type, is_exp_max, no_mantissa)};
    const Id is_denorm{ctx.OpLogicalAnd(bool_type, is_exp_min, has_mantissa)};

    const Id mantissa{ctx.OpConvertUToF(f32_type, raw_mantissa)};
    const Id exponent{
        ctx.OpFSub(f32_type, ctx.OpConvertUToF(f32_type, raw_exponent), ctx.ConstF32(15.f))};

    // Denormals are mantissa / 2^mantissa_bits * 2^-14. The divisor therefore depends on the
    // mantissa width (2^20 for UF11, 2^19 for UF10) and must not be hard-coded.
    const Id denorm{ctx.OpFMul(
        f32_type, mantissa,
        ctx.ConstF32(1.f / static_cast<float>(1 << (mantissa_bits + 14))))};

    // Normals are (1 + mantissa / 2^mantissa_bits) * 2^exponent.
    const Id fraction{ctx.OpFMul(f32_type, mantissa,
                                 ctx.ConstF32(1.f / static_cast<float>(1 << mantissa_bits)))};
    const Id significand{ctx.OpFAdd(f32_type, fraction, ctx.ConstF32(1.f))};
    const Id norm{ctx.OpFMul(f32_type, significand, ctx.OpExp2(f32_type, exponent))};

    const Id finite{ctx.OpSelect(f32_type, is_denorm, denorm, norm)};
    const Id inf_or_finite{ctx.OpSelect(f32_type, is_inf, ctx.ConstF32(INFINITY), finite)};
    const Id nan_or_rest{ctx.OpSelect(f32_type, is_nan, ctx.ConstF32(NAN), inf_or_finite)};
    return ctx.OpSelect(f32_type, is_zero, ctx.ConstF32(0.f), nan_or_rest);
}
|
||||
|
||||
// Unpacks a 32-bit word into a three component float vector: bits [0, 11) and [11, 22) are
// decoded as 11-bit unsigned floats (x, y) and bits [22, 32) as a 10-bit unsigned float (z).
// Returns the id of the resulting F32[3] composite.
Id EmitUnpackUfloat10_11_11(EmitContext& ctx, Id value) {
    // No SPIR-V instruction for this, do it manually.
    const auto x{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(0U), ctx.ConstU32(11U))};
    const auto y{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(11U), ctx.ConstU32(11U))};
    const auto z{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(22U), ctx.ConstU32(10U))};

    // Each field is decoded by calling the helper functions referenced through
    // ctx.uf11_to_f32 / ctx.uf10_to_f32 rather than by expanding the conversion inline.
    const auto cvt_x{ctx.OpFunctionCall(ctx.F32[1], ctx.uf11_to_f32, x)};
    const auto cvt_y{ctx.OpFunctionCall(ctx.F32[1], ctx.uf11_to_f32, y)};
    const auto cvt_z{ctx.OpFunctionCall(ctx.F32[1], ctx.uf10_to_f32, z)};

    return ctx.OpCompositeConstruct(ctx.F32[3], cvt_x, cvt_y, cvt_z);
}
|
||||
|
||||
Id EmitPackUnorm2_10_10_10(EmitContext& ctx, Id value) {
|
||||
|
@ -75,6 +75,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
|
||||
DefineSharedMemory();
|
||||
DefineBuffers();
|
||||
DefineImagesAndSamplers();
|
||||
DefineFunctions();
|
||||
}
|
||||
|
||||
// Out-of-line defaulted destructor.
EmitContext::~EmitContext() = default;
|
||||
@ -853,4 +854,117 @@ void EmitContext::DefineSharedMemory() {
|
||||
}
|
||||
}
|
||||
|
||||
// Defines a SPIR-V function `name` of type U32(F32) that converts a 32-bit float into an unsigned
// small float made of a 5-bit exponent (bias 15) above `mantissa_bits` mantissa bits.
//  - Zero and any value with the sign bit set encode as 0.
//  - NaN encodes as an all-ones exponent with mantissa 1, infinity as an all-ones exponent with
//    mantissa 0.
//  - Values with an unbiased exponent of -15 or less are encoded as denormals.
//  - Finite values too large for the format saturate to the largest finite encoding.
// Returns the id of the defined function.
Id EmitContext::DefineFloat32ToUfloatM5(u32 mantissa_bits, const std::string_view name) {
    // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/util/format_r11g11b10f.h
    const auto func_type{TypeFunction(U32[1], F32[1])};
    const auto func{OpFunction(U32[1], spv::FunctionControlMask::MaskNone, func_type)};
    const auto value{OpFunctionParameter(F32[1])};
    Name(func, name);
    AddLabel();

    const auto raw_value{OpBitcast(U32[1], value)};
    // The IEEE-754 exponent field is stored with a bias of 127, it is not a two's complement
    // number: extract it unsigned and subtract the bias to get the real exponent.
    const auto raw_exponent{
        OpBitFieldUExtract(U32[1], raw_value, ConstU32(23U), ConstU32(8U))};
    const auto exponent{OpISub(S32[1], OpBitcast(S32[1], raw_exponent), ConstS32(127))};
    const auto sign{OpBitFieldUExtract(U32[1], raw_value, ConstU32(31U), ConstU32(1U))};

    const auto is_zero{OpLogicalOr(U1[1], OpIEqual(U1[1], raw_value, ConstU32(0U)),
                                   OpIEqual(U1[1], sign, ConstU32(1U)))};
    const auto is_nan{OpIsNan(U1[1], value)};
    const auto is_inf{OpIsInf(U1[1], value)};
    const auto is_denorm{OpSLessThanEqual(U1[1], exponent, ConstS32(-15))};

    // Denormal path: the mantissa is value * 2^(mantissa_bits + 14), rounded to nearest even.
    const auto denorm_mantissa{OpConvertFToU(
        U32[1],
        OpRoundEven(F32[1], OpFMul(F32[1], value,
                                   ConstF32(static_cast<float>(1 << (mantissa_bits + 14))))))};
    // Rounding may carry out of the mantissa; that is exactly the smallest normal number.
    const auto denorm_overflow{
        OpINotEqual(U1[1], OpShiftRightLogical(U32[1], denorm_mantissa, ConstU32(mantissa_bits)),
                    ConstU32(0U))};
    const auto denorm{
        OpSelect(U32[1], denorm_overflow, ConstU32(1U << mantissa_bits), denorm_mantissa)};

    // Normal path: scale so that the implicit leading one sits at bit `mantissa_bits`.
    const auto norm_mantissa{OpConvertFToU(
        U32[1],
        OpRoundEven(F32[1],
                    OpLdexp(F32[1], value,
                            OpISub(S32[1], ConstS32(static_cast<int>(mantissa_bits)), exponent))))};
    // Rounding up to 2.0 requires renormalising: halve the mantissa and bump the exponent.
    const auto norm_overflow{
        OpUGreaterThanEqual(U1[1], norm_mantissa, ConstU32(2U << mantissa_bits))};
    const auto norm_final_mantissa{OpBitwiseAnd(
        U32[1],
        OpSelect(U32[1], norm_overflow, OpShiftRightLogical(U32[1], norm_mantissa, ConstU32(1U)),
                 norm_mantissa),
        ConstU32((1U << mantissa_bits) - 1))};
    const auto norm_final_exponent{OpBitcast(
        U32[1],
        OpIAdd(S32[1],
               OpSelect(S32[1], norm_overflow, OpIAdd(S32[1], exponent, ConstS32(1)), exponent),
               ConstS32(15)))};
    const auto norm_packed{OpBitFieldInsert(U32[1], norm_final_mantissa, norm_final_exponent,
                                            ConstU32(mantissa_bits), ConstU32(5U))};

    // A biased exponent of 31 or more does not fit a finite encoding; inserting it into the 5-bit
    // field would silently wrap. Saturate to the largest finite value (exponent 30, mantissa all
    // ones) instead. Negative biased exponents also compare as large here, but those only occur
    // on the denormal path where this value is discarded.
    const auto norm_too_large{OpUGreaterThanEqual(U1[1], norm_final_exponent, ConstU32(31U))};
    const auto norm{
        OpSelect(U32[1], norm_too_large, ConstU32((31U << mantissa_bits) - 1U), norm_packed)};

    const auto result{OpSelect(U32[1], is_zero, ConstU32(0U),
                               OpSelect(U32[1], is_nan, ConstU32(31u << mantissa_bits | 1U),
                                        OpSelect(U32[1], is_inf, ConstU32(31U << mantissa_bits),
                                                 OpSelect(U32[1], is_denorm, denorm, norm))))};

    OpReturnValue(result);
    OpFunctionEnd();
    return func;
}
|
||||
|
||||
// Defines a SPIR-V function `name` of type F32(U32) that converts an unsigned small float (5-bit
// exponent with bias 15 above `mantissa_bits` mantissa bits) into a 32-bit float.
//  - An all-zero input decodes to 0.0.
//  - Exponent 31 decodes to NaN when the mantissa is non-zero and to +infinity otherwise.
//  - Exponent 0 with a non-zero mantissa decodes as a denormal.
// Returns the id of the defined function.
Id EmitContext::DefineUfloatM5ToFloat32(u32 mantissa_bits, const std::string_view name) {
    // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/util/format_r11g11b10f.h
    const auto func_type{TypeFunction(F32[1], U32[1])};
    const auto func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type)};
    const auto value{OpFunctionParameter(U32[1])};
    Name(func, name);
    AddLabel();

    const auto raw_mantissa{
        OpBitFieldUExtract(U32[1], value, ConstU32(0U), ConstU32(mantissa_bits))};
    const auto mantissa{OpConvertUToF(F32[1], raw_mantissa)};
    // The exponent field is stored with a bias of 15, it is not a two's complement number:
    // extract it unsigned and subtract the bias to get the real exponent.
    const auto raw_exponent{
        OpBitFieldUExtract(U32[1], value, ConstU32(mantissa_bits), ConstU32(5U))};
    const auto exponent{OpISub(S32[1], OpBitcast(S32[1], raw_exponent), ConstS32(15))};

    const auto is_exp_max{OpIEqual(U1[1], raw_exponent, ConstU32(31u))};
    const auto is_exp_min{OpIEqual(U1[1], raw_exponent, ConstU32(0u))};

    const auto is_zero{OpIEqual(U1[1], value, ConstU32(0u))};
    const auto is_nan{
        OpLogicalAnd(U1[1], is_exp_max, OpINotEqual(U1[1], raw_mantissa, ConstU32(0u)))};
    const auto is_inf{
        OpLogicalAnd(U1[1], is_exp_max, OpIEqual(U1[1], raw_mantissa, ConstU32(0u)))};
    const auto is_denorm{
        OpLogicalAnd(U1[1], is_exp_min, OpINotEqual(U1[1], raw_mantissa, ConstU32(0u)))};

    // Denormals are mantissa / 2^mantissa_bits * 2^-14. The divisor therefore depends on the
    // mantissa width (2^20 for UF11, 2^19 for UF10) and must not be hard-coded.
    const auto denorm{OpFMul(
        F32[1], mantissa, ConstF32(1.f / static_cast<float>(1 << (mantissa_bits + 14))))};
    // Normals are (1 + mantissa / 2^mantissa_bits) * 2^exponent.
    const auto norm{OpLdexp(
        F32[1],
        OpFAdd(F32[1],
               OpFMul(F32[1], mantissa, ConstF32(1.f / static_cast<float>(1 << mantissa_bits))),
               ConstF32(1.f)),
        exponent)};

    const auto result{OpSelect(F32[1], is_zero, ConstF32(0.f),
                               OpSelect(F32[1], is_nan, ConstF32(NAN),
                                        OpSelect(F32[1], is_inf, ConstF32(INFINITY),
                                                 OpSelect(F32[1], is_denorm, denorm, norm))))};

    OpReturnValue(result);
    OpFunctionEnd();
    return func;
}
|
||||
|
||||
void EmitContext::DefineFunctions() {
|
||||
if (info.uses_pack_10_11_11) {
|
||||
f32_to_uf11 = DefineFloat32ToUfloatM5(6, "f32_to_uf11");
|
||||
f32_to_uf10 = DefineFloat32ToUfloatM5(5, "f32_to_uf10");
|
||||
}
|
||||
if (info.uses_unpack_10_11_11) {
|
||||
uf11_to_f32 = DefineUfloatM5ToFloat32(6, "uf11_to_f32");
|
||||
uf10_to_f32 = DefineUfloatM5ToFloat32(5, "uf10_to_f32");
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
@ -260,6 +260,11 @@ public:
|
||||
std::array<SpirvAttribute, IR::NumParams> output_params{};
|
||||
std::array<SpirvAttribute, IR::NumRenderTargets> frag_outputs{};
|
||||
|
||||
// Ids of the small-float conversion helper functions. They are assigned in DefineFunctions() and
// stay default-initialised when the shader does not use the matching pack/unpack opcode.
// 11-bit unsigned float -> 32-bit float.
Id uf11_to_f32{};
|
||||
// 32-bit float -> 11-bit unsigned float.
Id f32_to_uf11{};
|
||||
// 10-bit unsigned float -> 32-bit float.
Id uf10_to_f32{};
|
||||
// 32-bit float -> 10-bit unsigned float.
Id f32_to_uf10{};
|
||||
|
||||
private:
|
||||
void DefineArithmeticTypes();
|
||||
void DefineInterfaces();
|
||||
@ -269,9 +274,13 @@ private:
|
||||
void DefineBuffers();
|
||||
void DefineImagesAndSamplers();
|
||||
void DefineSharedMemory();
|
||||
void DefineFunctions();
|
||||
|
||||
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, u32 num_components,
|
||||
bool output);
|
||||
|
||||
// Defines a U32(F32) SPIR-V function called `name` that encodes a float as an unsigned small
// float with a 5-bit exponent and `mantissa_bits` mantissa bits; returns the function id.
Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name);
|
||||
// Defines a F32(U32) SPIR-V function called `name` that decodes an unsigned small float with a
// 5-bit exponent and `mantissa_bits` mantissa bits; returns the function id.
Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name);
|
||||
};
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
@ -189,6 +189,8 @@ struct Info {
|
||||
bool uses_shared{};
|
||||
bool uses_fp16{};
|
||||
bool uses_fp64{};
|
||||
// Set when the shader contains an IR::Opcode::PackUfloat10_11_11 instruction.
bool uses_pack_10_11_11{};
|
||||
// Set when the shader contains an IR::Opcode::UnpackUfloat10_11_11 instruction.
bool uses_unpack_10_11_11{};
|
||||
bool stores_tess_level_outer{};
|
||||
bool stores_tess_level_inner{};
|
||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||
|
@ -76,6 +76,12 @@ void Visit(Info& info, const IR::Inst& inst) {
|
||||
case IR::Opcode::ReadConst:
|
||||
info.has_readconst = true;
|
||||
break;
|
||||
case IR::Opcode::PackUfloat10_11_11:
|
||||
info.uses_pack_10_11_11 = true;
|
||||
break;
|
||||
case IR::Opcode::UnpackUfloat10_11_11:
|
||||
info.uses_unpack_10_11_11 = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -435,28 +435,6 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
|
||||
if (pipeline->IsCompute()) {
|
||||
const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute);
|
||||
|
||||
// Most of the time when a metadata is updated with a shader it gets cleared. It means
|
||||
// we can skip the whole dispatch and update the tracked state instead. Also, it is not
|
||||
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
|
||||
// will need its full emulation anyways. For cases of metadata read a warning will be
|
||||
// logged.
|
||||
const auto IsMetaUpdate = [&](const auto& desc) {
|
||||
const auto sharp = desc.GetSharp(info);
|
||||
const VAddr address = sharp.base_address;
|
||||
if (desc.is_written) {
|
||||
// Assume all slices were updates
|
||||
if (texture_cache.ClearMeta(address)) {
|
||||
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
if (texture_cache.IsMeta(address)) {
|
||||
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
// Assume if a shader reads and writes metas at the same time, it is a copy shader.
|
||||
bool meta_read = false;
|
||||
for (const auto& desc : info.buffers) {
|
||||
@ -469,10 +447,26 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
|
||||
}
|
||||
}
|
||||
|
||||
// Most of the time when a metadata is updated with a shader it gets cleared. It means
|
||||
// we can skip the whole dispatch and update the tracked state instead. Also, it is not
|
||||
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
|
||||
// will need its full emulation anyways. For cases of metadata read a warning will be
|
||||
// logged.
|
||||
if (!meta_read) {
|
||||
for (const auto& desc : info.buffers) {
|
||||
if (IsMetaUpdate(desc)) {
|
||||
return false;
|
||||
const auto sharp = desc.GetSharp(info);
|
||||
const VAddr address = sharp.base_address;
|
||||
if (desc.is_written) {
|
||||
// Assume all slices were updates
|
||||
if (texture_cache.ClearMeta(address)) {
|
||||
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (texture_cache.IsMeta(address)) {
|
||||
LOG_WARNING(Render_Vulkan,
|
||||
"Unexpected metadata read by a CS shader (buffer)");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user