ir: Perform degamma in shader when sampler sets force_degamma (#3420)

* ir: Perform degamma in shader when sampler sets force_degamma

* specialization: Add srgb if image is sampled

Might fix cases where sampler force_degamma is used with srgb image
This commit is contained in:
TheTurtle
2025-08-19 03:41:41 +03:00
committed by GitHub
parent 2d53d1a1e2
commit 0b02364f97
12 changed files with 82 additions and 43 deletions

View File

@@ -141,6 +141,10 @@ Id EmitFPExp2(EmitContext& ctx, Id value) {
return ctx.OpExp2(ctx.F32[1], value); return ctx.OpExp2(ctx.F32[1], value);
} }
/// Emits a power operation on (x, y) through ctx.OpPow, using ctx.F32[1] as the type operand.
Id EmitFPPow(EmitContext& ctx, Id x, Id y) {
    const Id f32_type = ctx.F32[1];
    return ctx.OpPow(f32_type, x, y);
}
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp) { Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp) {
return ctx.OpLdexp(ctx.F32[1], value, exp); return ctx.OpLdexp(ctx.F32[1], value, exp);
} }

View File

@@ -289,6 +289,7 @@ Id EmitFPNeg64(EmitContext& ctx, Id value);
Id EmitFPSin(EmitContext& ctx, Id value); Id EmitFPSin(EmitContext& ctx, Id value);
Id EmitFPCos(EmitContext& ctx, Id value); Id EmitFPCos(EmitContext& ctx, Id value);
Id EmitFPExp2(EmitContext& ctx, Id value); Id EmitFPExp2(EmitContext& ctx, Id value);
Id EmitFPPow(EmitContext& ctx, Id x, Id y);
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp); Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp);
Id EmitFPLog2(EmitContext& ctx, Id value); Id EmitFPLog2(EmitContext& ctx, Id value);
Id EmitFPRecip32(EmitContext& ctx, Id value); Id EmitFPRecip32(EmitContext& ctx, Id value);

View File

@@ -29,17 +29,6 @@ static constexpr size_t NumBuffers = 40;
static constexpr size_t NumSamplers = 16; static constexpr size_t NumSamplers = 16;
static constexpr size_t NumFMasks = 8; static constexpr size_t NumFMasks = 8;
/// Texture kinds, stored as u32.
/// NOTE(review): the hunk header shows these lines are removed by this commit.
enum class TextureType : u32 {
Color1D,
ColorArray1D,
Color2D,
ColorArray2D,
Color3D,
ColorCube,
Buffer,
};
/// Presumably the number of TextureType enumerators (seven are listed above);
/// it is a hand-written literal, so it must be kept in sync with the enum manually.
constexpr u32 NUM_TEXTURE_TYPES = 7;
enum class BufferType : u32 { enum class BufferType : u32 {
Guest, Guest,
Flatbuf, Flatbuf,

View File

@@ -1169,6 +1169,10 @@ F32 IREmitter::FPLog2(const F32& value) {
return Inst<F32>(Opcode::FPLog2, value); return Inst<F32>(Opcode::FPLog2, value);
} }
/// Appends an FPPow instruction with operands (x, y) and returns its F32 result.
F32 IREmitter::FPPow(const F32& x, const F32& y) {
    const Opcode opcode{Opcode::FPPow};
    return Inst<F32>(opcode, x, y);
}
F32F64 IREmitter::FPRecip(const F32F64& value) { F32F64 IREmitter::FPRecip(const F32F64& value) {
switch (value.Type()) { switch (value.Type()) {
case Type::F32: case Type::F32:

View File

@@ -228,6 +228,7 @@ public:
[[nodiscard]] F32 FPSin(const F32& value); [[nodiscard]] F32 FPSin(const F32& value);
[[nodiscard]] F32 FPExp2(const F32& value); [[nodiscard]] F32 FPExp2(const F32& value);
[[nodiscard]] F32 FPLog2(const F32& value); [[nodiscard]] F32 FPLog2(const F32& value);
[[nodiscard]] F32 FPPow(const F32& x, const F32& y);
[[nodiscard]] F32 FPLdexp(const F32& value, const U32& exp); [[nodiscard]] F32 FPLdexp(const F32& value, const U32& exp);
[[nodiscard]] F32F64 FPRecip(const F32F64& value); [[nodiscard]] F32F64 FPRecip(const F32F64& value);
[[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);

View File

@@ -283,6 +283,7 @@ OPCODE(FPRecipSqrt64, F64, F64,
OPCODE(FPSqrt, F32, F32, ) OPCODE(FPSqrt, F32, F32, )
OPCODE(FPSin, F32, F32, ) OPCODE(FPSin, F32, F32, )
OPCODE(FPExp2, F32, F32, ) OPCODE(FPExp2, F32, F32, )
OPCODE(FPPow, F32, F32, F32, )
OPCODE(FPLdexp, F32, F32, U32, ) OPCODE(FPLdexp, F32, F32, U32, )
OPCODE(FPCos, F32, F32, ) OPCODE(FPCos, F32, F32, )
OPCODE(FPLog2, F32, F32, ) OPCODE(FPLog2, F32, F32, )

View File

@@ -542,7 +542,7 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
// Patch image instruction if image is FMask. // Patch image instruction if image is FMask.
if (image.IsFmask()) { if (AmdGpu::IsFmask(image.GetDataFmt())) {
ASSERT_MSG(!is_written, "FMask storage instructions are not supported"); ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
@@ -830,8 +830,8 @@ IR::Value FixCubeCoords(IR::IREmitter& ir, const AmdGpu::Image& image, const IR:
void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
const ImageResource& image_res, const AmdGpu::Image& image) { const ImageResource& image_res, const AmdGpu::Image& image) {
const auto handle = inst.Arg(0); const auto handle = inst.Arg(0);
const auto sampler_res = info.samplers[(handle.U32() >> 16) & 0xFFFF]; const auto& sampler_res = info.samplers[(handle.U32() >> 16) & 0xFFFF];
auto sampler = sampler_res.GetSharp(info); const auto sampler = sampler_res.GetSharp(info);
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
const auto inst_info = inst.Flags<IR::TextureInstInfo>(); const auto inst_info = inst.Flags<IR::TextureInstInfo>();
@@ -1001,7 +1001,10 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info); return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info);
}(); }();
const auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion()); auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
if (sampler.force_degamma && image.GetNumberFmt() != AmdGpu::NumberFormat::Srgb) {
converted = ApplyForceDegamma(ir, texel, image.DstSelect());
}
inst.ReplaceUsesWith(converted); inst.ReplaceUsesWith(converted);
} }

View File

@@ -4,7 +4,7 @@
#pragma once #pragma once
#include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/ir/ir_emitter.h"
#include "video_core/amdgpu/types.h" #include "video_core/amdgpu/pixel_format.h"
namespace Shader::IR { namespace Shader::IR {
@@ -21,6 +21,36 @@ inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::Comp
return swizzled; return swizzled;
} }
/// Converts a gamma corrected value to linear space.
///
/// Emits the piecewise decode (the constants match the sRGB transfer function):
///   c >  0.04045 : ((c + 0.055) / 1.055) ^ 2.4
///   c <= 0.04045 : c / 12.92
/// Both segments are emitted unconditionally; ir.Select chooses between them.
///
/// @param ir Emitter used to append the instructions.
/// @param c  Encoded component. It is only read, so it is taken by const reference.
/// @return   The linearized component.
inline F32 ApplyGammaToLinear(IREmitter& ir, const F32& c) {
    // Power segment, used above the threshold.
    const F32 a =
        ir.FPPow(ir.FPMul(ir.FPAdd(c, ir.Imm32(0.055f)), ir.Imm32(1.0f / 1.055f)), ir.Imm32(2.4f));
    // Linear segment, used at or below the threshold.
    const F32 b = ir.FPMul(c, ir.Imm32(1.0f / 12.92f));
    return IR::F32{ir.Select(ir.FPGreaterThan(c, ir.Imm32(0.04045f)), a, b)};
}
/// Linearizes the components of a 4-component value whose destination swizzle selects a
/// color channel (red, green or blue); every other component is passed through untouched.
///
/// @param ir      Emitter used to append the instructions.
/// @param value   Composite to read components 0..3 from.
/// @param mapping Per-component swizzle deciding which outputs get the gamma-to-linear step.
/// @return        A new composite built from the four (possibly converted) components.
inline Value ApplyForceDegamma(IREmitter& ir, const Value& value,
                               const AmdGpu::CompMapping& mapping) {
    // Pull all four components out first so the extracts precede any conversion code.
    F32 r{ir.CompositeExtract(value, 0)};
    F32 g{ir.CompositeExtract(value, 1)};
    F32 b{ir.CompositeExtract(value, 2)};
    F32 a{ir.CompositeExtract(value, 3)};

    // Gamma correction is only applied to components sourced from an RGB channel.
    const auto linearize_if_color = [&ir](F32& component, const auto swizzle) {
        if (AmdGpu::IsRgb(swizzle)) {
            component = ApplyGammaToLinear(ir, component);
        }
    };
    linearize_if_color(r, mapping.r);
    linearize_if_color(g, mapping.g);
    linearize_if_color(b, mapping.b);
    linearize_if_color(a, mapping.a);

    return ir.CompositeConstruct(r, g, b, a);
}
/// Applies a number conversion in the read direction. /// Applies a number conversion in the read direction.
inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value, inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value,
const AmdGpu::NumberConversion& conversion) { const AmdGpu::NumberConversion& conversion) {

View File

@@ -48,6 +48,7 @@ struct ImageSpecialization {
bool is_integer = false; bool is_integer = false;
bool is_storage = false; bool is_storage = false;
bool is_cube = false; bool is_cube = false;
bool is_srgb = false;
AmdGpu::CompMapping dst_select{}; AmdGpu::CompMapping dst_select{};
AmdGpu::NumberConversion num_conversion{}; AmdGpu::NumberConversion num_conversion{};
@@ -62,7 +63,8 @@ struct FMaskSpecialization {
}; };
struct SamplerSpecialization { struct SamplerSpecialization {
bool force_unnormalized = false; u8 force_unnormalized : 1;
u8 force_degamma : 1;
auto operator<=>(const SamplerSpecialization&) const = default; auto operator<=>(const SamplerSpecialization&) const = default;
}; };
@@ -136,6 +138,8 @@ struct StageSpecialization {
spec.is_cube = sharp.IsCube(); spec.is_cube = sharp.IsCube();
if (spec.is_storage) { if (spec.is_storage) {
spec.dst_select = sharp.DstSelect(); spec.dst_select = sharp.DstSelect();
} else {
spec.is_srgb = sharp.GetNumberFmt() == AmdGpu::NumberFormat::Srgb;
} }
spec.num_conversion = sharp.GetNumberConversion(); spec.num_conversion = sharp.GetNumberConversion();
}); });
@@ -147,6 +151,7 @@ struct StageSpecialization {
ForEachSharp(samplers, info->samplers, ForEachSharp(samplers, info->samplers,
[](auto& spec, const auto& desc, AmdGpu::Sampler sharp) { [](auto& spec, const auto& desc, AmdGpu::Sampler sharp) {
spec.force_unnormalized = sharp.force_unnormalized; spec.force_unnormalized = sharp.force_unnormalized;
spec.force_degamma = sharp.force_degamma;
}); });
// Initialize runtime_info fields that rely on analysis in tessellation passes // Initialize runtime_info fields that rely on analysis in tessellation passes

View File

@@ -312,6 +312,11 @@ constexpr NumberClass GetNumberClass(const NumberFormat nfmt) {
} }
} }
/// Returns true when the swizzle selects the red, green or blue channel.
constexpr bool IsRgb(CompSwizzle swizzle) {
    switch (swizzle) {
    case CompSwizzle::Red:
    case CompSwizzle::Green:
    case CompSwizzle::Blue:
        return true;
    default:
        return false;
    }
}
constexpr bool IsInteger(const NumberFormat nfmt) { constexpr bool IsInteger(const NumberFormat nfmt) {
return nfmt == NumberFormat::Sint || nfmt == NumberFormat::Uint; return nfmt == NumberFormat::Sint || nfmt == NumberFormat::Uint;
} }
@@ -320,6 +325,10 @@ constexpr bool IsBlockCoded(DataFormat format) {
return format >= DataFormat::FormatBc1 && format <= DataFormat::FormatBc7; return format >= DataFormat::FormatBc1 && format <= DataFormat::FormatBc7;
} }
/// Returns true when the format lies in the inclusive range
/// [FormatFmask8_1, FormatFmask64_8].
constexpr bool IsFmask(DataFormat format) {
    if (format < DataFormat::FormatFmask8_1) {
        return false;
    }
    return format <= DataFormat::FormatFmask64_8;
}
std::string_view NameOf(DataFormat fmt); std::string_view NameOf(DataFormat fmt);
std::string_view NameOf(NumberFormat fmt); std::string_view NameOf(NumberFormat fmt);

View File

@@ -293,11 +293,6 @@ struct Image {
return (((banks - 1) << 4) & base_address) >> 4; return (((banks - 1) << 4) & base_address) >> 4;
} }
/// Returns true when this image's data format lies in the inclusive range
/// [FormatFmask8_1, FormatFmask64_8].
bool IsFmask() const noexcept {
    const auto data_format = GetDataFmt();
    const bool not_below_first = data_format >= DataFormat::FormatFmask8_1;
    return not_below_first && data_format <= DataFormat::FormatFmask64_8;
}
ImageType GetBaseType() const noexcept { ImageType GetBaseType() const noexcept {
const auto base_type = GetType(); const auto base_type = GetType();
if (base_type == ImageType::Color1DArray) { if (base_type == ImageType::Color1DArray) {

View File

@@ -11,32 +11,29 @@ namespace VideoCore {
Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler, Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler,
const AmdGpu::Liverpool::BorderColorBufferBase& border_color_base) { const AmdGpu::Liverpool::BorderColorBufferBase& border_color_base) {
if (sampler.force_degamma) {
LOG_WARNING(Render_Vulkan, "Texture requires gamma correction");
}
using namespace Vulkan; using namespace Vulkan;
const bool anisotropyEnable = instance.IsAnisotropicFilteringSupported() && const bool anisotropy_enable = instance.IsAnisotropicFilteringSupported() &&
(AmdGpu::IsAnisoFilter(sampler.xy_mag_filter) || (AmdGpu::IsAnisoFilter(sampler.xy_mag_filter) ||
AmdGpu::IsAnisoFilter(sampler.xy_min_filter)); AmdGpu::IsAnisoFilter(sampler.xy_min_filter));
const float maxAnisotropy = const float max_anisotropy =
anisotropyEnable ? std::clamp(sampler.MaxAniso(), 1.0f, instance.MaxSamplerAnisotropy()) anisotropy_enable ? std::clamp(sampler.MaxAniso(), 1.0f, instance.MaxSamplerAnisotropy())
: 1.0f; : 1.0f;
auto borderColor = LiverpoolToVK::BorderColor(sampler.border_color_type); auto border_color = LiverpoolToVK::BorderColor(sampler.border_color_type);
if (!instance.IsCustomBorderColorSupported()) { if (!instance.IsCustomBorderColorSupported()) {
LOG_WARNING(Render_Vulkan, "Custom border color is not supported, falling back to black"); LOG_WARNING(Render_Vulkan, "Custom border color is not supported, falling back to black");
borderColor = vk::BorderColor::eFloatOpaqueBlack; border_color = vk::BorderColor::eFloatOpaqueBlack;
} }
const auto customColor = [&]() -> std::optional<vk::SamplerCustomBorderColorCreateInfoEXT> { const auto custom_color = [&]() -> std::optional<vk::SamplerCustomBorderColorCreateInfoEXT> {
if (borderColor == vk::BorderColor::eFloatCustomEXT) { if (border_color == vk::BorderColor::eFloatCustomEXT) {
const auto borderColorIndex = sampler.border_color_ptr.Value(); const auto border_color_index = sampler.border_color_ptr.Value();
const auto borderColorBuffer = border_color_base.Address<std::array<float, 4>*>(); const auto border_color_buffer = border_color_base.Address<std::array<float, 4>*>();
const auto customBorderColorArray = borderColorBuffer[borderColorIndex]; const auto custom_border_color_array = border_color_buffer[border_color_index];
const vk::SamplerCustomBorderColorCreateInfoEXT ret{ const vk::SamplerCustomBorderColorCreateInfoEXT ret{
.customBorderColor = .customBorderColor =
vk::ClearColorValue{ vk::ClearColorValue{
.float32 = customBorderColorArray, .float32 = custom_border_color_array,
}, },
.format = vk::Format::eR32G32B32A32Sfloat, .format = vk::Format::eR32G32B32A32Sfloat,
}; };
@@ -47,7 +44,7 @@ Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sample
}(); }();
const vk::SamplerCreateInfo sampler_ci = { const vk::SamplerCreateInfo sampler_ci = {
.pNext = customColor ? &*customColor : nullptr, .pNext = custom_color ? &*custom_color : nullptr,
.magFilter = LiverpoolToVK::Filter(sampler.xy_mag_filter), .magFilter = LiverpoolToVK::Filter(sampler.xy_mag_filter),
.minFilter = LiverpoolToVK::Filter(sampler.xy_min_filter), .minFilter = LiverpoolToVK::Filter(sampler.xy_min_filter),
.mipmapMode = LiverpoolToVK::MipFilter(sampler.mip_filter), .mipmapMode = LiverpoolToVK::MipFilter(sampler.mip_filter),
@@ -55,13 +52,13 @@ Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sample
.addressModeV = LiverpoolToVK::ClampMode(sampler.clamp_y), .addressModeV = LiverpoolToVK::ClampMode(sampler.clamp_y),
.addressModeW = LiverpoolToVK::ClampMode(sampler.clamp_z), .addressModeW = LiverpoolToVK::ClampMode(sampler.clamp_z),
.mipLodBias = std::min(sampler.LodBias(), instance.MaxSamplerLodBias()), .mipLodBias = std::min(sampler.LodBias(), instance.MaxSamplerLodBias()),
.anisotropyEnable = anisotropyEnable, .anisotropyEnable = anisotropy_enable,
.maxAnisotropy = maxAnisotropy, .maxAnisotropy = max_anisotropy,
.compareEnable = sampler.depth_compare_func != AmdGpu::DepthCompare::Never, .compareEnable = sampler.depth_compare_func != AmdGpu::DepthCompare::Never,
.compareOp = LiverpoolToVK::DepthCompare(sampler.depth_compare_func), .compareOp = LiverpoolToVK::DepthCompare(sampler.depth_compare_func),
.minLod = sampler.MinLod(), .minLod = sampler.MinLod(),
.maxLod = sampler.MaxLod(), .maxLod = sampler.MaxLod(),
.borderColor = borderColor, .borderColor = border_color,
.unnormalizedCoordinates = false, // Handled in shader due to Vulkan limitations. .unnormalizedCoordinates = false, // Handled in shader due to Vulkan limitations.
}; };
auto [sampler_result, smplr] = instance.GetDevice().createSamplerUnique(sampler_ci); auto [sampler_result, smplr] = instance.GetDevice().createSamplerUnique(sampler_ci);