diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 01c51e399..648740c82 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -141,6 +141,10 @@ Id EmitFPExp2(EmitContext& ctx, Id value) { return ctx.OpExp2(ctx.F32[1], value); } +Id EmitFPPow(EmitContext& ctx, Id x, Id y) { + return ctx.OpPow(ctx.F32[1], x, y); +} + Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp) { return ctx.OpLdexp(ctx.F32[1], value, exp); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index a8c58bdba..319f2d5ba 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -289,6 +289,7 @@ Id EmitFPNeg64(EmitContext& ctx, Id value); Id EmitFPSin(EmitContext& ctx, Id value); Id EmitFPCos(EmitContext& ctx, Id value); Id EmitFPExp2(EmitContext& ctx, Id value); +Id EmitFPPow(EmitContext& ctx, Id x, Id y); Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp); Id EmitFPLog2(EmitContext& ctx, Id value); Id EmitFPRecip32(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 16c841581..ccf2c45e0 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -29,17 +29,6 @@ static constexpr size_t NumBuffers = 40; static constexpr size_t NumSamplers = 16; static constexpr size_t NumFMasks = 8; -enum class TextureType : u32 { - Color1D, - ColorArray1D, - Color2D, - ColorArray2D, - Color3D, - ColorCube, - Buffer, -}; -constexpr u32 NUM_TEXTURE_TYPES = 7; - enum class BufferType : u32 { Guest, Flatbuf, diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 498615b67..6d22ff582 100644 --- 
a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1169,6 +1169,10 @@ F32 IREmitter::FPLog2(const F32& value) { return Inst(Opcode::FPLog2, value); } +F32 IREmitter::FPPow(const F32& x, const F32& y) { + return Inst(Opcode::FPPow, x, y); +} + F32F64 IREmitter::FPRecip(const F32F64& value) { switch (value.Type()) { case Type::F32: diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 2cde957f3..7fbc3cc90 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -228,6 +228,7 @@ public: [[nodiscard]] F32 FPSin(const F32& value); [[nodiscard]] F32 FPExp2(const F32& value); [[nodiscard]] F32 FPLog2(const F32& value); + [[nodiscard]] F32 FPPow(const F32& x, const F32& y); [[nodiscard]] F32 FPLdexp(const F32& value, const U32& exp); [[nodiscard]] F32F64 FPRecip(const F32F64& value); [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index fecfa472c..a7b29132c 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -283,6 +283,7 @@ OPCODE(FPRecipSqrt64, F64, F64, OPCODE(FPSqrt, F32, F32, ) OPCODE(FPSin, F32, F32, ) OPCODE(FPExp2, F32, F32, ) +OPCODE(FPPow, F32, F32, F32, ) OPCODE(FPLdexp, F32, F32, U32, ) OPCODE(FPCos, F32, F32, ) OPCODE(FPLog2, F32, F32, ) diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 56f29a2c4..312f818ba 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -542,7 +542,7 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); // Patch image instruction if image is FMask. 
- if (image.IsFmask()) { + if (AmdGpu::IsFmask(image.GetDataFmt())) { ASSERT_MSG(!is_written, "FMask storage instructions are not supported"); IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; @@ -830,8 +830,8 @@ IR::Value FixCubeCoords(IR::IREmitter& ir, const AmdGpu::Image& image, const IR: void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, const ImageResource& image_res, const AmdGpu::Image& image) { const auto handle = inst.Arg(0); - const auto sampler_res = info.samplers[(handle.U32() >> 16) & 0xFFFF]; - auto sampler = sampler_res.GetSharp(info); + const auto& sampler_res = info.samplers[(handle.U32() >> 16) & 0xFFFF]; + const auto sampler = sampler_res.GetSharp(info); IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; const auto inst_info = inst.Flags(); @@ -1001,7 +1001,10 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info, return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info); }(); - const auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion()); + auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion()); + if (sampler.force_degamma && image.GetNumberFmt() != AmdGpu::NumberFormat::Srgb) { + converted = ApplyForceDegamma(ir, texel, image.DstSelect()); + } inst.ReplaceUsesWith(converted); } diff --git a/src/shader_recompiler/ir/reinterpret.h b/src/shader_recompiler/ir/reinterpret.h index 2a18f394a..10728d8dd 100644 --- a/src/shader_recompiler/ir/reinterpret.h +++ b/src/shader_recompiler/ir/reinterpret.h @@ -4,7 +4,7 @@ #pragma once #include "shader_recompiler/ir/ir_emitter.h" -#include "video_core/amdgpu/types.h" +#include "video_core/amdgpu/pixel_format.h" namespace Shader::IR { @@ -21,6 +21,36 @@ inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::Comp return swizzled; } +/// Converts gamma corrected value to linear space +inline F32 ApplyGammaToLinear(IREmitter& 
ir, F32& c) { + const F32 a = + ir.FPPow(ir.FPMul(ir.FPAdd(c, ir.Imm32(0.055f)), ir.Imm32(1.0f / 1.055f)), ir.Imm32(2.4f)); + const F32 b = ir.FPMul(c, ir.Imm32(1.0f / 12.92f)); + return IR::F32{ir.Select(ir.FPGreaterThan(c, ir.Imm32(0.04045f)), a, b)}; +} + +inline Value ApplyForceDegamma(IREmitter& ir, const Value& value, + const AmdGpu::CompMapping& mapping) { + auto x = F32{ir.CompositeExtract(value, 0)}; + auto y = F32{ir.CompositeExtract(value, 1)}; + auto z = F32{ir.CompositeExtract(value, 2)}; + auto w = F32{ir.CompositeExtract(value, 3)}; + // Gamma correction is only applied to RGB components + if (AmdGpu::IsRgb(mapping.r)) { + x = ApplyGammaToLinear(ir, x); + } + if (AmdGpu::IsRgb(mapping.g)) { + y = ApplyGammaToLinear(ir, y); + } + if (AmdGpu::IsRgb(mapping.b)) { + z = ApplyGammaToLinear(ir, z); + } + if (AmdGpu::IsRgb(mapping.a)) { + w = ApplyGammaToLinear(ir, w); + } + return ir.CompositeConstruct(x, y, z, w); +} + /// Applies a number conversion in the read direction. inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value, const AmdGpu::NumberConversion& conversion) { diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 7901b8db6..7dd1cb424 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -48,6 +48,7 @@ struct ImageSpecialization { bool is_integer = false; bool is_storage = false; bool is_cube = false; + bool is_srgb = false; AmdGpu::CompMapping dst_select{}; AmdGpu::NumberConversion num_conversion{}; @@ -62,7 +63,8 @@ struct FMaskSpecialization { }; struct SamplerSpecialization { - bool force_unnormalized = false; + u8 force_unnormalized : 1; + u8 force_degamma : 1; auto operator<=>(const SamplerSpecialization&) const = default; }; @@ -136,6 +138,8 @@ struct StageSpecialization { spec.is_cube = sharp.IsCube(); if (spec.is_storage) { spec.dst_select = sharp.DstSelect(); + } else { + spec.is_srgb = sharp.GetNumberFmt() == 
AmdGpu::NumberFormat::Srgb; } spec.num_conversion = sharp.GetNumberConversion(); }); @@ -147,6 +151,7 @@ struct StageSpecialization { ForEachSharp(samplers, info->samplers, [](auto& spec, const auto& desc, AmdGpu::Sampler sharp) { spec.force_unnormalized = sharp.force_unnormalized; + spec.force_degamma = sharp.force_degamma; }); // Initialize runtime_info fields that rely on analysis in tessellation passes diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index e7ad27dd3..7a9e32fc6 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -312,6 +312,11 @@ constexpr NumberClass GetNumberClass(const NumberFormat nfmt) { } } +constexpr bool IsRgb(CompSwizzle swizzle) { + return swizzle == CompSwizzle::Red || swizzle == CompSwizzle::Green || + swizzle == CompSwizzle::Blue; +} + constexpr bool IsInteger(const NumberFormat nfmt) { return nfmt == NumberFormat::Sint || nfmt == NumberFormat::Uint; } @@ -320,6 +325,10 @@ constexpr bool IsBlockCoded(DataFormat format) { return format >= DataFormat::FormatBc1 && format <= DataFormat::FormatBc7; } +constexpr bool IsFmask(DataFormat format) { + return format >= DataFormat::FormatFmask8_1 && format <= DataFormat::FormatFmask64_8; +} + std::string_view NameOf(DataFormat fmt); std::string_view NameOf(NumberFormat fmt); diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 742cdee86..7c5c2df12 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -293,11 +293,6 @@ struct Image { return (((banks - 1) << 4) & base_address) >> 4; } - bool IsFmask() const noexcept { - return GetDataFmt() >= DataFormat::FormatFmask8_1 && - GetDataFmt() <= DataFormat::FormatFmask64_8; - } - ImageType GetBaseType() const noexcept { const auto base_type = GetType(); if (base_type == ImageType::Color1DArray) { diff --git a/src/video_core/texture_cache/sampler.cpp b/src/video_core/texture_cache/sampler.cpp index 
e18c79a59..539b39b25 100644 --- a/src/video_core/texture_cache/sampler.cpp +++ b/src/video_core/texture_cache/sampler.cpp @@ -11,32 +11,29 @@ namespace VideoCore { Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler, const AmdGpu::Liverpool::BorderColorBufferBase& border_color_base) { - if (sampler.force_degamma) { - LOG_WARNING(Render_Vulkan, "Texture requires gamma correction"); - } using namespace Vulkan; - const bool anisotropyEnable = instance.IsAnisotropicFilteringSupported() && - (AmdGpu::IsAnisoFilter(sampler.xy_mag_filter) || - AmdGpu::IsAnisoFilter(sampler.xy_min_filter)); - const float maxAnisotropy = - anisotropyEnable ? std::clamp(sampler.MaxAniso(), 1.0f, instance.MaxSamplerAnisotropy()) - : 1.0f; - auto borderColor = LiverpoolToVK::BorderColor(sampler.border_color_type); + const bool anisotropy_enable = instance.IsAnisotropicFilteringSupported() && + (AmdGpu::IsAnisoFilter(sampler.xy_mag_filter) || + AmdGpu::IsAnisoFilter(sampler.xy_min_filter)); + const float max_anisotropy = + anisotropy_enable ? 
std::clamp(sampler.MaxAniso(), 1.0f, instance.MaxSamplerAnisotropy()) + : 1.0f; + auto border_color = LiverpoolToVK::BorderColor(sampler.border_color_type); if (!instance.IsCustomBorderColorSupported()) { LOG_WARNING(Render_Vulkan, "Custom border color is not supported, falling back to black"); - borderColor = vk::BorderColor::eFloatOpaqueBlack; + border_color = vk::BorderColor::eFloatOpaqueBlack; } - const auto customColor = [&]() -> std::optional<vk::SamplerCustomBorderColorCreateInfoEXT> { - if (borderColor == vk::BorderColor::eFloatCustomEXT) { - const auto borderColorIndex = sampler.border_color_ptr.Value(); - const auto borderColorBuffer = border_color_base.Address<std::array<float, 4>*>(); - const auto customBorderColorArray = borderColorBuffer[borderColorIndex]; + const auto custom_color = [&]() -> std::optional<vk::SamplerCustomBorderColorCreateInfoEXT> { + if (border_color == vk::BorderColor::eFloatCustomEXT) { + const auto border_color_index = sampler.border_color_ptr.Value(); + const auto border_color_buffer = border_color_base.Address<std::array<float, 4>*>(); + const auto custom_border_color_array = border_color_buffer[border_color_index]; const vk::SamplerCustomBorderColorCreateInfoEXT ret{ .customBorderColor = vk::ClearColorValue{ - .float32 = customBorderColorArray, + .float32 = custom_border_color_array, }, .format = vk::Format::eR32G32B32A32Sfloat, }; @@ -47,7 +44,7 @@ Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sample }(); const vk::SamplerCreateInfo sampler_ci = { - .pNext = customColor ? &*customColor : nullptr, + .pNext = custom_color ? 
&*custom_color : nullptr, .magFilter = LiverpoolToVK::Filter(sampler.xy_mag_filter), .minFilter = LiverpoolToVK::Filter(sampler.xy_min_filter), .mipmapMode = LiverpoolToVK::MipFilter(sampler.mip_filter), @@ -55,13 +52,13 @@ Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sample .addressModeV = LiverpoolToVK::ClampMode(sampler.clamp_y), .addressModeW = LiverpoolToVK::ClampMode(sampler.clamp_z), .mipLodBias = std::min(sampler.LodBias(), instance.MaxSamplerLodBias()), - .anisotropyEnable = anisotropyEnable, - .maxAnisotropy = maxAnisotropy, + .anisotropyEnable = anisotropy_enable, + .maxAnisotropy = max_anisotropy, .compareEnable = sampler.depth_compare_func != AmdGpu::DepthCompare::Never, .compareOp = LiverpoolToVK::DepthCompare(sampler.depth_compare_func), .minLod = sampler.MinLod(), .maxLod = sampler.MaxLod(), - .borderColor = borderColor, + .borderColor = border_color, .unnormalizedCoordinates = false, // Handled in shader due to Vulkan limitations. }; auto [sampler_result, smplr] = instance.GetDevice().createSamplerUnique(sampler_ci);