mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-12-10 05:38:49 +00:00
ir: Perform degamma in shader when sampler sets force_degamma (#3420)
* ir: Perform degamma in shader when sampler sets force_degamma
* specialization: Add srgb if image is sampled

Might fix cases where sampler force_degamma is used with an sRGB image.
This commit is contained in:
@@ -141,6 +141,10 @@ Id EmitFPExp2(EmitContext& ctx, Id value) {
|
||||
return ctx.OpExp2(ctx.F32[1], value);
|
||||
}
|
||||
|
||||
/// Emits an OpPow on the operands x and y, using ctx.F32[1] as the result type.
Id EmitFPPow(EmitContext& ctx, Id x, Id y) {
    const Id result_type = ctx.F32[1];
    return ctx.OpPow(result_type, x, y);
}
|
||||
|
||||
/// Emits an OpLdexp on value and exp, using ctx.F32[1] as the result type.
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp) {
    const Id result_type = ctx.F32[1];
    return ctx.OpLdexp(result_type, value, exp);
}
|
||||
|
||||
@@ -289,6 +289,7 @@ Id EmitFPNeg64(EmitContext& ctx, Id value);
|
||||
Id EmitFPSin(EmitContext& ctx, Id value);
|
||||
Id EmitFPCos(EmitContext& ctx, Id value);
|
||||
Id EmitFPExp2(EmitContext& ctx, Id value);
|
||||
Id EmitFPPow(EmitContext& ctx, Id x, Id y);
|
||||
Id EmitFPLdexp(EmitContext& ctx, Id value, Id exp);
|
||||
Id EmitFPLog2(EmitContext& ctx, Id value);
|
||||
Id EmitFPRecip32(EmitContext& ctx, Id value);
|
||||
|
||||
@@ -29,17 +29,6 @@ static constexpr size_t NumBuffers = 40;
|
||||
static constexpr size_t NumSamplers = 16;
|
||||
static constexpr size_t NumFMasks = 8;
|
||||
|
||||
enum class TextureType : u32 {
|
||||
Color1D,
|
||||
ColorArray1D,
|
||||
Color2D,
|
||||
ColorArray2D,
|
||||
Color3D,
|
||||
ColorCube,
|
||||
Buffer,
|
||||
};
|
||||
constexpr u32 NUM_TEXTURE_TYPES = 7;
|
||||
|
||||
enum class BufferType : u32 {
|
||||
Guest,
|
||||
Flatbuf,
|
||||
|
||||
@@ -1169,6 +1169,10 @@ F32 IREmitter::FPLog2(const F32& value) {
|
||||
return Inst<F32>(Opcode::FPLog2, value);
|
||||
}
|
||||
|
||||
/// Builds an Opcode::FPPow instruction through Inst<F32> with x and y as its two operands.
F32 IREmitter::FPPow(const F32& x, const F32& y) {
    constexpr auto opcode = Opcode::FPPow;
    return Inst<F32>(opcode, x, y);
}
|
||||
|
||||
F32F64 IREmitter::FPRecip(const F32F64& value) {
|
||||
switch (value.Type()) {
|
||||
case Type::F32:
|
||||
|
||||
@@ -228,6 +228,7 @@ public:
|
||||
[[nodiscard]] F32 FPSin(const F32& value);
|
||||
[[nodiscard]] F32 FPExp2(const F32& value);
|
||||
[[nodiscard]] F32 FPLog2(const F32& value);
|
||||
[[nodiscard]] F32 FPPow(const F32& x, const F32& y);
|
||||
[[nodiscard]] F32 FPLdexp(const F32& value, const U32& exp);
|
||||
[[nodiscard]] F32F64 FPRecip(const F32F64& value);
|
||||
[[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
|
||||
|
||||
@@ -283,6 +283,7 @@ OPCODE(FPRecipSqrt64, F64, F64,
|
||||
OPCODE(FPSqrt, F32, F32, )
|
||||
OPCODE(FPSin, F32, F32, )
|
||||
OPCODE(FPExp2, F32, F32, )
|
||||
OPCODE(FPPow, F32, F32, F32, )
|
||||
OPCODE(FPLdexp, F32, F32, U32, )
|
||||
OPCODE(FPCos, F32, F32, )
|
||||
OPCODE(FPLog2, F32, F32, )
|
||||
|
||||
@@ -542,7 +542,7 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
|
||||
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
|
||||
|
||||
// Patch image instruction if image is FMask.
|
||||
if (image.IsFmask()) {
|
||||
if (AmdGpu::IsFmask(image.GetDataFmt())) {
|
||||
ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
@@ -830,8 +830,8 @@ IR::Value FixCubeCoords(IR::IREmitter& ir, const AmdGpu::Image& image, const IR:
|
||||
void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
const ImageResource& image_res, const AmdGpu::Image& image) {
|
||||
const auto handle = inst.Arg(0);
|
||||
const auto sampler_res = info.samplers[(handle.U32() >> 16) & 0xFFFF];
|
||||
auto sampler = sampler_res.GetSharp(info);
|
||||
const auto& sampler_res = info.samplers[(handle.U32() >> 16) & 0xFFFF];
|
||||
const auto sampler = sampler_res.GetSharp(info);
|
||||
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
|
||||
@@ -1001,7 +1001,10 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
|
||||
return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info);
|
||||
}();
|
||||
|
||||
const auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
|
||||
auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
|
||||
if (sampler.force_degamma && image.GetNumberFmt() != AmdGpu::NumberFormat::Srgb) {
|
||||
converted = ApplyForceDegamma(ir, texel, image.DstSelect());
|
||||
}
|
||||
inst.ReplaceUsesWith(converted);
|
||||
}
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "video_core/amdgpu/types.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
@@ -21,6 +21,36 @@ inline Value ApplySwizzle(IREmitter& ir, const Value& vector, const AmdGpu::Comp
|
||||
return swizzled;
|
||||
}
|
||||
|
||||
/// Converts gamma corrected value to linear space
|
||||
inline F32 ApplyGammaToLinear(IREmitter& ir, F32& c) {
|
||||
const F32 a =
|
||||
ir.FPPow(ir.FPMul(ir.FPAdd(c, ir.Imm32(0.055f)), ir.Imm32(1.0f / 1.055f)), ir.Imm32(2.4f));
|
||||
const F32 b = ir.FPMul(c, ir.Imm32(1.0f / 12.92f));
|
||||
return IR::F32{ir.Select(ir.FPGreaterThan(c, ir.Imm32(0.04045f)), a, b)};
|
||||
}
|
||||
|
||||
/// Runs the gamma-to-linear conversion over a 4-component value.
///
/// Each of the four components is extracted from `value`; a component is passed through
/// ApplyGammaToLinear only when AmdGpu::IsRgb() accepts the matching entry of `mapping`
/// (r, g, b, a for components 0..3). The remaining components are forwarded untouched.
///
/// @param ir      Emitter used to build the instructions.
/// @param value   Composite the four components are extracted from.
/// @param mapping Component mapping that decides which components get converted.
/// @return A new composite built from the four (possibly converted) components.
inline Value ApplyForceDegamma(IREmitter& ir, const Value& value,
                               const AmdGpu::CompMapping& mapping) {
    // All extracts are emitted first, in component order, before any conversion.
    F32 lane0{ir.CompositeExtract(value, 0)};
    F32 lane1{ir.CompositeExtract(value, 1)};
    F32 lane2{ir.CompositeExtract(value, 2)};
    F32 lane3{ir.CompositeExtract(value, 3)};

    // Gamma correction is only applied to RGB components
    const auto degamma_if_rgb = [&ir](F32& lane, const auto selector) {
        if (AmdGpu::IsRgb(selector)) {
            lane = ApplyGammaToLinear(ir, lane);
        }
    };
    degamma_if_rgb(lane0, mapping.r);
    degamma_if_rgb(lane1, mapping.g);
    degamma_if_rgb(lane2, mapping.b);
    degamma_if_rgb(lane3, mapping.a);

    return ir.CompositeConstruct(lane0, lane1, lane2, lane3);
}
|
||||
|
||||
/// Applies a number conversion in the read direction.
|
||||
inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value,
|
||||
const AmdGpu::NumberConversion& conversion) {
|
||||
|
||||
@@ -48,6 +48,7 @@ struct ImageSpecialization {
|
||||
bool is_integer = false;
|
||||
bool is_storage = false;
|
||||
bool is_cube = false;
|
||||
bool is_srgb = false;
|
||||
AmdGpu::CompMapping dst_select{};
|
||||
AmdGpu::NumberConversion num_conversion{};
|
||||
|
||||
@@ -62,7 +63,8 @@ struct FMaskSpecialization {
|
||||
};
|
||||
|
||||
struct SamplerSpecialization {
|
||||
bool force_unnormalized = false;
|
||||
u8 force_unnormalized : 1;
|
||||
u8 force_degamma : 1;
|
||||
|
||||
auto operator<=>(const SamplerSpecialization&) const = default;
|
||||
};
|
||||
@@ -136,6 +138,8 @@ struct StageSpecialization {
|
||||
spec.is_cube = sharp.IsCube();
|
||||
if (spec.is_storage) {
|
||||
spec.dst_select = sharp.DstSelect();
|
||||
} else {
|
||||
spec.is_srgb = sharp.GetNumberFmt() == AmdGpu::NumberFormat::Srgb;
|
||||
}
|
||||
spec.num_conversion = sharp.GetNumberConversion();
|
||||
});
|
||||
@@ -147,6 +151,7 @@ struct StageSpecialization {
|
||||
ForEachSharp(samplers, info->samplers,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Sampler sharp) {
|
||||
spec.force_unnormalized = sharp.force_unnormalized;
|
||||
spec.force_degamma = sharp.force_degamma;
|
||||
});
|
||||
|
||||
// Initialize runtime_info fields that rely on analysis in tessellation passes
|
||||
|
||||
Reference in New Issue
Block a user