diff --git a/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp index 3fdc6f0cd..3bfdf657e 100644 --- a/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp +++ b/src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp @@ -202,7 +202,7 @@ static void LowerBufferFormatInst(IR::Block& block, IR::Inst& inst, Info& info) .data_format = data_format, .num_format = num_format, .swizzle = is_inst_typed ? AmdGpu::IdentityMapping : buffer.DstSelect(), - .num_conversion = AmdGpu::MapNumberConversion(num_format), + .num_conversion = AmdGpu::MapNumberConversion(num_format, data_format), .num_components = AmdGpu::NumComponents(data_format), }; diff --git a/src/shader_recompiler/ir/reinterpret.h b/src/shader_recompiler/ir/reinterpret.h index e63aff4c9..fb6e1efb8 100644 --- a/src/shader_recompiler/ir/reinterpret.h +++ b/src/shader_recompiler/ir/reinterpret.h @@ -34,10 +34,16 @@ inline F32 ApplyReadNumberConversion(IREmitter& ir, const F32& value, case AmdGpu::NumberConversion::UnormToUbnorm: // Convert 0...1 to -1...1 return ir.FPSub(ir.FPMul(value, ir.Imm32(2.f)), ir.Imm32(1.f)); - case AmdGpu::NumberConversion::SintToSnormNz: { + case AmdGpu::NumberConversion::Sint8ToSnormNz: { const IR::U32 additon = ir.IAdd(ir.IMul(ir.BitCast(value), ir.Imm32(2)), ir.Imm32(1)); const IR::F32 left = ir.ConvertSToF(32, 32, additon); - const IR::F32 max = ir.Imm32(float(std::numeric_limits::max())); + const IR::F32 max = ir.Imm32(float(std::numeric_limits::max())); + return ir.FPDiv(left, max); + } + case AmdGpu::NumberConversion::Sint16ToSnormNz: { + const IR::U32 additon = ir.IAdd(ir.IMul(ir.BitCast(value), ir.Imm32(2)), ir.Imm32(1)); + const IR::F32 left = ir.ConvertSToF(32, 32, additon); + const IR::F32 max = ir.Imm32(float(std::numeric_limits::max())); return ir.FPDiv(left, max); } default: @@ -72,12 +78,18 @@ inline F32 ApplyWriteNumberConversion(IREmitter& ir, const F32& value, case AmdGpu::NumberConversion::UnormToUbnorm: // Convert -1...1 to 0...1 return ir.FPDiv(ir.FPAdd(value, ir.Imm32(1.f)), ir.Imm32(2.f)); - case AmdGpu::NumberConversion::SintToSnormNz: { - const IR::F32 max = ir.Imm32(float(std::numeric_limits::max())); + case AmdGpu::NumberConversion::Sint8ToSnormNz: { + const IR::F32 max = ir.Imm32(float(std::numeric_limits::max())); const IR::F32 mul = ir.FPMul(ir.FPClamp(value, ir.Imm32(-1.f), ir.Imm32(1.f)), max); const IR::F32 left = ir.FPSub(mul, ir.Imm32(1.f)); return ir.BitCast(U32{ir.FPDiv(left, ir.Imm32(2.f))}); } + case AmdGpu::NumberConversion::Sint16ToSnormNz: { + const IR::F32 max = ir.Imm32(float(std::numeric_limits::max())); + const IR::F32 mul = ir.FPMul(ir.FPClamp(value, ir.Imm32(-1.f), ir.Imm32(1.f)), max); + const IR::F32 left = ir.FPSub(mul, ir.Imm32(1.f)); + return ir.BitCast(U32{ir.FPDiv(left, ir.Imm32(2.f))}); + } default: UNREACHABLE(); } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 8f9292f1c..56829ffc6 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -932,7 +932,7 @@ struct Liverpool { } [[nodiscard]] NumberConversion GetNumberConversion() const { - return MapNumberConversion(info.number_type); + return MapNumberConversion(info.number_type, info.format); } [[nodiscard]] CompMapping Swizzle() const { diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 64a85c812..c387c7bf2 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -68,7 +68,7 @@ struct Buffer { } NumberConversion GetNumberConversion() const noexcept { - return MapNumberConversion(NumberFormat(num_format)); + return MapNumberConversion(NumberFormat(num_format), DataFormat(data_format)); } u32 GetStride() const noexcept { @@ -292,7 +292,7 @@ struct Image { } NumberConversion GetNumberConversion() const noexcept { - return MapNumberConversion(NumberFormat(num_format)); + return MapNumberConversion(NumberFormat(num_format), DataFormat(data_format)); } TilingMode GetTilingMode() const { diff --git a/src/video_core/amdgpu/types.h b/src/video_core/amdgpu/types.h index 1044a1aaa..cd4c141cf 100644 --- a/src/video_core/amdgpu/types.h +++ b/src/video_core/amdgpu/types.h @@ -197,7 +197,8 @@ enum class NumberConversion : u32 { UintToUscaled = 1, SintToSscaled = 2, UnormToUbnorm = 3, - SintToSnormNz = 4, + Sint16ToSnormNz = 5, + Sint8ToSnormNz = 6, }; struct CompMapping { @@ -338,16 +339,29 @@ inline CompMapping RemapSwizzle(const DataFormat format, const CompMapping swizz } } -inline NumberConversion MapNumberConversion(const NumberFormat format) { - switch (format) { +inline NumberConversion MapNumberConversion(const NumberFormat num_fmt, const DataFormat data_fmt) { + switch (num_fmt) { case NumberFormat::Uscaled: return NumberConversion::UintToUscaled; case NumberFormat::Sscaled: return NumberConversion::SintToSscaled; case NumberFormat::Ubnorm: return NumberConversion::UnormToUbnorm; - case NumberFormat::SnormNz: - return NumberConversion::SintToSnormNz; + case NumberFormat::SnormNz: { + switch (data_fmt) { + case DataFormat::Format8: + case DataFormat::Format8_8: + case DataFormat::Format8_8_8_8: + return NumberConversion::Sint8ToSnormNz; + case DataFormat::Format16: + case DataFormat::Format16_16: + case DataFormat::Format16_16_16_16: + return NumberConversion::Sint16ToSnormNz; + default: + UNREACHABLE_MSG("data_fmt = {}", u32(data_fmt)); + break; + } + } default: return NumberConversion::None; }