mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-12-12 14:48:52 +00:00
shader_recompiler: Add lowering pass for when 64-bit float is unsupported. (#2858)
* shader_recompiler: Add lowering pass for when 64-bit float is unsupported.
* shader_recompiler: Fix PackDouble2x32/UnpackDouble2x32 type.
* shader_recompiler: Remove extra bit cast implementations.
This commit is contained in:
@@ -84,16 +84,6 @@ IR::F16 IREmitter::BitCast<IR::F16, IR::U16>(const IR::U16& value) {
|
||||
return Inst<IR::F16>(Opcode::BitCastF16U16, value);
|
||||
}
|
||||
|
||||
template <>
|
||||
IR::U64 IREmitter::BitCast<IR::U64, IR::F64>(const IR::F64& value) {
|
||||
return Inst<IR::U64>(Opcode::BitCastU64F64, value);
|
||||
}
|
||||
|
||||
template <>
|
||||
IR::F64 IREmitter::BitCast<IR::F64, IR::U64>(const IR::U64& value) {
|
||||
return Inst<IR::F64>(Opcode::BitCastF64U64, value);
|
||||
}
|
||||
|
||||
U1 IREmitter::ConditionRef(const U1& value) {
|
||||
return Inst<U1>(Opcode::ConditionRef, value);
|
||||
}
|
||||
@@ -841,8 +831,12 @@ Value IREmitter::UnpackUint2x32(const U64& value) {
|
||||
return Inst<Value>(Opcode::UnpackUint2x32, value);
|
||||
}
|
||||
|
||||
F64 IREmitter::PackFloat2x32(const Value& vector) {
|
||||
return Inst<F64>(Opcode::PackFloat2x32, vector);
|
||||
/// Emits a PackDouble2x32 instruction whose single operand is `vector` and hands back the
/// resulting F64 value.
F64 IREmitter::PackDouble2x32(const Value& vector) {
    const F64 packed{Inst<F64>(Opcode::PackDouble2x32, vector)};
    return packed;
}
|
||||
|
||||
/// Emits an UnpackDouble2x32 instruction whose single operand is `value` and hands back the
/// resulting (untyped) Value.
Value IREmitter::UnpackDouble2x32(const F64& value) {
    const Value unpacked{Inst<Value>(Opcode::UnpackDouble2x32, value)};
    return unpacked;
}
|
||||
|
||||
U32 IREmitter::Pack2x16(const AmdGpu::NumberFormat number_format, const Value& vector) {
|
||||
|
||||
@@ -181,7 +181,8 @@ public:
|
||||
|
||||
[[nodiscard]] U64 PackUint2x32(const Value& vector);
|
||||
[[nodiscard]] Value UnpackUint2x32(const U64& value);
|
||||
[[nodiscard]] F64 PackFloat2x32(const Value& vector);
|
||||
[[nodiscard]] F64 PackDouble2x32(const Value& vector);
|
||||
[[nodiscard]] Value UnpackDouble2x32(const F64& value);
|
||||
|
||||
[[nodiscard]] U32 Pack2x16(AmdGpu::NumberFormat number_format, const Value& vector);
|
||||
[[nodiscard]] Value Unpack2x16(AmdGpu::NumberFormat number_format, const U32& value);
|
||||
|
||||
@@ -191,14 +191,13 @@ OPCODE(SelectF64, F64, U1,
|
||||
// Bitwise conversions
|
||||
OPCODE(BitCastU16F16, U16, F16, )
|
||||
OPCODE(BitCastU32F32, U32, F32, )
|
||||
OPCODE(BitCastU64F64, U64, F64, )
|
||||
OPCODE(BitCastF16U16, F16, U16, )
|
||||
OPCODE(BitCastF32U32, F32, U32, )
|
||||
OPCODE(BitCastF64U64, F64, U64, )
|
||||
|
||||
OPCODE(PackUint2x32, U64, U32x2, )
|
||||
OPCODE(UnpackUint2x32, U32x2, U64, )
|
||||
OPCODE(PackFloat2x32, F64, F32x2, )
|
||||
OPCODE(PackDouble2x32, F64, U32x2, )
|
||||
OPCODE(UnpackDouble2x32, U32x2, F64, )
|
||||
|
||||
OPCODE(PackUnorm2x16, U32, F32x2, )
|
||||
OPCODE(UnpackUnorm2x16, F32x2, U32, )
|
||||
|
||||
@@ -21,6 +21,7 @@ void ReadLaneEliminationPass(IR::Program& program);
|
||||
void ResourceTrackingPass(IR::Program& program);
|
||||
void CollectShaderInfoPass(IR::Program& program);
|
||||
void LowerBufferFormatToRaw(IR::Program& program);
|
||||
void LowerFp64ToFp32(IR::Program& program);
|
||||
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info);
|
||||
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);
|
||||
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
|
||||
|
||||
186
src/shader_recompiler/ir/passes/lower_fp64_to_fp32.cpp
Normal file
186
src/shader_recompiler/ir/passes/lower_fp64_to_fp32.cpp
Normal file
@@ -0,0 +1,186 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
// Exponent re-bias deltas between IEEE-754 binary64 (bias 1023) and binary32 (bias 127).
// Each delta is *added* to the biased exponent of the source format by the conversion helpers
// below, so:
//   exp32 = exp64 + (127 - 1023)
//   exp64 = exp32 + (1023 - 127)
// Using the opposite signs corrupts the result: e.g. double 1.0 (exp 1023) would become
// 1023 + 896 = 0x77f, whose bit 8 spills into the float sign bit once shifted into place.
constexpr s32 F64ToF32Exp = +127 - 1023;
constexpr s32 F32ToF64Exp = +1023 - 127;
|
||||
|
||||
/// Emits IR that narrows a double, supplied as a two-component composite of raw 32-bit words
/// (component 0 = low word, component 1 = high word), into a 32-bit float.
///
/// The sign bit is copied as-is. The exponent has F64ToF32Exp added to it, except that an
/// all-zero exponent stays zero and an all-ones exponent (0x7ff) becomes 0xff. Only the top
/// 23 mantissa bits are kept; the remaining low 29 bits are discarded without rounding.
static IR::F32 PackedF64ToF32(IR::IREmitter& ir, const IR::Value& packed) {
    const IR::U32 low_word{ir.CompositeExtract(packed, 0)};
    const IR::U32 high_word{ir.CompositeExtract(packed, 1)};

    // High word layout: sign (bit 31), exponent (bits 20-30), top 20 mantissa bits (0-19).
    const IR::U32 sign_bit{ir.BitFieldExtract(high_word, ir.Imm32(31), ir.Imm32(1))};
    const IR::U32 exp64{ir.BitFieldExtract(high_word, ir.Imm32(20), ir.Imm32(11))};
    const IR::U32 frac_top{ir.BitFieldExtract(high_word, ir.Imm32(0), ir.Imm32(20))};
    // Of the low word only its three most significant bits fit into the 23-bit mantissa.
    const IR::U32 frac_bottom{ir.BitFieldExtract(low_word, ir.Imm32(29), ir.Imm32(3))};
    const IR::U32 frac_top_shifted{ir.ShiftLeftLogical(frac_top, ir.Imm32(3))};
    const IR::U32 frac23{ir.BitwiseOr(frac_top_shifted, frac_bottom)};

    // Re-bias the exponent, special-casing the all-zero and all-ones encodings.
    const IR::U1 exp_is_zero{ir.IEqual(exp64, ir.Imm32(0))};
    const IR::U32 exp_rebiased{ir.IAdd(exp64, ir.Imm32(F64ToF32Exp))};
    const IR::U32 exp_or_zero{ir.Select(exp_is_zero, ir.Imm32(0), exp_rebiased)};
    const IR::U1 exp_is_all_ones{ir.IEqual(exp64, ir.Imm32(0x7ff))};
    const IR::U32 exp32{ir.Select(exp_is_all_ones, ir.Imm32(0xff), exp_or_zero)};

    // Reassemble sign | exponent | mantissa in the 32-bit float layout.
    const IR::U32 sign_field{ir.ShiftLeftLogical(sign_bit, ir.Imm32(31))};
    const IR::U32 exp_field{ir.ShiftLeftLogical(exp32, ir.Imm32(23))};
    const IR::U32 exp_and_frac{ir.BitwiseOr(exp_field, frac23)};
    const IR::U32 float_bits{ir.BitwiseOr(sign_field, exp_and_frac)};
    return ir.BitCast<IR::F32>(float_bits);
}
|
||||
|
||||
/// Emits IR that widens a 32-bit float into the raw bit pattern of a double, returned as a
/// two-component composite (component 0 = low word, component 1 = high word).
///
/// `raw` is converted to IR::F32 before being bit-cast. The sign bit is copied as-is. The
/// exponent has F32ToF64Exp added to it, except that an all-zero exponent stays zero and an
/// all-ones exponent (0xff) becomes 0x7ff. The 23 mantissa bits are placed at the top of the
/// double's mantissa field: 20 bits in the high word, 3 bits at the top of the low word.
IR::Value F32ToPackedF64(IR::IREmitter& ir, const IR::Value& raw) {
    const IR::U32 float_bits{ir.BitCast<IR::U32>(IR::F32(raw))};

    // Float layout: sign (bit 31), exponent (bits 23-30), mantissa (bits 0-22).
    const IR::U32 sign_bit{ir.BitFieldExtract(float_bits, ir.Imm32(31), ir.Imm32(1))};
    const IR::U32 exp32{ir.BitFieldExtract(float_bits, ir.Imm32(23), ir.Imm32(8))};
    const IR::U32 frac23{ir.BitFieldExtract(float_bits, ir.Imm32(0), ir.Imm32(23))};
    // Upper 20 mantissa bits go into the high word, the lower 3 into the low word.
    const IR::U32 frac_top{ir.BitFieldExtract(frac23, ir.Imm32(3), ir.Imm32(20))};
    const IR::U32 frac_bottom{ir.BitFieldExtract(frac23, ir.Imm32(0), ir.Imm32(3))};

    // Re-bias the exponent, special-casing the all-zero and all-ones encodings.
    const IR::U1 exp_is_zero{ir.IEqual(exp32, ir.Imm32(0))};
    const IR::U32 exp_rebiased{ir.IAdd(exp32, ir.Imm32(F32ToF64Exp))};
    const IR::U32 exp_or_zero{ir.Select(exp_is_zero, ir.Imm32(0), exp_rebiased)};
    const IR::U1 exp_is_all_ones{ir.IEqual(exp32, ir.Imm32(0xff))};
    const IR::U32 exp64{ir.Select(exp_is_all_ones, ir.Imm32(0x7ff), exp_or_zero)};

    const IR::U32 low_word{ir.ShiftLeftLogical(frac_bottom, ir.Imm32(29))};

    const IR::U32 sign_field{ir.ShiftLeftLogical(sign_bit, ir.Imm32(31))};
    const IR::U32 exp_field{ir.ShiftLeftLogical(exp64, ir.Imm32(20))};
    const IR::U32 exp_and_frac{ir.BitwiseOr(exp_field, frac_top)};
    const IR::U32 high_word{ir.BitwiseOr(sign_field, exp_and_frac)};

    return ir.CompositeConstruct(low_word, high_word);
}
|
||||
|
||||
/// Maps a 64-bit floating-point opcode to the opcode that replaces it once doubles are lowered
/// to floats.
///
/// Composite, select, arithmetic, rounding, comparison and classification opcodes map to their
/// 32-bit counterparts. Conversions between F32 and F64 become Identity, and conversions
/// between F64 and 32-bit integers become the matching F32 conversions. Any opcode without an
/// entry here is returned unchanged.
static IR::Opcode Replace(IR::Opcode op) {
    switch (op) {
    // Composite operations
    case IR::Opcode::CompositeConstructF64x2:
        return IR::Opcode::CompositeConstructF32x2;
    case IR::Opcode::CompositeConstructF64x3:
        return IR::Opcode::CompositeConstructF32x3;
    case IR::Opcode::CompositeConstructF64x4:
        return IR::Opcode::CompositeConstructF32x4;
    case IR::Opcode::CompositeExtractF64x2:
        return IR::Opcode::CompositeExtractF32x2;
    case IR::Opcode::CompositeExtractF64x3:
        return IR::Opcode::CompositeExtractF32x3;
    case IR::Opcode::CompositeExtractF64x4:
        return IR::Opcode::CompositeExtractF32x4;
    case IR::Opcode::CompositeInsertF64x2:
        return IR::Opcode::CompositeInsertF32x2;
    case IR::Opcode::CompositeInsertF64x3:
        return IR::Opcode::CompositeInsertF32x3;
    case IR::Opcode::CompositeInsertF64x4:
        return IR::Opcode::CompositeInsertF32x4;
    case IR::Opcode::CompositeShuffleF64x2:
        return IR::Opcode::CompositeShuffleF32x2;
    case IR::Opcode::CompositeShuffleF64x3:
        return IR::Opcode::CompositeShuffleF32x3;
    case IR::Opcode::CompositeShuffleF64x4:
        return IR::Opcode::CompositeShuffleF32x4;
    // Select: must be narrowed like everything else, otherwise a 64-bit select would survive
    // the lowering with operands that are now 32-bit.
    case IR::Opcode::SelectF64:
        return IR::Opcode::SelectF32;
    // Arithmetic
    case IR::Opcode::FPAbs64:
        return IR::Opcode::FPAbs32;
    case IR::Opcode::FPAdd64:
        return IR::Opcode::FPAdd32;
    case IR::Opcode::FPFma64:
        return IR::Opcode::FPFma32;
    case IR::Opcode::FPMax64:
        return IR::Opcode::FPMax32;
    case IR::Opcode::FPMin64:
        return IR::Opcode::FPMin32;
    case IR::Opcode::FPMul64:
        return IR::Opcode::FPMul32;
    case IR::Opcode::FPDiv64:
        return IR::Opcode::FPDiv32;
    case IR::Opcode::FPNeg64:
        return IR::Opcode::FPNeg32;
    case IR::Opcode::FPRecip64:
        return IR::Opcode::FPRecip32;
    case IR::Opcode::FPRecipSqrt64:
        return IR::Opcode::FPRecipSqrt32;
    case IR::Opcode::FPSaturate64:
        return IR::Opcode::FPSaturate32;
    case IR::Opcode::FPClamp64:
        return IR::Opcode::FPClamp32;
    // Rounding and decomposition
    case IR::Opcode::FPRoundEven64:
        return IR::Opcode::FPRoundEven32;
    case IR::Opcode::FPFloor64:
        return IR::Opcode::FPFloor32;
    case IR::Opcode::FPCeil64:
        return IR::Opcode::FPCeil32;
    case IR::Opcode::FPTrunc64:
        return IR::Opcode::FPTrunc32;
    case IR::Opcode::FPFract64:
        return IR::Opcode::FPFract32;
    case IR::Opcode::FPFrexpSig64:
        return IR::Opcode::FPFrexpSig32;
    case IR::Opcode::FPFrexpExp64:
        return IR::Opcode::FPFrexpExp32;
    // Comparisons
    case IR::Opcode::FPOrdEqual64:
        return IR::Opcode::FPOrdEqual32;
    case IR::Opcode::FPUnordEqual64:
        return IR::Opcode::FPUnordEqual32;
    case IR::Opcode::FPOrdNotEqual64:
        return IR::Opcode::FPOrdNotEqual32;
    case IR::Opcode::FPUnordNotEqual64:
        return IR::Opcode::FPUnordNotEqual32;
    case IR::Opcode::FPOrdLessThan64:
        return IR::Opcode::FPOrdLessThan32;
    case IR::Opcode::FPUnordLessThan64:
        return IR::Opcode::FPUnordLessThan32;
    case IR::Opcode::FPOrdGreaterThan64:
        return IR::Opcode::FPOrdGreaterThan32;
    case IR::Opcode::FPUnordGreaterThan64:
        return IR::Opcode::FPUnordGreaterThan32;
    case IR::Opcode::FPOrdLessThanEqual64:
        return IR::Opcode::FPOrdLessThanEqual32;
    case IR::Opcode::FPUnordLessThanEqual64:
        return IR::Opcode::FPUnordLessThanEqual32;
    case IR::Opcode::FPOrdGreaterThanEqual64:
        return IR::Opcode::FPOrdGreaterThanEqual32;
    case IR::Opcode::FPUnordGreaterThanEqual64:
        return IR::Opcode::FPUnordGreaterThanEqual32;
    // Classification
    case IR::Opcode::FPIsNan64:
        return IR::Opcode::FPIsNan32;
    case IR::Opcode::FPIsInf64:
        return IR::Opcode::FPIsInf32;
    // Conversions: F32 <-> F64 turns into a no-op because both sides are F32 after lowering.
    case IR::Opcode::ConvertS32F64:
        return IR::Opcode::ConvertS32F32;
    case IR::Opcode::ConvertF32F64:
        return IR::Opcode::Identity;
    case IR::Opcode::ConvertF64F32:
        return IR::Opcode::Identity;
    case IR::Opcode::ConvertF64S32:
        return IR::Opcode::ConvertF32S32;
    case IR::Opcode::ConvertF64U32:
        return IR::Opcode::ConvertF32U32;
    default:
        // Not a 64-bit float opcode handled by this pass; leave it untouched.
        return op;
    }
}
|
||||
|
||||
/// Lowers one instruction.
///
/// PackDouble2x32 and UnpackDouble2x32 have their uses redirected to bit-manipulation IR
/// emitted immediately before the instruction (via PackedF64ToF32 / F32ToPackedF64 on its
/// first argument). Every other instruction simply has its opcode swapped for whatever
/// Replace() returns, which may be the same opcode.
static void Lower(IR::Block& block, IR::Inst& inst) {
    const IR::Opcode opcode{inst.GetOpcode()};
    const bool is_pack{opcode == IR::Opcode::PackDouble2x32};
    const bool is_unpack{opcode == IR::Opcode::UnpackDouble2x32};

    if (!is_pack && !is_unpack) {
        inst.ReplaceOpcode(Replace(opcode));
        return;
    }

    // New instructions are inserted at the position of the instruction being lowered.
    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
    if (is_pack) {
        inst.ReplaceUsesWith(PackedF64ToF32(ir, inst.Arg(0)));
    } else {
        inst.ReplaceUsesWith(F32ToPackedF64(ir, inst.Arg(0)));
    }
}
|
||||
|
||||
/// Pass entry point: visits every instruction of every block in `program`, in order, and
/// lowers it with Lower().
void LowerFp64ToFp32(IR::Program& program) {
    for (IR::Block* const current_block : program.blocks) {
        auto&& instructions = current_block->Instructions();
        for (auto it = instructions.begin(); it != instructions.end(); ++it) {
            Lower(*current_block, *it);
        }
    }
}
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
@@ -44,7 +44,8 @@ void Visit(Info& info, const IR::Inst& inst) {
|
||||
case IR::Opcode::BitCastF16U16:
|
||||
info.uses_fp16 = true;
|
||||
break;
|
||||
case IR::Opcode::BitCastU64F64:
|
||||
case IR::Opcode::PackDouble2x32:
|
||||
case IR::Opcode::UnpackDouble2x32:
|
||||
info.uses_fp64 = true;
|
||||
break;
|
||||
case IR::Opcode::ImageWrite:
|
||||
|
||||
Reference in New Issue
Block a user