mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-04 16:32:39 +00:00
DONT MERGE set log file to /dev/null DONT MERGE linux pthread bb fix save work DONT MERGE dump ir save more work fix mistake with ES shader skip list add input patch control points dynamic state random stuff
273 lines
11 KiB
C++
273 lines
11 KiB
C++
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
#include "shader_recompiler/runtime_info.h"
|
|
#pragma clang optimize off
|
|
#include "shader_recompiler/frontend/translate/translate.h"
|
|
#include "shader_recompiler/ir/reg.h"
|
|
|
|
namespace Shader::Gcn {
|
|
|
|
void Translator::EmitDataShare(const GcnInst& inst) {
|
|
switch (inst.opcode) {
|
|
// DS
|
|
case Opcode::DS_ADD_U32:
|
|
return DS_ADD_U32(inst, false);
|
|
case Opcode::DS_MIN_I32:
|
|
return DS_MIN_U32(inst, true, false);
|
|
case Opcode::DS_MAX_I32:
|
|
return DS_MAX_U32(inst, true, false);
|
|
case Opcode::DS_MIN_U32:
|
|
return DS_MIN_U32(inst, false, false);
|
|
case Opcode::DS_MAX_U32:
|
|
return DS_MAX_U32(inst, false, false);
|
|
case Opcode::DS_AND_B32:
|
|
return DS_AND_B32(inst, false);
|
|
case Opcode::DS_OR_B32:
|
|
return DS_OR_B32(inst, false);
|
|
case Opcode::DS_XOR_B32:
|
|
return DS_XOR_B32(inst, false);
|
|
case Opcode::DS_WRITE_B32:
|
|
return DS_WRITE(32, false, false, false, inst);
|
|
case Opcode::DS_WRITE2_B32:
|
|
return DS_WRITE(32, false, true, false, inst);
|
|
case Opcode::DS_WRITE2ST64_B32:
|
|
return DS_WRITE(32, false, true, true, inst);
|
|
case Opcode::DS_ADD_RTN_U32:
|
|
return DS_ADD_U32(inst, true);
|
|
case Opcode::DS_MIN_RTN_U32:
|
|
return DS_MIN_U32(inst, false, true);
|
|
case Opcode::DS_MAX_RTN_U32:
|
|
return DS_MAX_U32(inst, false, true);
|
|
case Opcode::DS_AND_RTN_B32:
|
|
return DS_AND_B32(inst, true);
|
|
case Opcode::DS_OR_RTN_B32:
|
|
return DS_OR_B32(inst, true);
|
|
case Opcode::DS_XOR_RTN_B32:
|
|
return DS_XOR_B32(inst, true);
|
|
case Opcode::DS_SWIZZLE_B32:
|
|
return DS_SWIZZLE_B32(inst);
|
|
case Opcode::DS_READ_B32:
|
|
return DS_READ(32, false, false, false, inst);
|
|
case Opcode::DS_READ2_B32:
|
|
return DS_READ(32, false, true, false, inst);
|
|
case Opcode::DS_READ2ST64_B32:
|
|
return DS_READ(32, false, true, true, inst);
|
|
case Opcode::DS_CONSUME:
|
|
return DS_CONSUME(inst);
|
|
case Opcode::DS_APPEND:
|
|
return DS_APPEND(inst);
|
|
case Opcode::DS_WRITE_B64:
|
|
return DS_WRITE(64, false, false, false, inst);
|
|
case Opcode::DS_WRITE2_B64:
|
|
return DS_WRITE(64, false, true, false, inst);
|
|
case Opcode::DS_READ_B64:
|
|
return DS_READ(64, false, false, false, inst);
|
|
case Opcode::DS_READ2_B64:
|
|
return DS_READ(64, false, true, false, inst);
|
|
default:
|
|
LogMissingOpcode(inst);
|
|
}
|
|
}
|
|
|
|
// VOP2
|
|
|
|
void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
|
|
const IR::U32 value{GetSrc(inst.src[0])};
|
|
|
|
if (info.l_stage == LogicalStage::Compute ||
|
|
info.l_stage == LogicalStage::TessellationControl) {
|
|
SetDst(inst.dst[0], ir.ReadFirstLane(value));
|
|
} else {
|
|
SetDst(inst.dst[0], value);
|
|
}
|
|
}
|
|
|
|
void Translator::V_READLANE_B32(const GcnInst& inst) {
|
|
const IR::U32 value{GetSrc(inst.src[0])};
|
|
const IR::U32 lane{GetSrc(inst.src[1])};
|
|
SetDst(inst.dst[0], ir.ReadLane(value, lane));
|
|
}
|
|
|
|
void Translator::V_WRITELANE_B32(const GcnInst& inst) {
|
|
const IR::VectorReg dst{inst.dst[0].code};
|
|
const IR::U32 value{GetSrc(inst.src[0])};
|
|
const IR::U32 lane{GetSrc(inst.src[1])};
|
|
const IR::U32 old_value{GetSrc(inst.dst[0])};
|
|
ir.SetVectorReg(dst, ir.WriteLane(old_value, value, lane));
|
|
}
|
|
|
|
// DS
|
|
|
|
void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
|
|
const IR::U32 addr{GetSrc(inst.src[0])};
|
|
const IR::U32 data{GetSrc(inst.src[1])};
|
|
const IR::U32 offset =
|
|
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
|
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
|
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
|
|
if (rtn) {
|
|
SetDst(inst.dst[0], IR::U32{original_val});
|
|
}
|
|
}
|
|
|
|
void Translator::DS_MIN_U32(const GcnInst& inst, bool is_signed, bool rtn) {
|
|
const IR::U32 addr{GetSrc(inst.src[0])};
|
|
const IR::U32 data{GetSrc(inst.src[1])};
|
|
const IR::U32 offset =
|
|
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
|
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
|
const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, is_signed);
|
|
if (rtn) {
|
|
SetDst(inst.dst[0], IR::U32{original_val});
|
|
}
|
|
}
|
|
|
|
void Translator::DS_MAX_U32(const GcnInst& inst, bool is_signed, bool rtn) {
|
|
const IR::U32 addr{GetSrc(inst.src[0])};
|
|
const IR::U32 data{GetSrc(inst.src[1])};
|
|
const IR::U32 offset =
|
|
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
|
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
|
const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, is_signed);
|
|
if (rtn) {
|
|
SetDst(inst.dst[0], IR::U32{original_val});
|
|
}
|
|
}
|
|
|
|
void Translator::DS_AND_B32(const GcnInst& inst, bool rtn) {
|
|
const IR::U32 addr{GetSrc(inst.src[0])};
|
|
const IR::U32 data{GetSrc(inst.src[1])};
|
|
const IR::U32 offset =
|
|
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
|
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
|
const IR::Value original_val = ir.SharedAtomicAnd(addr_offset, data);
|
|
if (rtn) {
|
|
SetDst(inst.dst[0], IR::U32{original_val});
|
|
}
|
|
}
|
|
|
|
void Translator::DS_OR_B32(const GcnInst& inst, bool rtn) {
|
|
const IR::U32 addr{GetSrc(inst.src[0])};
|
|
const IR::U32 data{GetSrc(inst.src[1])};
|
|
const IR::U32 offset =
|
|
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
|
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
|
const IR::Value original_val = ir.SharedAtomicOr(addr_offset, data);
|
|
if (rtn) {
|
|
SetDst(inst.dst[0], IR::U32{original_val});
|
|
}
|
|
}
|
|
|
|
void Translator::DS_XOR_B32(const GcnInst& inst, bool rtn) {
|
|
const IR::U32 addr{GetSrc(inst.src[0])};
|
|
const IR::U32 data{GetSrc(inst.src[1])};
|
|
const IR::U32 offset =
|
|
ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0));
|
|
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
|
const IR::Value original_val = ir.SharedAtomicXor(addr_offset, data);
|
|
if (rtn) {
|
|
SetDst(inst.dst[0], IR::U32{original_val});
|
|
}
|
|
}
|
|
|
|
void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool stride64,
|
|
const GcnInst& inst) {
|
|
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
|
|
const IR::VectorReg data0{inst.src[1].code};
|
|
const IR::VectorReg data1{inst.src[2].code};
|
|
if (is_pair) {
|
|
const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
|
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
|
|
if (bit_size == 32) {
|
|
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
|
|
} else {
|
|
ir.WriteShared(
|
|
64, ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1)),
|
|
addr0);
|
|
}
|
|
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
|
|
if (bit_size == 32) {
|
|
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
|
|
} else {
|
|
ir.WriteShared(
|
|
64, ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1)),
|
|
addr1);
|
|
}
|
|
} else if (bit_size == 64) {
|
|
const IR::U32 addr0 = ir.IAdd(
|
|
addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)));
|
|
const IR::Value data =
|
|
ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1));
|
|
ir.WriteShared(bit_size, data, addr0);
|
|
} else {
|
|
const IR::U32 addr0 = ir.IAdd(
|
|
addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)));
|
|
ir.WriteShared(bit_size, ir.GetVectorReg(data0), addr0);
|
|
}
|
|
}
|
|
|
|
void Translator::DS_SWIZZLE_B32(const GcnInst& inst) {
|
|
const u8 offset0 = inst.control.ds.offset0;
|
|
const u8 offset1 = inst.control.ds.offset1;
|
|
const IR::U32 src{GetSrc(inst.src[0])};
|
|
// ASSERT(offset1 & 0x80);
|
|
const IR::U32 lane_id = ir.LaneId();
|
|
const IR::U32 id_in_group = ir.BitwiseAnd(lane_id, ir.Imm32(0b11));
|
|
const IR::U32 base = ir.ShiftLeftLogical(id_in_group, ir.Imm32(1));
|
|
const IR::U32 index = ir.BitFieldExtract(ir.Imm32(offset0), base, ir.Imm32(2));
|
|
SetDst(inst.dst[0], ir.QuadShuffle(src, index));
|
|
}
|
|
|
|
void Translator::DS_READ(int bit_size, bool is_signed, bool is_pair, bool stride64,
|
|
const GcnInst& inst) {
|
|
const IR::U32 addr{ir.GetVectorReg(IR::VectorReg(inst.src[0].code))};
|
|
IR::VectorReg dst_reg{inst.dst[0].code};
|
|
if (is_pair) {
|
|
// Pair loads are either 32 or 64-bit
|
|
const u32 adj = (bit_size == 32 ? 4 : 8) * (stride64 ? 64 : 1);
|
|
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset0 * adj)));
|
|
const IR::Value data0 = ir.LoadShared(bit_size, is_signed, addr0);
|
|
if (bit_size == 32) {
|
|
ir.SetVectorReg(dst_reg++, IR::U32{data0});
|
|
} else {
|
|
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 0)});
|
|
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data0, 1)});
|
|
}
|
|
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
|
|
const IR::Value data1 = ir.LoadShared(bit_size, is_signed, addr1);
|
|
if (bit_size == 32) {
|
|
ir.SetVectorReg(dst_reg++, IR::U32{data1});
|
|
} else {
|
|
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 0)});
|
|
ir.SetVectorReg(dst_reg++, IR::U32{ir.CompositeExtract(data1, 1)});
|
|
}
|
|
} else if (bit_size == 64) {
|
|
const IR::U32 addr0 = ir.IAdd(
|
|
addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)));
|
|
const IR::Value data = ir.LoadShared(bit_size, is_signed, addr0);
|
|
ir.SetVectorReg(dst_reg, IR::U32{ir.CompositeExtract(data, 0)});
|
|
ir.SetVectorReg(dst_reg + 1, IR::U32{ir.CompositeExtract(data, 1)});
|
|
} else {
|
|
const IR::U32 addr0 = ir.IAdd(
|
|
addr, ir.Imm32((u32(inst.control.ds.offset1) << 8u) + u32(inst.control.ds.offset0)));
|
|
const IR::U32 data = IR::U32{ir.LoadShared(bit_size, is_signed, addr0)};
|
|
ir.SetVectorReg(dst_reg, data);
|
|
}
|
|
}
|
|
|
|
void Translator::DS_APPEND(const GcnInst& inst) {
|
|
const u32 inst_offset = (u32(inst.control.ds.offset1) << 8u) + inst.control.ds.offset0;
|
|
const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));
|
|
const IR::U32 prev = ir.DataAppend(gds_offset);
|
|
SetDst(inst.dst[0], prev);
|
|
}
|
|
|
|
void Translator::DS_CONSUME(const GcnInst& inst) {
|
|
const u32 inst_offset = (u32(inst.control.ds.offset1) << 8u) + inst.control.ds.offset0;
|
|
const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));
|
|
const IR::U32 prev = ir.DataConsume(gds_offset);
|
|
SetDst(inst.dst[0], prev);
|
|
}
|
|
|
|
} // namespace Shader::Gcn
|