* shader_recompiler: Move shared mem lowering into emitter
  * IR can be quite verbose during the first stages of translation, before the SSA and constant propagation passes that drastically simplify it have run. This lowering can also be done during emission, so do it then to save some compilation time.
* runtime_info: Pack PsColorBuffer into 8 bytes
  * Drops the size of the total structure from 396 to 204 bytes. Should also make comparison of the array a bit faster, since it's a hot path done every draw.
* emit_spirv_context: Add infrastructure for buffer aliases
  * Splits out the buffer creation function so it can be reused when defining multiple type aliases.
* shader_recompiler: Merge srt_flatbuf into buffers list
  * It's no longer a special case, yay.
* shader_recompiler: Complete buffer aliasing support
  * Adds a bunch more types into buffers, such as F32 for float reads/writes and 8/16-bit integer types for formatted buffers.
* shader_recompiler: Remove existing shared memory emulation
  * The current implementation relies on a backend-side implementation and hooks into every shared memory access. It also doesn't handle atomics. Will be replaced by an IR pass that solves these issues.
* shader_recompiler: Reintroduce shared memory on SSBO emulation
  * Now it is performed with an IR pass and, combined with the previous commit's cleanup, is fully transparent to the backend, other than requiring workgroup_index to be provided as an attribute (computing this on every shared memory access would be too verbose).
* clang format
* buffer_cache: Reduce buffer sizes
* vk_rasterizer: Cleanup resource binding code
  * Reduces noise in the functions; also removes some arguments which are class members.
* Fix gcc
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/config.h"
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/reinterpret.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/amdgpu/types.h"

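// magic_enum only reflects a limited enumerator range by default; widen it so
// that enum_name() in LogMissingOpcode below can stringify the full GCN Opcode
// enum, whose values appear to reach up to 1515.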
#define MAGIC_ENUM_RANGE_MIN 0
#define MAGIC_ENUM_RANGE_MAX 1515
#include <magic_enum/magic_enum.hpp>

namespace Shader::Gcn {

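// Scratch VGPR bookkeeping. This is file-scope state shared by the Translator
// instances created for the blocks of one shader: the constructor re-seeds
// next_vgpr_num only while the map is empty, and EmitPrologue clears the map
// so the next shader starts fresh.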
static u32 next_vgpr_num;
static std::unordered_map<u32, IR::VectorReg> vgpr_map;

Translator::Translator(IR::Block* block_, Info& info_, const RuntimeInfo& runtime_info_,
                       const Profile& profile_)
    : ir{*block_, block_->begin()}, info{info_}, runtime_info{runtime_info_}, profile{profile_} {
    next_vgpr_num = vgpr_map.empty() ? runtime_info.num_allocated_vgprs : next_vgpr_num;
}

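// Seeds the register file with the system values (vertex/instance ids,
// barycentrics, workgroup ids, ...) that the hardware would have preloaded
// into SGPRs/VGPRs before the shader starts, based on the logical stage.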
void Translator::EmitPrologue() {
    ir.Prologue();
    ir.SetExec(ir.Imm1(true));

    // Initialize user data.
    IR::ScalarReg dst_sreg = IR::ScalarReg::S0;
    for (u32 i = 0; i < runtime_info.num_user_data; i++) {
        ir.SetScalarReg(dst_sreg, ir.GetUserData(dst_sreg));
        ++dst_sreg;
    }

    IR::VectorReg dst_vreg = IR::VectorReg::V0;
    switch (info.l_stage) {
    case LogicalStage::Vertex:
        // v0: vertex ID, always present
        ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
        // v1: instance ID, step rate 0
        if (runtime_info.num_input_vgprs > 0) {
            ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId0));
        }
        // v2: instance ID, step rate 1
        if (runtime_info.num_input_vgprs > 1) {
            ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId1));
        }
        // v3: instance ID, plain
        if (runtime_info.num_input_vgprs > 2) {
            ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
        }
        break;
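    // Fragment inputs occupy VGPRs in the order given by the SPI input address
    // flags. The barycentric I/J pairs are not written here, only stepped over;
    // the interpolation instructions are presumably expected to source them via
    // IR attributes instead.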
    case LogicalStage::Fragment:
        dst_vreg = IR::VectorReg::V0;
        if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
            ++dst_vreg; // I
            ++dst_vreg; // J
        }
        if (runtime_info.fs_info.addr_flags.persp_center_ena) {
            ++dst_vreg; // I
            ++dst_vreg; // J
        }
        if (runtime_info.fs_info.addr_flags.persp_centroid_ena) {
            ++dst_vreg; // I
            ++dst_vreg; // J
        }
        if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) {
            ++dst_vreg; // I/W
            ++dst_vreg; // J/W
            ++dst_vreg; // 1/W
        }
        if (runtime_info.fs_info.addr_flags.linear_sample_ena) {
            ++dst_vreg; // I
            ++dst_vreg; // J
        }
        if (runtime_info.fs_info.addr_flags.linear_center_ena) {
            ++dst_vreg; // I
            ++dst_vreg; // J
        }
        if (runtime_info.fs_info.addr_flags.linear_centroid_ena) {
            ++dst_vreg; // I
            ++dst_vreg; // J
        }
        if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) {
            ++dst_vreg;
        }
        if (runtime_info.fs_info.addr_flags.pos_x_float_ena) {
            if (runtime_info.fs_info.en_flags.pos_x_float_ena) {
                ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0));
            } else {
                ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f));
            }
        }
        if (runtime_info.fs_info.addr_flags.pos_y_float_ena) {
            if (runtime_info.fs_info.en_flags.pos_y_float_ena) {
                ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 1));
            } else {
                ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f));
            }
        }
        if (runtime_info.fs_info.addr_flags.pos_z_float_ena) {
            if (runtime_info.fs_info.en_flags.pos_z_float_ena) {
                ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 2));
            } else {
                ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f));
            }
        }
        if (runtime_info.fs_info.addr_flags.pos_w_float_ena) {
            if (runtime_info.fs_info.en_flags.pos_w_float_ena) {
                ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 3));
            } else {
                ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f));
            }
        }
        if (runtime_info.fs_info.addr_flags.front_face_ena) {
            if (runtime_info.fs_info.en_flags.front_face_ena) {
                ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::IsFrontFace));
            } else {
                ir.SetVectorReg(dst_vreg++, ir.Imm32(0));
            }
        }
        break;
    case LogicalStage::TessellationControl: {
        ir.SetVectorReg(IR::VectorReg::V0, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
        // Should be laid out like:
        // [0:8]: patch id within VGT
        // [8:12]: output control point id
        ir.SetVectorReg(IR::VectorReg::V1,
                        ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo));
        break;
    }
    case LogicalStage::TessellationEval:
        ir.SetVectorReg(IR::VectorReg::V0,
                        ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU));
        ir.SetVectorReg(IR::VectorReg::V1,
                        ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV));
        // V2 is similar to PrimitiveID but not the same. It seems to only be used in
        // compiler-generated address calculations. It's probably the patch id within the
        // patches running locally on a given VGT (or CU, whichever is the granularity of
        // LDS memory).
        // Set to 0. See explanation in comment describing hull/domain passes.
        ir.SetVectorReg(IR::VectorReg::V2, ir.Imm32(0u));
        // V3 is the actual PrimitiveID as intended by the shader author.
        ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
        break;
    case LogicalStage::Compute:
        ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0));
        ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
        ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));

        if (runtime_info.cs_info.tgid_enable[0]) {
            ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 0));
        }
        if (runtime_info.cs_info.tgid_enable[1]) {
            ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 1));
        }
        if (runtime_info.cs_info.tgid_enable[2]) {
            ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
        }
        break;
    case LogicalStage::Geometry:
        switch (runtime_info.gs_info.out_primitive[0]) {
        case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
            ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
            [[fallthrough]];
        case AmdGpu::GsOutputPrimitiveType::LineStrip:
            ir.SetVectorReg(IR::VectorReg::V1, ir.Imm32(1u)); // vertex 1
            [[fallthrough]];
        default:
            ir.SetVectorReg(IR::VectorReg::V0, ir.Imm32(0u)); // vertex 0
            break;
        }
        ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
        break;
    default:
        UNREACHABLE_MSG("Unknown shader stage");
    }

    // Clear any scratch vgpr mappings for next shader.
    vgpr_map.clear();
}

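// Returns a stable scratch VGPR for the given offset, allocating a fresh
// register past the shader's own allocation on first use, presumably so IR
// passes can materialize temporaries without clobbering registers the shader
// already uses.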
IR::VectorReg Translator::GetScratchVgpr(u32 offset) {
    const auto [it, is_new] = vgpr_map.try_emplace(offset);
    if (is_new) {
        ASSERT_MSG(next_vgpr_num < 256, "Out of VGPRs");
        const auto new_vgpr = static_cast<IR::VectorReg>(next_vgpr_num++);
        it->second = new_vgpr;
    }
    return it->second;
}

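// Reads a 32-bit instruction operand as IR. Handles GPRs, the hardware inline
// constants (small signed integers and common float literals), literal
// constants, and status registers, then applies the abs/neg input modifiers.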
template <typename T>
T Translator::GetSrc(const InstOperand& operand) {
    constexpr bool is_float = std::is_same_v<T, IR::F32>;

    const auto get_imm = [&](auto value) -> T {
        if constexpr (is_float) {
            return ir.Imm32(std::bit_cast<float>(value));
        } else {
            return ir.Imm32(std::bit_cast<u32>(value));
        }
    };

    T value{};
    switch (operand.field) {
    case OperandField::ScalarGPR:
        value = ir.GetScalarReg<T>(IR::ScalarReg(operand.code));
        break;
    case OperandField::VectorGPR:
        value = ir.GetVectorReg<T>(IR::VectorReg(operand.code));
        break;
    case OperandField::ConstZero:
        value = get_imm(0U);
        break;
    case OperandField::SignedConstIntPos:
        value = get_imm(operand.code - SignedConstIntPosMin + 1);
        break;
    case OperandField::SignedConstIntNeg:
        value = get_imm(-s32(operand.code) + SignedConstIntNegMin - 1);
        break;
    case OperandField::LiteralConst:
        value = get_imm(operand.code);
        break;
    case OperandField::ConstFloatPos_1_0:
        value = get_imm(1.f);
        break;
    case OperandField::ConstFloatPos_0_5:
        value = get_imm(0.5f);
        break;
    case OperandField::ConstFloatPos_2_0:
        value = get_imm(2.0f);
        break;
    case OperandField::ConstFloatPos_4_0:
        value = get_imm(4.0f);
        break;
    case OperandField::ConstFloatNeg_0_5:
        value = get_imm(-0.5f);
        break;
    case OperandField::ConstFloatNeg_1_0:
        value = get_imm(-1.0f);
        break;
    case OperandField::ConstFloatNeg_2_0:
        value = get_imm(-2.0f);
        break;
    case OperandField::ConstFloatNeg_4_0:
        value = get_imm(-4.0f);
        break;
    case OperandField::VccLo:
        if constexpr (is_float) {
            value = ir.BitCast<IR::F32>(ir.GetVccLo());
        } else {
            value = ir.GetVccLo();
        }
        break;
    case OperandField::VccHi:
        if constexpr (is_float) {
            value = ir.BitCast<IR::F32>(ir.GetVccHi());
        } else {
            value = ir.GetVccHi();
        }
        break;
    case OperandField::M0:
        if constexpr (is_float) {
            value = ir.BitCast<IR::F32>(ir.GetM0());
        } else {
            value = ir.GetM0();
        }
        break;
    case OperandField::Scc:
        if constexpr (is_float) {
            UNREACHABLE();
        } else {
            value = ir.BitCast<IR::U32>(ir.GetScc());
        }
        break;
    default:
        UNREACHABLE();
    }

    if constexpr (is_float) {
        if (operand.input_modifier.abs) {
            value = ir.FPAbs(value);
        }
        if (operand.input_modifier.neg) {
            value = ir.FPNeg(value);
        }
    } else {
        if (operand.input_modifier.abs) {
            value = ir.IAbs(value);
        }
        if (operand.input_modifier.neg) {
            value = ir.INeg(value);
        }
    }
    return value;
}

template IR::U32 Translator::GetSrc<IR::U32>(const InstOperand&);
template IR::F32 Translator::GetSrc<IR::F32>(const InstOperand&);

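// 64-bit variant of GetSrc. Register operands occupy two consecutive GPRs
// (lo at operand.code, hi at operand.code + 1), which are packed into a
// single 64-bit IR value.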
template <typename T>
T Translator::GetSrc64(const InstOperand& operand) {
    constexpr bool is_float = std::is_same_v<T, IR::F64>;

    const auto get_imm = [&](auto value) -> T {
        if constexpr (is_float) {
            return ir.Imm64(std::bit_cast<double>(value));
        } else {
            return ir.Imm64(std::bit_cast<u64>(value));
        }
    };

    T value{};
    switch (operand.field) {
    case OperandField::ScalarGPR: {
        const auto value_lo = ir.GetScalarReg(IR::ScalarReg(operand.code));
        const auto value_hi = ir.GetScalarReg(IR::ScalarReg(operand.code + 1));
        if constexpr (is_float) {
            UNREACHABLE();
        } else {
            value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi));
        }
        break;
    }
    case OperandField::VectorGPR: {
        const auto value_lo = ir.GetVectorReg(IR::VectorReg(operand.code));
        const auto value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1));
        if constexpr (is_float) {
            value = ir.PackFloat2x32(ir.CompositeConstruct(value_lo, value_hi));
        } else {
            value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi));
        }
        break;
    }
    case OperandField::ConstZero:
        value = get_imm(0ULL);
        break;
    case OperandField::SignedConstIntPos:
        value = get_imm(s64(operand.code) - SignedConstIntPosMin + 1);
        break;
    case OperandField::SignedConstIntNeg:
        value = get_imm(-s64(operand.code) + SignedConstIntNegMin - 1);
        break;
    case OperandField::LiteralConst:
        value = get_imm(u64(operand.code));
        break;
    case OperandField::ConstFloatPos_1_0:
        value = get_imm(1.0);
        break;
    case OperandField::ConstFloatPos_0_5:
        value = get_imm(0.5);
        break;
    case OperandField::ConstFloatPos_2_0:
        value = get_imm(2.0);
        break;
    case OperandField::ConstFloatPos_4_0:
        value = get_imm(4.0);
        break;
    case OperandField::ConstFloatNeg_0_5:
        value = get_imm(-0.5);
        break;
    case OperandField::ConstFloatNeg_1_0:
        value = get_imm(-1.0);
        break;
    case OperandField::ConstFloatNeg_2_0:
        value = get_imm(-2.0);
        break;
    case OperandField::ConstFloatNeg_4_0:
        value = get_imm(-4.0);
        break;
    case OperandField::VccLo:
        if constexpr (is_float) {
            UNREACHABLE();
        } else {
            value = ir.PackUint2x32(ir.CompositeConstruct(ir.GetVccLo(), ir.GetVccHi()));
        }
        break;
    case OperandField::VccHi:
    default:
        UNREACHABLE();
    }

    if constexpr (is_float) {
        if (operand.input_modifier.abs) {
            value = ir.FPAbs(value);
        }
        if (operand.input_modifier.neg) {
            value = ir.FPNeg(value);
        }
    }
    return value;
}

template IR::U64 Translator::GetSrc64<IR::U64>(const InstOperand&);
template IR::F64 Translator::GetSrc64<IR::F64>(const InstOperand&);

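// Writes a 32-bit result to the destination operand, first applying the
// output modifiers to float results: the omod multiplier, then clamp to
// [0, 1].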
void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
    IR::U32F32 result = value;
    if (value.Type() == IR::Type::F32) {
        if (operand.output_modifier.multiplier != 0.f) {
            result = ir.FPMul(result, ir.Imm32(operand.output_modifier.multiplier));
        }
        if (operand.output_modifier.clamp) {
            result = ir.FPSaturate(result);
        }
    }

    switch (operand.field) {
    case OperandField::ScalarGPR:
        return ir.SetScalarReg(IR::ScalarReg(operand.code), result);
    case OperandField::VectorGPR:
        return ir.SetVectorReg(IR::VectorReg(operand.code), result);
    case OperandField::VccLo:
        return ir.SetVccLo(result);
    case OperandField::VccHi:
        return ir.SetVccHi(result);
    case OperandField::M0:
        return ir.SetM0(result);
    default:
        UNREACHABLE();
    }
}

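// 64-bit counterpart of SetDst. After applying the output modifiers the value
// is split into lo/hi halves and written to consecutive GPRs (or to
// VccLo/VccHi).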
void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_raw) {
    IR::U64F64 value_untyped = value_raw;

    const bool is_float = value_raw.Type() == IR::Type::F64 || value_raw.Type() == IR::Type::F32;
    if (is_float) {
        if (operand.output_modifier.multiplier != 0.f) {
            value_untyped =
                ir.FPMul(value_untyped, ir.Imm64(f64(operand.output_modifier.multiplier)));
        }
        if (operand.output_modifier.clamp) {
            value_untyped = ir.FPSaturate(value_untyped);
        }
    }
    const IR::U64 value =
        is_float ? ir.BitCast<IR::U64>(IR::F64{value_untyped}) : IR::U64{value_untyped};

    const IR::Value unpacked{ir.UnpackUint2x32(value)};
    const IR::U32 lo{ir.CompositeExtract(unpacked, 0U)};
    const IR::U32 hi{ir.CompositeExtract(unpacked, 1U)};
    switch (operand.field) {
    case OperandField::ScalarGPR:
        ir.SetScalarReg(IR::ScalarReg(operand.code + 1), hi);
        return ir.SetScalarReg(IR::ScalarReg(operand.code), lo);
    case OperandField::VectorGPR:
        ir.SetVectorReg(IR::VectorReg(operand.code + 1), hi);
        return ir.SetVectorReg(IR::VectorReg(operand.code), lo);
    case OperandField::VccLo:
        ir.SetVccLo(lo);
        return ir.SetVccHi(hi);
    case OperandField::VccHi:
        UNREACHABLE();
    case OperandField::M0:
        break;
    default:
        UNREACHABLE();
    }
}

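// S_SWAPPC_B64 in a vertex shader jumps to a separately assembled fetch
// shader that loads the vertex attributes. Instead of translating that jump,
// parse the fetch shader and emit equivalent attribute reads directly.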
void Translator::EmitFetch(const GcnInst& inst) {
    // Read the pointer to the fetch shader assembly.
    info.has_fetch_shader = true;
    info.fetch_shader_sgpr_base = inst.src[0].code;

    const auto fetch_data = ParseFetchShader(info);
    ASSERT(fetch_data.has_value());

    if (Config::dumpShaders()) {
        using namespace Common::FS;
        const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
        if (!std::filesystem::exists(dump_dir)) {
            std::filesystem::create_directories(dump_dir);
        }
        const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash);
        const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
        file.WriteRaw<u8>(fetch_data->code, fetch_data->size);
    }

    for (const auto& attrib : fetch_data->attributes) {
        const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
        IR::VectorReg dst_reg{attrib.dest_vgpr};

        // Read the V# of the attribute to figure out component number and type.
        const auto buffer = info.ReadUdReg<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
        const auto values =
            ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1),
                                  ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3));
        const auto swizzled = ApplySwizzle(ir, values, buffer.DstSelect());
        for (u32 i = 0; i < 4; i++) {
            ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)});
        }

        // In case of programmable step rates we need to fall back to instance data
        // pulling in the shader, so VBs should be bound as regular data buffers.
        if (attrib.UsesStepRates()) {
            info.buffers.push_back({
                .sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
                .used_types = IR::Type::F32,
                .instance_attrib = attrib.semantic,
            });
        }
    }
}

void Translator::LogMissingOpcode(const GcnInst& inst) {
    LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({}, category = {})",
              magic_enum::enum_name(inst.opcode), u32(inst.opcode),
              magic_enum::enum_name(inst.category));
    info.translation_failed = true;
}

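// Top-level entry point: translates a contiguous run of GCN instructions into
// IR for the given block, dispatching on instruction category.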
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Info& info,
               const RuntimeInfo& runtime_info, const Profile& profile) {
    if (inst_list.empty()) {
        return;
    }
    Translator translator{block, info, runtime_info, profile};
    for (const auto& inst : inst_list) {
        pc += inst.length;

        // Special case for emitting fetch shader.
        if (inst.opcode == Opcode::S_SWAPPC_B64) {
            ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
                   info.stage == Stage::Local);
            translator.EmitFetch(inst);
            continue;
        }

        // Emit instructions for each category.
        switch (inst.category) {
        case InstCategory::DataShare:
            translator.EmitDataShare(inst);
            break;
        case InstCategory::VectorInterpolation:
            translator.EmitVectorInterpolation(inst);
            break;
        case InstCategory::ScalarMemory:
            translator.EmitScalarMemory(inst);
            break;
        case InstCategory::VectorMemory:
            translator.EmitVectorMemory(inst);
            break;
        case InstCategory::Export:
            translator.EmitExport(inst);
            break;
        case InstCategory::FlowControl:
            translator.EmitFlowControl(pc, inst);
            break;
        case InstCategory::ScalarALU:
            translator.EmitScalarAlu(inst);
            break;
        case InstCategory::VectorALU:
            translator.EmitVectorAlu(inst);
            break;
        case InstCategory::DebugProfile:
            break;
        default:
            UNREACHABLE();
        }
    }
}

} // namespace Shader::Gcn