WIP Tessellation partial implementation. Squash commits

Frodo Baggins 2024-10-23 23:20:23 -07:00
parent 6a4cf2763a
commit 917e02f997
33 changed files with 1131 additions and 249 deletions

View File

@ -16,6 +16,10 @@ if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
add_compile_definitions(_DEBUG)
endif()
project(shadPS4)
# Forcing PIE makes sure that the base address is high enough so that it doesn't clash with the PS4 memory.

View File

@ -62,8 +62,7 @@ private:
class FileBackend {
public:
explicit FileBackend(const std::filesystem::path& filename)
: file{std::filesystem::path("/dev/null"), FS::FileAccessMode::Write,
FS::FileType::TextFile} {}
: file{filename, FS::FileAccessMode::Write, FS::FileType::TextFile} {}
~FileBackend() = default;

View File

@ -1642,7 +1642,6 @@ s32 PS4_SYSV_ABI sceGnmSetGsShader(u32* cmdbuf, u32 size, const u32* gs_regs) {
s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u32 param4) {
LOG_TRACE(Lib_GnmDriver, "called");
if (!cmdbuf || size < 0x1E) {
return -1;
}
@ -1660,11 +1659,19 @@ s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u3
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2],
hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS
// This is wrong, but stash them here for now.
// Should instead read the tess constants buffer (bound as a V#) into runtime_info.
// The HsConstants member of HsProgram is used to derive TessellationDataConstantBuffer; its
// members don't correspond to real registers.
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x11cu, hs_regs[4], hs_regs[5], hs_regs[6], hs_regs[7],
hs_regs[8], hs_regs[9], hs_regs[10], hs_regs[11], hs_regs[12],
hs_regs[13]); // TODO comment
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5],
hs_regs[5]); // VGT_HOS_MAX_TESS_LEVEL
hs_regs[6]); // VGT_HOS_MAX_TESS_LEVEL
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4); // VGT_LS_HS_CONFIG
// right padding?
WriteTrailingNop<11>(cmdbuf);
return ORBIS_OK;
}

View File

@ -47,15 +47,24 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
}
}
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, Id array_index, u32 element) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index)};
ASSERT(info.num_components > 0);
if (info.num_components == 1) {
Id base = info.id;
boost::container::small_vector<Id, 2> indices;
if (ctx.l_stage == LogicalStage::TessellationControl) {
indices.push_back(array_index);
}
if (info.num_components > 1) {
indices.push_back(ctx.ConstU32(element));
}
if (indices.empty()) {
return info.id;
} else {
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
return ctx.OpAccessChain(info.pointer_type, info.id, indices);
}
}
if (IR::IsMrt(attr)) {
@ -84,6 +93,10 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
}
}
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
return OutputAttrPointer(ctx, attr, {}, element);
}
std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
@ -175,12 +188,11 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
}
Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
if (IR::IsPosition(attr)) {
ASSERT(attr == IR::Attribute::Position0);
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{
ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))};
const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
@ -190,7 +202,7 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
const auto param = ctx.input_params.at(param_id).id;
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
@ -198,9 +210,27 @@ Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u
UNREACHABLE();
}
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
if (ctx.info.stage == Stage::Geometry) {
return EmitGetAttributeForGeometry(ctx, attr, comp, index);
} else if (ctx.info.l_stage == LogicalStage::TessellationControl ||
ctx.info.l_stage == LogicalStage::TessellationEval) {
if (IR::IsTessCoord(attr)) {
const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1;
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
const auto pointer{
ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))};
return ctx.OpLoad(ctx.F32[1], pointer);
} else if (IR::IsParam(attr)) {
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
const auto param = ctx.input_params.at(param_id).id;
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
return ctx.OpLoad(ctx.F32[1],
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
}
UNREACHABLE();
}
if (IR::IsParam(attr)) {
@ -276,6 +306,7 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
ctx.u32_zero_value);
case IR::Attribute::PrimitiveId:
case IR::Attribute::TessPatchIdInVgt: // TODO see why this isn't DCEd
ASSERT(ctx.info.l_stage == LogicalStage::Geometry ||
ctx.info.l_stage == LogicalStage::TessellationControl ||
ctx.info.l_stage == LogicalStage::TessellationEval);
@ -301,7 +332,13 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
LOG_WARNING(Render_Vulkan, "Ignoring pos1 export");
return;
}
const Id pointer{OutputAttrPointer(ctx, attr, element)};
Id pointer;
if (ctx.l_stage == LogicalStage::TessellationControl) {
pointer = OutputAttrPointer(ctx, attr, ctx.OpLoad(ctx.U32[1], ctx.invocation_id), element);
} else {
pointer = OutputAttrPointer(ctx, attr, element);
}
const auto component_type{OutputAttrComponentType(ctx, attr)};
if (component_type.second) {
ctx.OpStore(pointer, ctx.OpBitcast(component_type.first, value));

View File

@ -85,7 +85,7 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);

View File

@ -388,13 +388,52 @@ void EmitContext::DefineInputs() {
}
case LogicalStage::TessellationControl: {
invocation_id =
DefineVariable(U32[3], spv::BuiltIn::InvocationId, spv::StorageClass::Input);
DefineVariable(U32[1], spv::BuiltIn::InvocationId, spv::StorageClass::Input);
patch_vertices =
DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input);
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.loads.GetAny(param)) {
continue;
}
const u32 num_components = info.loads.NumComponents(param);
// The input vertex count isn't statically known, so make length 32 (what glslang does)
const Id type{TypeArray(F32[4], ConstU32(32u))};
const Id id{DefineInput(type, i)};
Name(id, fmt::format("in_attr{}", i));
input_params[i] = {id, input_f32, F32[1], 4};
}
break;
}
case LogicalStage::TessellationEval: {
tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord);
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.loads.GetAny(param)) {
continue;
}
const u32 num_components = info.loads.NumComponents(param);
// The input vertex count isn't statically known, so make length 32 (what glslang does)
const Id type{TypeArray(F32[4], ConstU32(32u))};
const Id id{DefineInput(type, i)};
Name(id, fmt::format("in_attr{}", i));
input_params[i] = {id, input_f32, F32[1], 4};
}
u32 patch_base_location = runtime_info.vs_info.hs_output_cp_stride >> 4;
for (size_t index = 0; index < 30; ++index) {
if (!(info.uses_patches & (1U << index))) {
continue;
}
const Id id{DefineInput(F32[4], patch_base_location + index)};
Decorate(id, spv::Decoration::Patch);
Name(id, fmt::format("patch_in{}", index));
patches[index] = id;
}
break;
}
default:
@ -405,6 +444,9 @@ void EmitContext::DefineInputs() {
void EmitContext::DefineOutputs() {
switch (l_stage) {
case LogicalStage::Vertex: {
// No point in defining builtin outputs (i.e. position) unless the next stage is fragment?
// Might cause problems linking with the TCS.
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
info.stores.Get(IR::Attribute::Position2) ||
@ -442,16 +484,58 @@ void EmitContext::DefineOutputs() {
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner);
Decorate(output_tess_level_inner, spv::Decoration::Patch);
}
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.stores.GetAny(param)) {
continue;
}
const u32 num_components = info.stores.NumComponents(param);
// Size the output array by the number of HS output control points.
const Id type{TypeArray(F32[4], ConstU32(runtime_info.hs_info.output_control_points))};
const Id id{DefineOutput(type, i)};
Name(id, fmt::format("out_attr{}", i));
output_params[i] = {id, output_f32, F32[1], 4};
}
u32 patch_base_location = runtime_info.hs_info.hs_output_cp_stride >> 4;
for (size_t index = 0; index < 30; ++index) {
if (!(info.uses_patches & (1U << index))) {
continue;
}
const Id id{DefineOutput(F32[4], index)};
const Id id{DefineOutput(F32[4], patch_base_location + index)};
Decorate(id, spv::Decoration::Patch);
Name(id, fmt::format("patch_out{}", index));
patches[index] = id;
}
break;
}
case LogicalStage::TessellationEval: {
// TODO copied from logical vertex, figure this out
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
info.stores.Get(IR::Attribute::Position2) ||
info.stores.Get(IR::Attribute::Position3);
if (has_extra_pos_stores) {
const Id type{TypeArray(F32[1], ConstU32(8U))};
clip_distances =
DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
cull_distances =
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
}
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.stores.GetAny(param)) {
continue;
}
const u32 num_components = info.stores.NumComponents(param);
const Id id{DefineOutput(F32[num_components], i)};
Name(id, fmt::format("out_attr{}", i));
output_params[i] =
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
}
break;
}
case LogicalStage::Fragment:
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};

View File

@ -0,0 +1,24 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/types.h"
namespace Shader {
struct TessellationDataConstantBuffer {
u32 m_lsStride;
u32 m_hsCpStride; // HullStateConstants::m_cpStride != 0 ? HullStateConstants::m_cpStride :
// ls_stride
u32 m_hsNumPatch; // num patches submitted in threadgroup
u32 m_hsOutputBase; // HullStateConstants::m_numInputCP::m_cpStride != 0 ?
// HullStateConstants::m_numInputCP * ls_stride * num_patches : 0
u32 m_patchConstSize; // 16 * num_patch_attrs
u32 m_patchConstBase; // hs_output_base + patch_output_size
u32 m_patchOutputSize; // output_cp_stride * num_output_cp
f32 m_offChipTessellationFactorThreshold;
u32 m_firstEdgeTessFactorIndex;
};
} // namespace Shader
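The field comments above describe how each value is derived. As a rough illustration of how they fit together (a sketch, not part of this commit; OutputCpByteAddr and its parameters are hypothetical and assume those derivations are accurate):

#include "common/types.h"
#include "shader_recompiler/frontend/tessellation.h"

// Byte address of one output-control-point element in the tessellation ring/LDS layout:
// the output region starts at m_hsOutputBase, each patch occupies m_patchOutputSize bytes,
// and each output control point within a patch occupies m_hsCpStride bytes.
inline u32 OutputCpByteAddr(const Shader::TessellationDataConstantBuffer& tc, u32 patch_id,
                            u32 cp_id, u32 attr_byte_offset) {
    return tc.m_hsOutputBase + patch_id * tc.m_patchOutputSize + cp_id * tc.m_hsCpStride +
           attr_byte_offset;
}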

View File

@ -81,9 +81,9 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
case Opcode::S_MUL_I32:
return S_MUL_I32(inst);
case Opcode::S_BFE_I32:
return S_BFE_I32(inst);
return S_BFE(inst, true);
case Opcode::S_BFE_U32:
return S_BFE_U32(inst);
return S_BFE(inst, false);
case Opcode::S_ABSDIFF_I32:
return S_ABSDIFF_I32(inst);
@ -438,30 +438,12 @@ void Translator::S_MUL_I32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1])));
}
void Translator::S_BFE_U32(const GcnInst& inst) {
void Translator::S_BFE(const GcnInst& inst, bool is_signed) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))};
const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))};
const IR::U32 result{ir.BitFieldExtract(src0, offset, count)};
SetDst(inst.dst[0], result);
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
}
void Translator::S_BFE_I32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
IR::U32 result;
ASSERT_MSG(src1.IsImmediate(), "Unhandled S_BFE_I32 with non-immediate mask");
u32 mask = src1.U32();
ASSERT(mask != 0);
u32 offset = std::countr_zero(mask);
u32 count = std::popcount(mask);
mask = mask >> offset;
ASSERT_MSG((mask & (mask + 1)) == 0, "mask {} has non-adjacent bits set");
result = ir.BitFieldExtract(src0, ir.Imm32(offset), ir.Imm32(count), true);
const IR::U32 result{ir.BitFieldExtract(src0, offset, count, is_signed)};
SetDst(inst.dst[0], result);
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
}

View File

@ -35,7 +35,12 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
}
void Translator::S_BARRIER() {
ir.Barrier();
if (info.l_stage == LogicalStage::TessellationControl) {
// TODO: ASSERT that we're in uniform control flow
ir.TcsOutputBarrier();
} else {
ir.Barrier();
}
}
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {

View File

@ -94,7 +94,7 @@ public:
void S_ASHR_I32(const GcnInst& inst);
void S_BFM_B32(const GcnInst& inst);
void S_MUL_I32(const GcnInst& inst);
void S_BFE_U32(const GcnInst& inst);
void S_BFE(const GcnInst& inst, bool is_signed);
void S_BFE_I32(const GcnInst& inst);
void S_ABSDIFF_I32(const GcnInst& inst);
void S_NOT_B32(const GcnInst& inst);

View File

@ -11,6 +11,7 @@
#include "common/types.h"
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/frontend/copy_shader.h"
#include "shader_recompiler/frontend/tessellation.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/passes/srt.h"
#include "shader_recompiler/ir/reg.h"
@ -174,6 +175,10 @@ struct Info {
PersistentSrtInfo srt_info;
std::vector<u32> flattened_ud_buf;
// TODO handle indirection
IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max;
s32 tess_consts_dword_offset = -1;
std::span<const u32> user_data;
Stage stage;
LogicalStage l_stage;
@ -248,6 +253,21 @@ struct Info {
srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
}
}
// TODO probably not needed
bool FoundTessConstantsSharp() {
return tess_consts_dword_offset >= 0;
}
void ReadTessConstantBuffer(TessellationDataConstantBuffer& tess_constants) {
ASSERT(FoundTessConstantsSharp());
auto buf = ReadUdReg<AmdGpu::Buffer>(static_cast<u32>(tess_consts_ptr_base),
static_cast<u32>(tess_consts_dword_offset));
VAddr tess_constants_addr = buf.base_address;
memcpy(&tess_constants,
reinterpret_cast<TessellationDataConstantBuffer*>(tess_constants_addr),
sizeof(tess_constants));
}
};
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {

View File

@ -118,14 +118,34 @@ std::string NameOf(Attribute attribute) {
return "LocalInvocationIndex";
case Attribute::InvocationId:
return "InvocationId";
case Attribute::PackedHullInvocationInfo:
return "PackedHullInvocationInfo";
case Attribute::PatchVertices:
return "PatchVertices";
case Attribute::TessellationEvaluationPointU:
return "TessellationEvaluationPointU";
case Attribute::TessellationEvaluationPointV:
return "TessellationEvaluationPointV";
case Attribute::PackedHullInvocationInfo:
return "PackedHullInvocationInfo";
case Attribute::TcsLsStride:
return "TcsLsStride";
case Attribute::TcsCpStride:
return "TcsCpStride";
case Attribute::TcsNumPatches:
return "TcsNumPatches";
case Attribute::TcsOutputBase:
return "TcsOutputBase";
case Attribute::TcsPatchConstSize:
return "TcsPatchConstSize";
case Attribute::TcsPatchConstBase:
return "TcsPatchConstBase";
case Attribute::TcsPatchOutputSize:
return "TcsPatchOutputSize";
case Attribute::TcsOffChipTessellationFactorThreshold:
return "TcsOffChipTessellationFactorThreshold";
case Attribute::TcsFirstEdgeTessFactorIndex:
return "TcsFirstEdgeTessFactorIndex";
case Attribute::TessPatchIdInVgt:
return "TessPatchIdInVgt";
default:
break;
}

View File

@ -75,11 +75,23 @@ enum class Attribute : u64 {
InstanceId0 = 78, // step rate 0
InstanceId1 = 79, // step rate 1
InvocationId = 80, // TCS id in output patch and instanced geometry shader id
PatchVertices = 81,
TessellationEvaluationPointU = 82,
TessellationEvaluationPointV = 83,
PackedHullInvocationInfo =
81, // PrimitiveId (patch id) and InvocationId (output control point id)
PatchVertices = 82,
TessellationEvaluationPointU = 83,
TessellationEvaluationPointV = 84,
84, // PrimitiveId (patch id) and InvocationId (output control point id)
// Probably don't need all these.
// Most should be dead after hull shader transform
TcsLsStride = 85,
TcsCpStride = 86,
TcsNumPatches = 87,
TcsOutputBase = 88,
TcsPatchConstSize = 89,
TcsPatchConstBase = 90,
TcsPatchOutputSize = 91,
TcsOffChipTessellationFactorThreshold = 92,
TcsFirstEdgeTessFactorIndex = 93,
TessPatchIdInVgt = 94,
Max,
};
@ -91,6 +103,11 @@ constexpr bool IsPosition(Attribute attribute) noexcept {
return attribute >= Attribute::Position0 && attribute <= Attribute::Position3;
}
constexpr bool IsTessCoord(Attribute attribute) noexcept {
return attribute >= Attribute::TessellationEvaluationPointU &&
attribute <= Attribute::TessellationEvaluationPointV;
}
constexpr bool IsParam(Attribute attribute) noexcept {
return attribute >= Attribute::Param0 && attribute <= Attribute::Param31;
}

View File

@ -270,8 +270,8 @@ void IREmitter::SetM0(const U32& value) {
Inst(Opcode::SetM0, value);
}
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, u32 index) {
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), Imm32(index));
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp, IR::Value index) {
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp), index);
}
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {

View File

@ -82,7 +82,8 @@ public:
[[nodiscard]] U1 Condition(IR::Condition cond);
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0, u32 index = 0);
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0,
IR::Value index = IR::Value(u32(0u)));
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
@ -338,6 +339,7 @@ private:
template <typename T = Value, typename... Args>
T Inst(Opcode op, Args... args) {
auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
it->SetParent(block);
return T{Value{&*it}};
}
@ -355,6 +357,7 @@ private:
u32 raw_flags{};
std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
it->SetParent(block);
return T{Value{&*it}};
}
};

View File

@ -153,6 +153,7 @@ void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
void Inst::Invalidate() {
ClearArgs();
ASSERT(users.list.empty());
ReplaceOpcode(Opcode::Void);
}

View File

@ -294,6 +294,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
case IR::Opcode::IMul32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
return;
case IR::Opcode::UDiv32:
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a / b; });
return;
case IR::Opcode::FPCmpClass32:
FoldCmpClass(block, inst);
return;

View File

@ -0,0 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once

View File

@ -1,10 +1,29 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <numeric>
#include "shader_recompiler/ir/breadth_first_search.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/program.h"
// TODO delete
#include "common/io_file.h"
#include "common/path_util.h"
namespace Shader::Optimization {
static void DumpIR(IR::Program& program, std::string phase) {
std::string s = IR::DumpProgram(program);
using namespace Common::FS;
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
if (!std::filesystem::exists(dump_dir)) {
std::filesystem::create_directories(dump_dir);
}
const auto filename =
fmt::format("{}_{:#018x}.{}.ir.txt", program.info.stage, program.info.pgm_hash, phase);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteString(s);
};
/**
* Tessellation shaders pass outputs to the next shader using LDS.
* The Hull shader stage receives input control points stored in LDS.
@ -66,17 +85,411 @@ namespace Shader::Optimization {
* Must be placed in uniform control flow
*/
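// Rough summary of the transform below: globally coherent buffer stores of the tess factors
// become SetPatch of the corresponding PatchFactor, WriteShared* of output-control-point or
// patch-constant data becomes SetAttribute / SetPatch(PatchGeneric), and LoadShared* of input
// control points becomes GetAttribute with a control point index recovered from the address.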
void HullShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_info) {
LOG_INFO(Render_Vulkan, "{}", IR::DumpProgram(program));
// Bad pattern matching attempt
template <typename Derived>
struct MatchObject {
inline bool DoMatch(IR::Value v) {
return static_cast<Derived*>(this)->DoMatch(v);
}
};
struct MatchValue : MatchObject<MatchValue> {
MatchValue(IR::Value& return_val_) : return_val(return_val_) {}
inline bool DoMatch(IR::Value v) {
return_val = v;
return true;
}
private:
IR::Value& return_val;
};
struct MatchIgnore : MatchObject<MatchIgnore> {
MatchIgnore() {}
inline bool DoMatch(IR::Value v) {
return true;
}
};
struct MatchImm : MatchObject<MatchImm> {
MatchImm(IR::Value& v) : return_val(v) {}
inline bool DoMatch(IR::Value v) {
if (!v.IsImmediate()) {
return false;
}
return_val = v;
return true;
}
private:
IR::Value& return_val;
};
// Matches a specific IR::Attribute
struct MatchAttribute : MatchObject<MatchAttribute> {
MatchAttribute(IR::Attribute attribute_) : attribute(attribute_) {}
inline bool DoMatch(IR::Value v) {
return v.Type() == IR::Type::Attribute && v.Attribute() == attribute;
}
private:
IR::Attribute attribute;
};
// Matches a specific immediate u32
struct MatchU32 : MatchObject<MatchU32> {
MatchU32(u32 imm_) : imm(imm_) {}
inline bool DoMatch(IR::Value v) {
return v.Type() == IR::Type::U32 && v.U32() == imm;
}
private:
u32 imm;
};
template <IR::Opcode opcode, typename... Args>
struct MatchInstObject : MatchObject<MatchInstObject<opcode>> {
static_assert(sizeof...(Args) == IR::NumArgsOf(opcode));
MatchInstObject(Args&&... args) : pattern(std::forward_as_tuple(args...)) {}
inline bool DoMatch(IR::Value v) {
IR::Inst* inst = v.TryInstRecursive();
if (!inst || inst->GetOpcode() != opcode) {
return false;
}
bool matched = true;
[&]<std::size_t... Is>(std::index_sequence<Is...>) {
((matched = matched && std::get<Is>(pattern).DoMatch(inst->Arg(Is))), ...);
}(std::make_index_sequence<sizeof...(Args)>{});
return matched;
}
private:
using MatchArgs = std::tuple<Args&...>;
MatchArgs pattern;
};
template <IR::Opcode opcode, typename... Args>
auto MakeInstPattern(Args&&... args) {
return MatchInstObject<opcode, Args...>(std::forward<Args>(args)...);
}
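// Example usage (mirroring Visit() below): capture both operands of an IMul32.
//   IR::Value a, b;
//   if (MakeInstPattern<IR::Opcode::IMul32>(MatchValue(a), MatchValue(b)).DoMatch(node)) { ... }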
struct MatchFoldImm : MatchObject<MatchFoldImm> {
MatchFoldImm(IR::Value& v) : return_val(v) {}
inline bool DoMatch(IR::Value v);
private:
IR::Value& return_val;
};
// Represent address as sum of products
// Input control point:
// PrimitiveId * input_cp_stride * #cp_per_input_patch + index * input_cp_stride + (attr# * 16 +
// component)
// Output control point
// #patches * input_cp_stride * #cp_per_input_patch + PrimitiveId * output_patch_stride +
// InvocationID * output_cp_stride + (attr# * 16 + component)
// Per patch output:
// #patches * input_cp_stride * #cp_per_input_patch + #patches * output_patch_stride +
// + PrimitiveId * per_patch_output_stride + (attr# * 16 + component)
// Sort terms left to right
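A sketch of the three forms above written out as plain arithmetic (illustrative only; the helpers and their parameter names are hypothetical, u32 is the alias from common/types.h, and attribute_offset stands for attr# * 16 + component):

inline u32 InputCpAddr(u32 primitive_id, u32 cp_index, u32 input_cp_stride,
                       u32 cp_per_input_patch, u32 attribute_offset) {
    return primitive_id * input_cp_stride * cp_per_input_patch + cp_index * input_cp_stride +
           attribute_offset;
}

inline u32 OutputCpAddr(u32 num_patches, u32 input_cp_stride, u32 cp_per_input_patch,
                        u32 primitive_id, u32 output_patch_stride, u32 invocation_id,
                        u32 output_cp_stride, u32 attribute_offset) {
    return num_patches * input_cp_stride * cp_per_input_patch +
           primitive_id * output_patch_stride + invocation_id * output_cp_stride +
           attribute_offset;
}

inline u32 PatchConstAddr(u32 num_patches, u32 input_cp_stride, u32 cp_per_input_patch,
                          u32 output_patch_stride, u32 primitive_id,
                          u32 per_patch_output_stride, u32 attribute_offset) {
    return num_patches * input_cp_stride * cp_per_input_patch +
           num_patches * output_patch_stride + primitive_id * per_patch_output_stride +
           attribute_offset;
}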
namespace {
static void InitTessConstants(IR::ScalarReg sharp_ptr_base, s32 sharp_dword_offset,
Shader::Info& info, Shader::RuntimeInfo& runtime_info,
TessellationDataConstantBuffer& tess_constants) {
info.tess_consts_ptr_base = sharp_ptr_base;
info.tess_consts_dword_offset = sharp_dword_offset;
info.ReadTessConstantBuffer(tess_constants);
if (info.l_stage == LogicalStage::TessellationControl) {
runtime_info.hs_info.InitFromTessConstants(tess_constants);
} else {
runtime_info.vs_info.InitFromTessConstants(tess_constants);
}
return;
}
struct TessSharpLocation {
IR::ScalarReg ptr_base;
u32 dword_off;
};
std::optional<TessSharpLocation> FindTessConstantSharp(IR::Inst* read_const_buffer) {
IR::Value sharp_ptr_base;
IR::Value sharp_dword_offset;
IR::Value rv = IR::Value{read_const_buffer};
IR::Value handle = read_const_buffer->Arg(0);
if (MakeInstPattern<IR::Opcode::CompositeConstructU32x4>(
MakeInstPattern<IR::Opcode::GetUserData>(MatchImm(sharp_dword_offset)), MatchIgnore(),
MatchIgnore(), MatchIgnore())
.DoMatch(handle)) {
return TessSharpLocation{.ptr_base = IR::ScalarReg::Max,
.dword_off = static_cast<u32>(sharp_dword_offset.ScalarReg())};
} else if (MakeInstPattern<IR::Opcode::CompositeConstructU32x4>(
MakeInstPattern<IR::Opcode::ReadConst>(
MakeInstPattern<IR::Opcode::CompositeConstructU32x2>(
MakeInstPattern<IR::Opcode::GetUserData>(MatchImm(sharp_ptr_base)),
MatchIgnore()),
MatchImm(sharp_dword_offset)),
MatchIgnore(), MatchIgnore(), MatchIgnore())
.DoMatch(handle)) {
return TessSharpLocation{.ptr_base = sharp_ptr_base.ScalarReg(),
.dword_off = sharp_dword_offset.U32()};
}
UNREACHABLE_MSG("failed to match tess constants sharp buf");
return {};
}
static IR::Program* g_program; // TODO delete
enum AttributeRegion { InputCP, OutputCP, PatchConst, Unknown };
struct RingAddressInfo {
AttributeRegion region{};
u32 attribute_byte_offset{};
// For InputCP and OutputCP: offset from the start of the patch's memory (includes
// attribute_byte_offset). Not relevant for PatchConst.
IR::U32 offset_in_patch{IR::Value(0u)};
};
class Pass {
public:
Pass(Info& info_, RuntimeInfo& runtime_info_) : info(info_), runtime_info(runtime_info_) {
InitTessConstants(info.tess_consts_ptr_base, info.tess_consts_dword_offset, info,
runtime_info, tess_constants);
}
RingAddressInfo WalkRingAccess(IR::Inst* access, IR::IREmitter& insert_point) {
Reset();
RingAddressInfo address_info{};
IR::Value addr;
switch (access->GetOpcode()) {
case IR::Opcode::LoadSharedU32:
case IR::Opcode::LoadSharedU64:
case IR::Opcode::LoadSharedU128:
case IR::Opcode::WriteSharedU32:
case IR::Opcode::WriteSharedU64:
case IR::Opcode::WriteSharedU128:
addr = access->Arg(0);
break;
case IR::Opcode::StoreBufferU32:
case IR::Opcode::StoreBufferU32x2:
case IR::Opcode::StoreBufferU32x3:
case IR::Opcode::StoreBufferU32x4:
addr = access->Arg(1);
break;
default:
UNREACHABLE();
}
products.emplace_back(addr);
Visit(addr);
FindIndexInfo(address_info, insert_point);
return address_info;
}
private:
void Reset() {
within_mul = false;
products.clear();
}
void Visit(IR::Value node) {
IR::Value a, b, c;
if (MakeInstPattern<IR::Opcode::IMul32>(MatchValue(a), MatchValue(b)).DoMatch(node)) {
bool saved_within_mul = within_mul;
within_mul = true;
Visit(a);
Visit(b);
within_mul = saved_within_mul;
} else if (MakeInstPattern<IR::Opcode::IAdd32>(MatchValue(a), MatchValue(b))
.DoMatch(node)) {
if (within_mul) {
UNREACHABLE_MSG("Test");
products.back().as_factors.emplace_back(IR::U32{node});
} else {
products.back().as_nested_value = IR::U32{a};
Visit(a);
products.emplace_back(b);
Visit(b);
}
} else if (MakeInstPattern<IR::Opcode::ShiftLeftLogical32>(MatchValue(a), MatchImm(b))
.DoMatch(node)) {
products.back().as_factors.emplace_back(IR::Value(u32(2 << (b.U32() - 1))));
Visit(a);
} else if (MakeInstPattern<IR::Opcode::ReadConstBuffer>(MatchIgnore(), MatchValue(b))
.DoMatch(node)) {
IR::Inst* read_const_buffer = node.InstRecursive();
IR::Value index = read_const_buffer->Arg(1);
if (index.IsImmediate()) {
u32 offset = index.U32();
if (offset < static_cast<u32>(IR::Attribute::TcsFirstEdgeTessFactorIndex) -
static_cast<u32>(IR::Attribute::TcsLsStride) + 1) {
IR::Attribute tess_constant_attr = static_cast<IR::Attribute>(
static_cast<u32>(IR::Attribute::TcsLsStride) + offset);
IR::IREmitter ir{*read_const_buffer->GetParent(),
IR::Block::InstructionList::s_iterator_to(*read_const_buffer)};
ASSERT(tess_constant_attr !=
IR::Attribute::TcsOffChipTessellationFactorThreshold);
IR::U32 replacement = ir.GetAttributeU32(tess_constant_attr);
read_const_buffer->ReplaceUsesWithAndRemove(replacement);
// Unwrap the attribute from the GetAttribute Inst and push back as a factor
// (more convenient for scanning the factors later)
node = IR::Value{tess_constant_attr};
if (IR::Value{read_const_buffer} == products.back().as_nested_value) {
products.back().as_nested_value = replacement;
}
}
}
products.back().as_factors.emplace_back(node);
} else if (MakeInstPattern<IR::Opcode::GetAttributeU32>(MatchValue(a), MatchU32(0))
.DoMatch(node)) {
products.back().as_factors.emplace_back(a);
} else if (MakeInstPattern<IR::Opcode::BitFieldSExtract>(MatchValue(a), MatchIgnore(),
MatchIgnore())
.DoMatch(node)) {
Visit(a);
} else if (MakeInstPattern<IR::Opcode::BitFieldUExtract>(MatchValue(a), MatchIgnore(),
MatchIgnore())
.DoMatch(node)) {
Visit(a);
} else if (MakeInstPattern<IR::Opcode::BitCastF32U32>(MatchValue(a)).DoMatch(node)) {
return Visit(a);
} else if (MakeInstPattern<IR::Opcode::BitCastU32F32>(MatchValue(a)).DoMatch(node)) {
return Visit(a);
} else if (node.TryInstRecursive() &&
node.InstRecursive()->GetOpcode() == IR::Opcode::Phi) {
DEBUG_ASSERT(false && "Phi test");
products.back().as_factors.emplace_back(node);
} else {
products.back().as_factors.emplace_back(node);
}
}
void FindIndexInfo(RingAddressInfo& address_info, IR::IREmitter& ir) {
// Infer which attribute region the address indexes by counting how many addends are
// multiplied by TessellationDataConstantBuffer::m_hsNumPatch (also handling m_hsOutputBase
// and m_patchConstBase).
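// For example: one addend containing a factor of m_hsNumPatch (via TcsNumPatches or
// TcsOutputBase) means the address has skipped the input-control-point region, so
// region_count == 1 selects OutputCP; a TcsPatchConstBase addend skips both the input and
// output regions, counts as two, and selects PatchConst.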
u32 region_count = 0;
// Remove addends except for the attribute offset and possibly the
// control point index calc
std::erase_if(products, [&](Product& p) {
for (IR::Value& value : p.as_factors) {
if (value.Type() == IR::Type::Attribute) {
if (value.Attribute() == IR::Attribute::TcsNumPatches ||
value.Attribute() == IR::Attribute::TcsOutputBase) {
++region_count;
return true;
} else if (value.Attribute() == IR::Attribute::TcsPatchConstBase) {
region_count += 2;
return true;
} else if (value.Attribute() == IR::Attribute::TessPatchIdInVgt) {
return true;
}
}
}
return false;
});
// DumpIR(*g_program, "before_crash");
// Look for some term with a dynamic index (should be the control point index)
for (auto i = 0; i < products.size(); i++) {
auto& factors = products[i].as_factors;
// Remember this as the index term
if (std::any_of(factors.begin(), factors.end(), [&](const IR::Value& v) {
return !v.IsImmediate() || v.Type() == IR::Type::Attribute;
})) {
address_info.offset_in_patch =
ir.IAdd(address_info.offset_in_patch, products[i].as_nested_value);
} else {
ASSERT_MSG(factors.size() == 1, "factors all const but not const folded");
// Otherwise assume it contributes to the attribute
address_info.offset_in_patch =
ir.IAdd(address_info.offset_in_patch, IR::U32{factors[0]});
address_info.attribute_byte_offset += factors[0].U32();
}
}
if (region_count == 0) {
address_info.region = AttributeRegion::InputCP;
} else if (info.l_stage == LogicalStage::TessellationControl &&
runtime_info.hs_info.IsPassthrough()) {
ASSERT(region_count <= 1);
address_info.region = AttributeRegion::PatchConst;
} else {
ASSERT(region_count <= 2);
address_info.region = AttributeRegion(region_count);
}
}
Info& info;
RuntimeInfo& runtime_info;
TessellationDataConstantBuffer tess_constants;
bool within_mul{};
// One product in the sum of products making up an address
struct Product {
Product(IR::Value val_) : as_nested_value(val_), as_factors() {}
Product(const Product& other) = default;
~Product() = default;
// IR value used as an addend in address calc
IR::U32 as_nested_value;
// all the leaves that feed the multiplication, linear
// TODO small_vector
// boost::container::small_vector<IR::Value, 4> as_factors;
std::vector<IR::Value> as_factors;
};
std::vector<Product> products;
};
} // namespace
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
g_program = &program; // TODO delete
Info& info = program.info;
Pass pass(info, runtime_info);
for (IR::Block* block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
IR::IREmitter ir{*block,
IR::Block::InstructionList::s_iterator_to(inst)}; // TODO sink this
const auto opcode = inst.GetOpcode();
switch (opcode) {
case IR::Opcode::StoreBufferU32:
case IR::Opcode::StoreBufferU32x2:
case IR::Opcode::StoreBufferU32x3:
case IR::Opcode::StoreBufferU32x4: {
// TODO: rename struct
RingAddressInfo address_info = pass.WalkRingAccess(&inst, ir);
const auto info = inst.Flags<IR::BufferInstInfo>();
if (!info.globally_coherent) {
break;
@ -89,11 +502,30 @@ void HullShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_
return ir.BitCast<IR::F32, IR::U32>(IR::U32{data});
};
const u32 num_dwords = u32(opcode) - u32(IR::Opcode::StoreBufferU32) + 1;
const auto factor_idx = info.inst_offset.Value() >> 2;
const u32 gcn_factor_idx =
(info.inst_offset.Value() + address_info.attribute_byte_offset) >> 2;
const IR::Value data = inst.Arg(2);
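// Presumably: a 24-byte tess factor stride is the quad domain (4 edge + 2 interior factors),
// while a 16-byte stride is the triangle domain (3 edge + 1 interior), with GCN dword 3 being
// the single interior factor (TessellationLodInteriorU).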
auto get_factor_attr = [&](u32 gcn_factor_idx) -> IR::Patch {
ASSERT(gcn_factor_idx * 4 < runtime_info.hs_info.tess_factor_stride);
switch (runtime_info.hs_info.tess_factor_stride) {
case 24:
return IR::PatchFactor(gcn_factor_idx);
case 16:
if (gcn_factor_idx == 3) {
return IR::Patch::TessellationLodInteriorU;
}
return IR::PatchFactor(gcn_factor_idx);
default:
UNREACHABLE_MSG("Unhandled tess factor stride");
}
};
inst.Invalidate();
if (num_dwords == 1) {
ir.SetPatch(IR::PatchFactor(factor_idx), GetValue(data));
ir.SetPatch(get_factor_attr(gcn_factor_idx), GetValue(data));
break;
}
auto* inst = data.TryInstRecursive();
@ -101,13 +533,20 @@ void HullShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_
inst->GetOpcode() == IR::Opcode::CompositeConstructU32x3 ||
inst->GetOpcode() == IR::Opcode::CompositeConstructU32x4));
for (s32 i = 0; i < num_dwords; i++) {
ir.SetPatch(IR::PatchFactor(factor_idx + i), GetValue(inst->Arg(i)));
ir.SetPatch(get_factor_attr(gcn_factor_idx + i), GetValue(inst->Arg(i)));
}
break;
}
// case IR::Opcode::WriteSharedU128: // TODO
case IR::Opcode::WriteSharedU32:
case IR::Opcode::WriteSharedU64: {
const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2;
// DumpIR(program, "before_walk");
RingAddressInfo address_info = pass.WalkRingAccess(&inst, ir);
const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32
? 1
: (opcode == IR::Opcode::WriteSharedU64 ? 2 : 4);
const IR::Value data = inst.Arg(1);
const auto [data_lo, data_hi] = [&] -> std::pair<IR::U32, IR::U32> {
if (num_dwords == 1) {
@ -116,38 +555,148 @@ void HullShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_
const auto* prod = data.InstRecursive();
return {IR::U32{prod->Arg(0)}, IR::U32{prod->Arg(1)}};
}();
const IR::Inst* ds_offset = inst.Arg(0).InstRecursive();
const u32 offset_dw = ds_offset->Arg(1).U32() >> 4;
IR::Inst* prod = ds_offset->Arg(0).TryInstRecursive();
ASSERT(prod && (prod->GetOpcode() == IR::Opcode::IAdd32 ||
prod->GetOpcode() == IR::Opcode::IMul32));
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
prod = prod->Arg(0).TryInstRecursive();
ASSERT(prod && prod->GetOpcode() == IR::Opcode::IMul32);
}
prod = prod->Arg(0).TryInstRecursive();
ASSERT(prod && prod->GetOpcode() == IR::Opcode::BitFieldSExtract &&
prod->Arg(2).IsImmediate() && prod->Arg(2).U32() == 24);
prod = prod->Arg(0).TryInstRecursive();
ASSERT(prod && prod->GetOpcode() == IR::Opcode::BitFieldUExtract);
const u32 bit_pos = prod->Arg(1).U32();
const auto SetOutput = [&ir](IR::U32 value, u32 offset_dw, bool is_patch_const) {
const auto SetOutput = [&](IR::U32 value, u32 offset_dw,
AttributeRegion output_kind) {
const IR::F32 data = ir.BitCast<IR::F32, IR::U32>(value);
if (!is_patch_const) {
if (output_kind == AttributeRegion::OutputCP) {
const u32 param = offset_dw >> 2;
const u32 comp = offset_dw & 3;
// Invocation ID array index is implicit, handled by SPIRV backend
ir.SetAttribute(IR::Attribute::Param0 + param, data, comp);
} else {
ASSERT(output_kind == AttributeRegion::PatchConst);
ir.SetPatch(IR::PatchGeneric(offset_dw), data);
}
};
ASSERT_MSG(bit_pos == 0 || bit_pos == 8, "Unknown bit extract pos {}", bit_pos);
const bool is_patch_const = bit_pos == 0;
SetOutput(data_lo, offset_dw, is_patch_const);
u32 offset_dw = address_info.attribute_byte_offset >> 2;
SetOutput(data_lo, offset_dw, address_info.region);
if (num_dwords > 1) {
SetOutput(data_hi, offset_dw + 1, is_patch_const);
// TODO handle WriteSharedU128
SetOutput(data_hi, offset_dw + 1, address_info.region);
}
inst.Invalidate();
break;
}
case IR::Opcode::LoadSharedU32: {
// case IR::Opcode::LoadSharedU64:
// case IR::Opcode::LoadSharedU128:
RingAddressInfo address_info = pass.WalkRingAccess(&inst, ir);
ASSERT(address_info.region == AttributeRegion::InputCP ||
address_info.region == AttributeRegion::OutputCP);
switch (address_info.region) {
case AttributeRegion::InputCP: {
u32 offset_dw =
(address_info.attribute_byte_offset % runtime_info.hs_info.ls_stride) >> 2;
const u32 param = offset_dw >> 2;
const u32 comp = offset_dw & 3;
IR::Value control_point_index =
ir.IDiv(IR::U32{address_info.offset_in_patch},
ir.Imm32(runtime_info.hs_info.ls_stride));
IR::Value get_attrib =
ir.GetAttribute(IR::Attribute::Param0 + param, comp, control_point_index);
get_attrib = ir.BitCast<IR::U32>(IR::F32{get_attrib});
inst.ReplaceUsesWithAndRemove(get_attrib);
break;
}
case AttributeRegion::OutputCP: {
UNREACHABLE_MSG("Unhandled output control point read");
break;
}
default:
break;
}
}
default:
break;
}
}
}
if (runtime_info.hs_info.IsPassthrough()) {
// Copy input attributes to output attributes, indexed by InvocationID
// Passthrough should imply that input and output patches have the same number of vertices
IR::Block* entry_block = *program.blocks.begin();
auto it = std::ranges::find_if(entry_block->Instructions(), [](IR::Inst& inst) {
return inst.GetOpcode() == IR::Opcode::Prologue;
});
ASSERT(it != entry_block->end());
++it;
ASSERT(it != entry_block->end());
++it;
// Prologue
// SetExec #true
// <- insert here
// ...
IR::IREmitter ir{*entry_block, it};
ASSERT(runtime_info.hs_info.ls_stride % 16 == 0);
u32 num_attributes = runtime_info.hs_info.ls_stride / 16;
const auto invocation_id = ir.GetAttributeU32(IR::Attribute::InvocationId);
for (u32 i = 0; i < num_attributes; i++) {
for (u32 j = 0; j < 4; j++) {
const auto input_attr =
ir.GetAttribute(IR::Attribute::Param0 + i, j, invocation_id);
// InvocationId is implicit index for output control point writes
ir.SetAttribute(IR::Attribute::Param0 + i, input_attr, j);
}
}
// TODO: wrap rest of program with if statement when passthrough?
// copy passthrough attributes ...
// if (InvocationId == 0) {
// program ...
// }
}
}
// TODO refactor
void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
Info& info = program.info;
Pass pass(info, runtime_info);
for (IR::Block* block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
const auto opcode = inst.GetOpcode();
switch (inst.GetOpcode()) {
case IR::Opcode::LoadSharedU32: {
// case IR::Opcode::LoadSharedU64:
// case IR::Opcode::LoadSharedU128: // TODO
RingAddressInfo address_info = pass.WalkRingAccess(&inst, ir);
ASSERT(address_info.region == AttributeRegion::OutputCP ||
address_info.region == AttributeRegion::PatchConst);
switch (address_info.region) {
case AttributeRegion::OutputCP: {
u32 offset_dw = (address_info.attribute_byte_offset %
runtime_info.vs_info.hs_output_cp_stride) >>
2;
const u32 param = offset_dw >> 2;
const u32 comp = offset_dw & 3;
IR::Value control_point_index =
ir.IDiv(IR::U32{address_info.offset_in_patch},
ir.Imm32(runtime_info.vs_info.hs_output_cp_stride));
IR::Value get_attrib =
ir.GetAttribute(IR::Attribute::Param0 + param, comp, control_point_index);
get_attrib = ir.BitCast<IR::U32>(IR::F32{get_attrib});
inst.ReplaceUsesWithAndRemove(get_attrib);
break;
}
case AttributeRegion::PatchConst: {
u32 offset_dw = address_info.attribute_byte_offset >> 2;
IR::Value get_patch = ir.GetPatch(IR::PatchGeneric(offset_dw));
inst.ReplaceUsesWithAndRemove(get_patch);
break;
}
default:
break;
}
break;
}
default:
@ -155,7 +704,140 @@ void HullShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_
}
}
}
LOG_INFO(Render_Vulkan, "{}", IR::DumpProgram(program));
}
// Run before copy prop
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info) {
TessellationDataConstantBuffer tess_constants;
Shader::Info& info = program.info;
// Find the TessellationDataConstantBuffer V#
for (IR::Block* block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
switch (inst.GetOpcode()) {
case IR::Opcode::LoadSharedU32:
case IR::Opcode::LoadSharedU64:
case IR::Opcode::LoadSharedU128:
case IR::Opcode::WriteSharedU32:
case IR::Opcode::WriteSharedU64:
case IR::Opcode::WriteSharedU128: {
IR::Value addr = inst.Arg(0);
auto read_const_buffer = IR::BreadthFirstSearch(
addr, [](IR::Inst* maybe_tess_const) -> std::optional<IR::Inst*> {
if (maybe_tess_const->GetOpcode() == IR::Opcode::ReadConstBuffer) {
return maybe_tess_const;
}
return std::nullopt;
});
if (read_const_buffer) {
auto sharp_location = FindTessConstantSharp(read_const_buffer.value());
if (sharp_location) {
if (info.FoundTessConstantsSharp()) {
ASSERT(static_cast<s32>(sharp_location->dword_off) ==
info.tess_consts_dword_offset &&
sharp_location->ptr_base == info.tess_consts_ptr_base);
}
InitTessConstants(sharp_location->ptr_base,
static_cast<s32>(sharp_location->dword_off), info,
runtime_info, tess_constants);
// break; TODO
continue;
}
}
continue;
}
default:
continue;
}
break;
}
}
ASSERT(info.FoundTessConstantsSharp());
if (info.l_stage == LogicalStage::TessellationControl) {
// Replace the BFEs on V1 (packed with patch id and output cp id) for easier pattern
// matching
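// Judging by the extract offsets/counts matched below, PackedHullInvocationInfo packs the
// patch id within the VGT in bits [7:0] and the output control point (invocation) id in
// bits [12:8].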
for (IR::Block* block : program.blocks) {
for (auto it = block->Instructions().begin(); it != block->Instructions().end(); it++) {
IR::Inst& inst = *it;
if (MakeInstPattern<IR::Opcode::BitFieldUExtract>(
MakeInstPattern<IR::Opcode::GetAttributeU32>(
MatchAttribute(IR::Attribute::PackedHullInvocationInfo), MatchIgnore()),
MatchU32(0), MatchU32(8))
.DoMatch(IR::Value{&inst})) {
IR::IREmitter emit(*block, it);
IR::Value replacement = emit.GetAttributeU32(IR::Attribute::TessPatchIdInVgt);
inst.ReplaceUsesWithAndRemove(replacement);
} else if (MakeInstPattern<IR::Opcode::BitFieldUExtract>(
MakeInstPattern<IR::Opcode::GetAttributeU32>(
MatchAttribute(IR::Attribute::PackedHullInvocationInfo),
MatchIgnore()),
MatchU32(8), MatchU32(5))
.DoMatch(IR::Value{&inst})) {
IR::IREmitter ir(*block, it);
IR::Value replacement;
if (runtime_info.hs_info.IsPassthrough()) {
// Deal with annoying pattern in BB where InvocationID use makes no sense
// (in addr calculation for patchconst write)
replacement = ir.Imm32(0);
} else {
replacement = ir.GetAttributeU32(IR::Attribute::InvocationId);
}
inst.ReplaceUsesWithAndRemove(replacement);
}
}
}
}
}
void TessellationPostprocess(IR::Program& program, RuntimeInfo& runtime_info) {
Shader::Info& info = program.info;
TessellationDataConstantBuffer tess_constants;
InitTessConstants(info.tess_consts_ptr_base, info.tess_consts_dword_offset, info, runtime_info,
tess_constants);
for (IR::Block* block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
if (inst.GetOpcode() == IR::Opcode::GetAttributeU32) {
switch (inst.Arg(0).Attribute()) {
case IR::Attribute::TcsLsStride:
ASSERT(info.l_stage == LogicalStage::TessellationControl);
inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.m_lsStride));
break;
case IR::Attribute::TcsCpStride:
inst.ReplaceUsesWithAndRemove(IR::Value(tess_constants.m_hsCpStride));
break;
case IR::Attribute::TcsNumPatches:
case IR::Attribute::TcsOutputBase:
case IR::Attribute::TcsPatchConstSize:
case IR::Attribute::TcsPatchConstBase:
case IR::Attribute::TcsPatchOutputSize:
case IR::Attribute::TcsFirstEdgeTessFactorIndex:
default:
break;
}
}
}
}
for (IR::Block* block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
switch (inst.GetOpcode()) {
case IR::Opcode::LoadSharedU32:
case IR::Opcode::LoadSharedU64:
case IR::Opcode::LoadSharedU128:
case IR::Opcode::WriteSharedU32:
case IR::Opcode::WriteSharedU64:
case IR::Opcode::WriteSharedU128:
UNREACHABLE_MSG("Remaining DS instruction. {} transform failed",
info.l_stage == LogicalStage::TessellationControl ? "Hull"
: "Domain");
default:
break;
}
}
}
}
} // namespace Shader::Optimization

View File

@ -16,7 +16,11 @@ void FlattenExtendedUserdataPass(IR::Program& program);
void ResourceTrackingPass(IR::Program& program);
void CollectShaderInfoPass(IR::Program& program);
void LowerSharedMemToRegisters(IR::Program& program);
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info);
void HullShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_info);
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
Stage stage);
void TessellationPreprocess(IR::Program& program, RuntimeInfo& runtime_info);
void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
void DomainShaderTransform(IR::Program& program, RuntimeInfo& runtime_info);
void TessellationPostprocess(IR::Program& program, RuntimeInfo& runtime_info);
} // namespace Shader::Optimization

View File

@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/program.h"
@ -10,80 +9,12 @@
#include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h"
namespace {
// TODO clean this up. Maybe remove
// from https://github.com/chaotic-cx/mesa-mirror/blob/main/src/amd/compiler/README.md
// basically logical stage x hw stage permutations
enum class SwHwStagePerm {
vertex_vs,
fragment_fs,
vertex_ls,
tess_control_hs,
tess_eval_vs,
vertex_es,
geometry_gs,
gs_copy_vs,
tess_eval_es,
compute_cs,
};
static SwHwStagePerm GetSwHwStagePerm(Shader::Stage hw_stage, Shader::LogicalStage sw_stage) {
using namespace Shader;
switch (sw_stage) {
case LogicalStage::Fragment:
ASSERT(hw_stage == Stage::Fragment);
return SwHwStagePerm::fragment_fs;
case LogicalStage::Vertex: {
switch (hw_stage) {
case Stage::Vertex:
return SwHwStagePerm::vertex_vs;
case Stage::Export:
return SwHwStagePerm::vertex_es;
case Stage::Local:
return SwHwStagePerm::vertex_ls;
default:
UNREACHABLE();
}
} break;
case LogicalStage::TessellationControl:
ASSERT(hw_stage == Stage::Hull);
return SwHwStagePerm::tess_control_hs;
case LogicalStage::TessellationEval: {
switch (hw_stage) {
case Stage::Vertex:
return SwHwStagePerm::tess_eval_vs;
case Stage::Export:
return SwHwStagePerm::tess_eval_es;
default:
UNREACHABLE();
}
}
case LogicalStage::Geometry:
ASSERT(hw_stage == Stage::Geometry);
return SwHwStagePerm::geometry_gs;
case LogicalStage::GsCopy:
ASSERT(hw_stage == Stage::Vertex);
return SwHwStagePerm::gs_copy_vs;
case LogicalStage::Compute:
ASSERT(hw_stage == Stage::Compute);
return SwHwStagePerm::compute_cs;
default:
UNREACHABLE();
}
}
}; // namespace
namespace Shader::Optimization {
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info) {
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
Stage stage) {
auto& info = program.info;
Stage stage = info.stage;
LogicalStage l_stage = info.l_stage;
SwHwStagePerm stage_perm = GetSwHwStagePerm(stage, l_stage);
const auto& ForEachInstruction = [&](auto func) {
for (IR::Block* block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
@ -93,8 +24,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
}
};
switch (stage_perm) {
case SwHwStagePerm::vertex_ls: {
switch (stage) {
case Stage::Local: {
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
const auto opcode = inst.GetOpcode();
switch (opcode) {
@ -126,7 +57,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
});
break;
}
case SwHwStagePerm::vertex_es: {
case Stage::Export: {
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
const auto opcode = inst.GetOpcode();
switch (opcode) {
@ -157,7 +88,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
});
break;
}
case SwHwStagePerm::geometry_gs: {
case Stage::Geometry: {
const auto& gs_info = runtime_info.gs_info;
info.gs_copy_data = Shader::ParseCopyShader(gs_info.vs_copy);
@ -171,7 +102,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
}
const auto shl_inst = inst.Arg(1).TryInstRecursive();
const auto vertex_id = shl_inst->Arg(0).Resolve().U32() >> 2;
const auto vertex_id = ir.Imm32(shl_inst->Arg(0).Resolve().U32() >> 2);
const auto offset = inst.Arg(1).TryInstRecursive()->Arg(1);
const auto bucket = offset.Resolve().U32() / 256u;
const auto attrib = bucket < 4 ? IR::Attribute::Position0

View File

@ -16,9 +16,9 @@
#include "shader_recompiler/exception.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/patch.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/type.h"
#include "shader_recompiler/ir/patch.h"
namespace Shader::IR {

View File

@ -32,7 +32,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
}
IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
const RuntimeInfo& runtime_info, const Profile& profile) {
RuntimeInfo& runtime_info, const Profile& profile) {
// Ensure first instruction is expected.
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
if (code[0] != token_mov_vcchi) {
@ -65,53 +65,54 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
// Run optimization passes
const auto stage = program.info.stage;
bool dump_ir = true;
bool extra_id_removal = true; // TODO remove all this stuff
auto dumpMatchingIR = [&](std::string phase) {
if (dump_ir) {
if (Config::dumpShaders()) {
std::string s = IR::DumpProgram(program);
using namespace Common::FS;
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
if (!std::filesystem::exists(dump_dir)) {
std::filesystem::create_directories(dump_dir);
}
const auto filename =
fmt::format("{}_{:#018x}.{}.ir.txt", info.stage, info.pgm_hash, phase);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteString(s);
if (Config::dumpShaders()) {
std::string s = IR::DumpProgram(program);
using namespace Common::FS;
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
if (!std::filesystem::exists(dump_dir)) {
std::filesystem::create_directories(dump_dir);
}
const auto filename =
fmt::format("{}_{:#018x}.{}.ir.txt", info.stage, info.pgm_hash, phase);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteString(s);
}
};
dumpMatchingIR("init");
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
if (extra_id_removal) {
Shader::Optimization::IdentityRemovalPass(program.blocks);
}
Shader::Optimization::IdentityRemovalPass(program.blocks);
// Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
dumpMatchingIR("post_ssa");
if (stage == Stage::Hull) {
Shader::Optimization::TessellationPreprocess(program, runtime_info);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
dumpMatchingIR("pre_hull");
Shader::Optimization::HullShaderTransform(program, runtime_info);
dumpMatchingIR("post_hull");
Shader::Optimization::TessellationPostprocess(program, runtime_info);
} else if (info.l_stage == LogicalStage::TessellationEval) {
Shader::Optimization::TessellationPreprocess(program, runtime_info);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
dumpMatchingIR("pre_domain");
Shader::Optimization::DomainShaderTransform(program, runtime_info);
dumpMatchingIR("post_domain");
Shader::Optimization::TessellationPostprocess(program, runtime_info);
}
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
if (extra_id_removal) {
Shader::Optimization::IdentityRemovalPass(program.blocks);
}
dumpMatchingIR("pre_ring");
Shader::Optimization::RingAccessElimination(program, runtime_info);
if (extra_id_removal) {
Shader::Optimization::IdentityRemovalPass(program.blocks);
}
dumpMatchingIR("post_ring");
Shader::Optimization::RingAccessElimination(program, runtime_info, stage);
if (stage != Stage::Compute) {
Shader::Optimization::LowerSharedMemToRegisters(program);
}
Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage);
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::FlattenExtendedUserdataPass(program);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::IdentityRemovalPass(program.blocks);
Shader::Optimization::DeadCodeEliminationPass(program);
Shader::Optimization::CollectShaderInfoPass(program);
dumpMatchingIR("final");
return program;
}

View File

@ -28,6 +28,6 @@ struct Pools {
};
[[nodiscard]] IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
const RuntimeInfo& runtime_info, const Profile& profile);
RuntimeInfo& runtime_info, const Profile& profile);
} // namespace Shader

View File

@ -7,6 +7,7 @@
#include <span>
#include <boost/container/static_vector.hpp>
#include "common/types.h"
#include "shader_recompiler/frontend/tessellation.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/types.h"
@ -74,30 +75,56 @@ struct VertexRuntimeInfo {
u32 num_outputs;
std::array<VsOutputMap, 3> outputs;
bool emulate_depth_negative_one_to_one{};
// Domain
AmdGpu::TessellationType tess_type;
AmdGpu::TessellationTopology tess_topology;
AmdGpu::TessellationPartitioning tess_partitioning;
u32 hs_output_cp_stride{};
bool operator==(const VertexRuntimeInfo& other) const noexcept {
return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one &&
tess_type == other.tess_type && tess_topology == other.tess_topology &&
tess_partitioning == other.tess_partitioning;
tess_partitioning == other.tess_partitioning &&
hs_output_cp_stride == other.hs_output_cp_stride;
}
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
hs_output_cp_stride = tess_constants.m_hsCpStride;
}
};
struct HullRuntimeInfo {
// from registers
u32 output_control_points;
// TODO: only needed while debugging; probably delete these
u32 input_control_points;
u32 num_patches;
u32 num_instances;
u64 tess_factor_memory_base;
AmdGpu::TessellationType tess_type;
AmdGpu::TessellationTopology tess_topology;
AmdGpu::TessellationPartitioning tess_partitioning;
bool operator==(const HullRuntimeInfo& other) const noexcept {
return output_control_points == other.output_control_points;
// from HullStateConstants in HsProgram (TODO: don't rely on this)
u32 tess_factor_stride;
// from tess constants buffer
u32 ls_stride;
u32 hs_output_cp_stride;
u32 hs_num_patch;
u32 hs_output_base;
u32 patch_const_size;
u32 patch_const_base;
u32 patch_output_size;
u32 first_edge_tess_factor_index;
auto operator<=>(const HullRuntimeInfo&) const noexcept = default;
bool IsPassthrough() const {
return hs_output_base == 0;
}
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
ls_stride = tess_constants.m_lsStride;
hs_output_cp_stride = tess_constants.m_hsCpStride;
hs_num_patch = tess_constants.m_hsNumPatch;
hs_output_base = tess_constants.m_hsOutputBase;
patch_const_size = tess_constants.m_patchConstSize;
patch_const_base = tess_constants.m_patchConstBase;
patch_output_size = tess_constants.m_patchOutputSize;
first_edge_tess_factor_index = tess_constants.m_firstEdgeTessFactorIndex;
}
};
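For orientation, a minimal sketch of the TessellationDataConstantBuffer fields consumed by the two InitFromTessConstants helpers above. Only the members read here are listed; their order, their types, and any remaining fields are assumptions, not the actual definition from shader_recompiler/frontend/tessellation.h.
namespace Shader {
// Assumed shape; lists only what InitFromTessConstants reads above.
struct TessellationDataConstantBuffer {
    u32 m_lsStride;                  // LS output vertex stride
    u32 m_hsCpStride;                // HS output control-point stride
    u32 m_hsNumPatch;                // patches per thread group
    u32 m_hsOutputBase;              // base offset of HS control-point outputs
    u32 m_patchConstSize;            // per-patch constant size
    u32 m_patchConstBase;            // base offset of per-patch constants
    u32 m_patchOutputSize;           // total patch output size
    u32 m_firstEdgeTessFactorIndex;  // index of the first edge tess factor
};
} // namespace Shader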

View File

@ -143,6 +143,22 @@ struct Liverpool {
}
};
struct HsStageRegisters {
u32 vgt_tf_param;
u32 vgt_hos_max_tess_level;
u32 vgt_hos_min_tess_level;
};
struct HsConstants {
u32 num_input_cp;
u32 num_output_cp;
u32 num_patch_const;
u32 cp_stride;
u32 num_threads;
u32 tess_factor_stride;
u32 first_edge_tess_factor_index;
};
struct ComputeProgram {
u32 dispatch_initiator;
u32 dim_x;
@ -974,7 +990,8 @@ struct Liverpool {
BitField<2, 1, u32> hs_en;
BitField<3, 2, u32> es_en;
BitField<5, 1, u32> gs_en;
BitField<6, 1, u32> vs_en;
BitField<6, 2, u32> vs_en;
BitField<8, 24, u32> dynamic_hs; // TODO testing
bool IsStageEnabled(u32 stage) const {
switch (stage) {
@ -1145,7 +1162,11 @@ struct Liverpool {
ShaderProgram es_program;
INSERT_PADDING_WORDS(0x2C);
ShaderProgram hs_program;
INSERT_PADDING_WORDS(0x2C);
// TODO delete. These don't actually correspond to real registers, but I'll stash them
// here to debug
HsStageRegisters hs_registers;
HsConstants hs_constants;
INSERT_PADDING_WORDS(0x2D48 - 0x2D08 - 20 - 3 - 7); // pad up to ls_program at 0x2D48
ShaderProgram ls_program;
INSERT_PADDING_WORDS(0xA4);
ComputeProgram cs_program;
@ -1432,6 +1453,8 @@ static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
static_assert(GFX6_3D_REG_INDEX(gs_program) == 0x2C88);
static_assert(GFX6_3D_REG_INDEX(es_program) == 0x2CC8);
static_assert(GFX6_3D_REG_INDEX(hs_program) == 0x2D08);
static_assert(GFX6_3D_REG_INDEX(hs_registers) == 0x2D1C);
static_assert(GFX6_3D_REG_INDEX(hs_constants) == 0x2D1F);
static_assert(GFX6_3D_REG_INDEX(ls_program) == 0x2D48);
static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00);
static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03);
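A quick arithmetic check of the padding expression and the two new asserts above, assuming ShaderProgram spans 20 words, which is what the existing 0x2D1C assert implies:
// 0x2D48 (ls_program) - 0x2D08 (hs_program) = 0x40 = 64 words
// 64 - 20 (ShaderProgram) - 3 (HsStageRegisters) - 7 (HsConstants) = 34 padding words
static_assert(0x2D48 - 0x2D08 - 20 - 3 - 7 == 34);
static_assert(0x2D08 + 20 == 0x2D1C); // hs_registers
static_assert(0x2D1C + 3 == 0x2D1F);  // hs_constants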

View File

@ -30,6 +30,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
const vk::Device device = instance.GetDevice();
std::ranges::copy(infos, stages.begin());
BuildDescSetLayout();
const bool uses_tessellation = stages[u32(LogicalStage::TessellationControl)];
const vk::PushConstantRange push_constants = {
.stageFlags = gp_stage_flags,
@ -107,8 +108,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
"Primitive restart index other than -1 is not supported yet");
const vk::PipelineTessellationStateCreateInfo tessellation_state = {
// TODO how to handle optional member of graphics key when dynamic state not supported?
//.patchControlPoints = key.
.patchControlPoints = key.patch_control_points,
};
const vk::PipelineRasterizationStateCreateInfo raster_state = {
@ -173,8 +173,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
} else {
dynamic_states.push_back(vk::DynamicState::eVertexInputBindingStrideEXT);
}
ASSERT(instance.IsPatchControlPointsDynamicState()); // TODO remove
if (instance.IsPatchControlPointsDynamicState()) {
if (uses_tessellation && instance.IsPatchControlPointsDynamicState()) {
dynamic_states.push_back(vk::DynamicState::ePatchControlPointsEXT);
}
@ -326,8 +325,9 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
.pStages = shader_stages.data(),
.pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr,
.pInputAssemblyState = &input_assembly,
.pTessellationState =
!instance.IsPatchControlPointsDynamicState() ? &tessellation_state : nullptr,
.pTessellationState = (uses_tessellation && !instance.IsPatchControlPointsDynamicState())
? &tessellation_state
: nullptr,
.pViewportState = &viewport_info,
.pRasterizationState = &raster_state,
.pMultisampleState = &multisampling,
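When the dynamic-state path is taken, the control-point count is no longer baked into the pipeline and has to be recorded per draw. A minimal sketch of what that draw-time call could look like, assuming access to the instance and the guest registers; this is illustrative, not the actual rasterizer code:
// Hedged sketch: only valid when ePatchControlPointsEXT was added to dynamic_states.
void SetPatchControlPoints(vk::CommandBuffer cmdbuf, const AmdGpu::Liverpool::Regs& regs,
                           const Vulkan::Instance& instance) {
    if (instance.IsPatchControlPointsDynamicState()) {
        cmdbuf.setPatchControlPointsEXT(regs.ls_hs_config.hs_input_control_points.Value());
    }
}
When the extension path is unavailable, the count instead comes from key.patch_control_points, which RefreshGraphicsKey fills in further down.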

View File

@ -52,6 +52,7 @@ struct GraphicsPipelineKey {
std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
std::array<vk::Format, MaxVertexBufferCount> vertex_buffer_formats;
u32 patch_control_points;
bool operator==(const GraphicsPipelineKey& key) const noexcept {
return std::memcmp(this, &key, sizeof(key)) == 0;

View File

@ -258,7 +258,8 @@ bool Instance::CreateDevice() {
add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME);
extended_dynamic_state_2 = add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
const bool extended_dynamic_state_2 =
add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
// The next two extensions are required to be available together in order to support write masks
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);
@ -328,6 +329,7 @@ bool Instance::CreateDevice() {
.imageCubeArray = features.imageCubeArray,
.independentBlend = features.independentBlend,
.geometryShader = features.geometryShader,
.tessellationShader = features.tessellationShader,
.logicOp = features.logicOp,
.depthBiasClamp = features.depthBiasClamp,
.fillModeNonSolid = features.fillModeNonSolid,
@ -379,6 +381,9 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT{
.extendedDynamicState = true,
},
vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT{
.extendedDynamicState2PatchControlPoints = true,
},
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{
.extendedDynamicState3ColorWriteMask = true,
},
@ -454,6 +459,16 @@ bool Instance::CreateDevice() {
if (!legacy_vertex_attributes) {
device_chain.unlink<vk::PhysicalDeviceLegacyVertexAttributesFeaturesEXT>();
}
if (extended_dynamic_state_2) {
patch_control_points_dynamic_state =
feature_chain.get<vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT>()
.extendedDynamicState2PatchControlPoints;
device_chain.get<vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT>()
.extendedDynamicState2PatchControlPoints = patch_control_points_dynamic_state;
} else {
patch_control_points_dynamic_state = false;
device_chain.unlink<vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT>();
}
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
if (device_result != vk::Result::eSuccess) {

View File

@ -134,7 +134,7 @@ public:
}
bool IsPatchControlPointsDynamicState() const {
return extended_dynamic_state_2;
return patch_control_points_dynamic_state;
}
/// Returns true when the nullDescriptor feature of VK_EXT_robustness2 is supported.
@ -337,7 +337,7 @@ private:
bool debug_utils_supported{};
bool has_nsight_graphics{};
bool has_renderdoc{};
bool extended_dynamic_state_2{};
bool patch_control_points_dynamic_state{};
};
} // namespace Vulkan

View File

@ -97,15 +97,15 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_
}
case Stage::Hull: {
BuildCommon(regs.hs_program);
info.hs_info.output_control_points = regs.ls_hs_config.hs_output_control_points.Value();
info.hs_info.input_control_points = regs.ls_hs_config.hs_input_control_points;
info.hs_info.num_patches = regs.ls_hs_config.num_patches;
// Suspicious about this in an apparently "passthrough" hull shader. Probably not relevant.
info.hs_info.num_instances = regs.num_instances.NumInstances();
info.hs_info.tess_factor_memory_base = regs.vgt_tf_memory_base.MemoryBase();
info.hs_info.tess_type = regs.tess_config.type;
info.hs_info.tess_topology = regs.tess_config.topology;
info.hs_info.tess_partitioning = regs.tess_config.partitioning;
// TODO: ls_hs_config.hs_output_control_points seems to be == 1 when doing passthrough,
// instead of the real number, which matches the input patch topology
// info.hs_info.output_control_points = regs.ls_hs_config.hs_output_control_points.Value();
// TODO: don't rely on HullStateConstants
info.hs_info.output_control_points = regs.hs_constants.num_output_cp;
info.hs_info.tess_factor_stride = regs.hs_constants.tess_factor_stride;
// We need to initialize most hs_info fields after finding the V# with tess constants
break;
}
case Stage::Export: {
@ -244,27 +244,6 @@ const ComputePipeline* PipelineCache::GetComputePipeline() {
return it->second.get();
}
bool ShouldSkipShader(u64 shader_hash, const char* shader_type) {
static std::vector<u64> skip_hashes = {
0xbc234799 /* passthrough */,
0x8453cd1c /* passthrough */,
0xd67db0ef /* passthrough */,
0x34121ac6 /* passthrough*/,
0xa26750c1 /* passthrough, warp */,
0xbb88db5f /* passthrough */,
0x90c6fb05 /* passthrough */,
0x9fd272d7 /* forbidden woods (not PS) */,
0x2807dd6c /* forbidden woods, down elevator (not PS) */,
0x627ac5b9 /* ayyylmao*, passthrough */,
0xb5fb5174 /* rom (not PS) */,
};
if (std::ranges::contains(skip_hashes, shader_hash)) {
LOG_WARNING(Render_Vulkan, "Skipped {} shader hash {:#x}.", shader_type, shader_hash);
return true;
}
return false;
}
bool PipelineCache::RefreshGraphicsKey() {
std::memset(&graphics_key, 0, sizeof(GraphicsPipelineKey));
@ -321,6 +300,11 @@ bool PipelineCache::RefreshGraphicsKey() {
key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard);
key.vertex_buffer_formats.fill(vk::Format::eUndefined);
key.patch_control_points = 0;
if (regs.stage_enable.hs_en.Value() && !instance.IsPatchControlPointsDynamicState()) {
key.patch_control_points = regs.ls_hs_config.hs_input_control_points.Value();
}
// First pass of bindings check to identify formats and swizzles and pass them to the shader
// recompiler.
for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
@ -373,10 +357,6 @@ bool PipelineCache::RefreshGraphicsKey() {
return false;
}
if (ShouldSkipShader(bininfo->shader_hash, "graphics")) {
return false;
}
auto params = Liverpool::GetParams(*pgm);
std::optional<Shader::Gcn::FetchShaderData> fetch_shader_;
std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_,
@ -497,8 +477,7 @@ bool PipelineCache::RefreshComputeKey() {
return true;
}
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
const Shader::RuntimeInfo& runtime_info,
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info,
std::span<const u32> code, size_t perm_idx,
Shader::Backend::Bindings& binding) {
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
@ -532,7 +511,7 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stage,
Shader::ShaderParams params,
Shader::Backend::Bindings& binding) {
const auto runtime_info = BuildRuntimeInfo(stage, l_stage);
auto runtime_info = BuildRuntimeInfo(stage, l_stage);
auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
if (new_program) {
it_pgm.value() = std::make_unique<Program>(stage, l_stage, params);
@ -548,6 +527,15 @@ PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stag
auto& program = it_pgm.value();
auto& info = program->info;
info.RefreshFlatBuf();
if (l_stage == LogicalStage::TessellationControl || l_stage == LogicalStage::TessellationEval) {
Shader::TessellationDataConstantBuffer tess_constants;
info.ReadTessConstantBuffer(tess_constants);
if (l_stage == LogicalStage::TessellationControl) {
runtime_info.hs_info.InitFromTessConstants(tess_constants);
} else {
runtime_info.vs_info.InitFromTessConstants(tess_constants);
}
}
const auto spec = Shader::StageSpecialization(info, runtime_info, profile, binding);
size_t perm_idx = program->modules.size();
vk::ShaderModule module{};

View File

@ -73,7 +73,7 @@ private:
std::string_view ext);
std::optional<std::vector<u32>> GetShaderPatch(u64 hash, Shader::Stage stage, size_t perm_idx,
std::string_view ext);
vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info,
vk::ShaderModule CompileModule(Shader::Info& info, Shader::RuntimeInfo& runtime_info,
std::span<const u32> code, size_t perm_idx,
Shader::Backend::Bindings& binding);
Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage);

View File

@ -50,9 +50,9 @@ void Rasterizer::CpSync() {
bool Rasterizer::FilterDraw() {
const auto& regs = liverpool->regs;
// Tessellation was previously unsupported, so patch draws were skipped to avoid locking up
// the driver; the skip below stays disabled while the WIP tessellation path is tested.
if (regs.primitive_type == AmdGpu::PrimitiveType::PatchPrimitive) {
return false;
}
// if (regs.primitive_type == AmdGpu::PrimitiveType::PatchPrimitive) {
// return false;
// }
// There are several cases (e.g. FCE, FMask/HTile decompression) where we don't need to do an
// actual draw hence can skip pipeline creation.
if (regs.color_control.mode == Liverpool::ColorControl::OperationMode::EliminateFastClear) {