fix compiler errors after merge

DONT MERGE set log file to /dev/null

DONT MERGE linux pthread bb fix

save work

DONT MERGE dump ir

save more work

fix mistake with ES shader

skip list

add input patch control points dynamic state

random stuff
Frodo Baggins 2024-10-16 09:51:45 -07:00
parent c9f0771c0f
commit 6a4cf2763a
30 changed files with 422 additions and 43 deletions

View File

@ -62,7 +62,8 @@ private:
class FileBackend {
public:
explicit FileBackend(const std::filesystem::path& filename)
: file{filename, FS::FileAccessMode::Write, FS::FileType::TextFile} {}
: file{std::filesystem::path("/dev/null"), FS::FileAccessMode::Write,
FS::FileType::TextFile} {}
~FileBackend() = default;

View File

@ -1,5 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/runtime_info.h"
#pragma clang optimize off
#include <span>
#include <type_traits>
@ -285,6 +286,9 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
ctx.AddExtension("SPV_KHR_fragment_shader_barycentric");
ctx.AddCapability(spv::Capability::FragmentBarycentricKHR);
}
if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
ctx.AddCapability(spv::Capability::Tessellation);
}
}
void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
@ -309,7 +313,6 @@ void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
break;
case LogicalStage::TessellationEval: {
execution_model = spv::ExecutionModel::TessellationEvaluation;
ctx.AddCapability(spv::Capability::Tessellation);
const auto& vs_info = ctx.runtime_info.vs_info;
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type));
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning));

View File

@ -34,4 +34,13 @@ void EmitDeviceMemoryBarrier(EmitContext& ctx) {
MemoryBarrier(ctx, spv::Scope::Device);
}
void EmitTcsOutputBarrier(EmitContext& ctx) {
const auto execution{spv::Scope::Workgroup};
const auto memory{spv::Scope::Invocation};
const auto memory_semantics{spv::MemorySemanticsMask::MaskNone};
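// With the standard SPIR-V enum values (Workgroup = 2, Invocation = 4, MaskNone = 0), the call
// below should emit roughly: OpControlBarrier %uint_2 %uint_4 %uint_0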
ctx.OpControlBarrier(ctx.ConstU32(static_cast<u32>(execution)),
ctx.ConstU32(static_cast<u32>(memory)),
ctx.ConstU32(static_cast<u32>(memory_semantics)));
}
} // namespace Shader::Backend::SPIRV

View File

@ -2,6 +2,9 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/runtime_info.h"
#pragma clang optimize off
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/ir/patch.h"
@ -273,8 +276,21 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
ctx.u32_zero_value);
case IR::Attribute::PrimitiveId:
ASSERT(ctx.info.stage == Stage::Geometry);
ASSERT(ctx.info.l_stage == LogicalStage::Geometry ||
ctx.info.l_stage == LogicalStage::TessellationControl ||
ctx.info.l_stage == LogicalStage::TessellationEval);
return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
case IR::Attribute::InvocationId:
ASSERT(ctx.info.l_stage == LogicalStage::Geometry ||
ctx.info.l_stage == LogicalStage::TessellationControl);
return ctx.OpLoad(ctx.U32[1], ctx.invocation_id);
case IR::Attribute::PatchVertices:
ASSERT(ctx.info.l_stage == LogicalStage::TessellationControl);
return ctx.OpLoad(ctx.U32[1], ctx.patch_vertices);
case IR::Attribute::PackedHullInvocationInfo:
// TODO figure out what to do with this
// should be dead code, but otherwise return 0 or concat PrimitiveId and InvocationId
return ctx.u32_zero_value;
default:
UNREACHABLE_MSG("Read U32 attribute {}", attr);
}

View File

@ -28,8 +28,6 @@ Id EmitConditionRef(EmitContext& ctx, const IR::Value& value);
void EmitReference(EmitContext&);
void EmitPhiMove(EmitContext&);
void EmitJoin(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
void EmitGetScc(EmitContext& ctx);
void EmitGetExec(EmitContext& ctx);
void EmitGetVcc(EmitContext& ctx);
@ -53,6 +51,7 @@ void EmitDebugPrint(EmitContext& ctx, IR::Inst* inst, Id arg0, Id arg1, Id arg2,
void EmitBarrier(EmitContext& ctx);
void EmitWorkgroupMemoryBarrier(EmitContext& ctx);
void EmitDeviceMemoryBarrier(EmitContext& ctx);
void EmitTcsOutputBarrier(EmitContext& ctx);
Id EmitGetUserData(EmitContext& ctx, IR::ScalarReg reg);
void EmitGetThreadBitScalarReg(EmitContext& ctx);
void EmitSetThreadBitScalarReg(EmitContext& ctx);

View File

@ -386,6 +386,13 @@ void EmitContext::DefineInputs() {
}
break;
}
case LogicalStage::TessellationControl: {
invocation_id =
    DefineVariable(U32[1], spv::BuiltIn::InvocationId, spv::StorageClass::Input);
patch_vertices =
DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input);
break;
}
case LogicalStage::TessellationEval: {
tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord);
break;

View File

@ -193,6 +193,7 @@ public:
Id clip_distances{};
Id cull_distances{};
Id patch_vertices{};
Id output_tess_level_outer{};
Id output_tess_level_inner{};
Id tess_coord;
@ -200,6 +201,7 @@ public:
Id workgroup_id{};
Id local_invocation_id{};
Id invocation_id{}; // for instanced geoshaders or output vertices within TCS patch
Id subgroup_local_invocation_id{};
Id image_u32{};

View File

@ -1,5 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/runtime_info.h"
#pragma clang optimize off
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/ir/reg.h"
@ -73,10 +74,11 @@ void Translator::EmitDataShare(const GcnInst& inst) {
void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
const IR::U32 value{GetSrc(inst.src[0])};
if (info.stage != Stage::Compute) {
SetDst(inst.dst[0], value);
} else {
if (info.l_stage == LogicalStage::Compute ||
info.l_stage == LogicalStage::TessellationControl) {
SetDst(inst.dst[0], ir.ReadFirstLane(value));
} else {
SetDst(inst.dst[0], value);
}
}

View File

@ -1,6 +1,8 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <bit>
#include "common/assert.h"
#include "shader_recompiler/frontend/translate/translate.h"
namespace Shader::Gcn {
@ -78,6 +80,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
return S_BFM_B32(inst);
case Opcode::S_MUL_I32:
return S_MUL_I32(inst);
case Opcode::S_BFE_I32:
return S_BFE_I32(inst);
case Opcode::S_BFE_U32:
return S_BFE_U32(inst);
case Opcode::S_ABSDIFF_I32:
@ -444,6 +448,24 @@ void Translator::S_BFE_U32(const GcnInst& inst) {
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
}
void Translator::S_BFE_I32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
IR::U32 result;
ASSERT_MSG(src1.IsImmediate(), "Unhandled S_BFE_I32 with non-immediate mask");
u32 mask = src1.U32();
ASSERT(mask != 0);
u32 offset = std::countr_zero(mask);
u32 count = std::popcount(mask);
mask = mask >> offset;
ASSERT_MSG((mask & (mask + 1)) == 0, "mask {} has non-adjacent bits set", mask);
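// Worked example with a hypothetical immediate: src1 = 0x00000FF0 -> offset = 4, count = 8;
// mask >> offset = 0xFF, and 0xFF & 0x100 == 0, so the set bits are contiguous.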
result = ir.BitFieldExtract(src0, ir.Imm32(offset), ir.Imm32(count), true);
SetDst(inst.dst[0], result);
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
}
void Translator::S_ABSDIFF_I32(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};

View File

@ -8,6 +8,8 @@
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/amdgpu/types.h"
@ -51,7 +53,7 @@ void Translator::EmitPrologue() {
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
}
break;
case Stage::Fragment:
case LogicalStage::Fragment:
dst_vreg = IR::VectorReg::V0;
if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
++dst_vreg; // I
@ -121,15 +123,28 @@ void Translator::EmitPrologue() {
}
}
break;
case LogicalStage::TessellationControl:
ir.SetVectorReg(IR::VectorReg::V0, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
case LogicalStage::TessellationControl: {
ir.SetVectorReg(IR::VectorReg::V1,
ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo));
// Test
// ir.SetPatch(IR::Patch::TessellationLodLeft, ir.Imm32(1.0f));
// ir.SetPatch(IR::Patch::TessellationLodTop, ir.Imm32(1.0f));
// ir.SetPatch(IR::Patch::TessellationLodRight, ir.Imm32(1.0f));
// ir.SetPatch(IR::Patch::TessellationLodBottom, ir.Imm32(1.0f));
// ir.SetPatch(IR::Patch::TessellationLodInteriorU, ir.Imm32(1.0f));
// ir.SetPatch(IR::Patch::TessellationLodInteriorV, ir.Imm32(1.0f));
break;
}
case LogicalStage::TessellationEval:
ir.SetVectorReg(IR::VectorReg::V0,
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU));
ir.SetVectorReg(IR::VectorReg::V1,
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV));
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
// I think V2 is actually the patch id among the patches running on the local CU, used in
// compiler-generated address calculations, and V3 is the patch id within the draw
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::TessPatchIdInVgt));
ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
break;
case LogicalStage::Compute:
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0));

View File

@ -95,6 +95,7 @@ public:
void S_BFM_B32(const GcnInst& inst);
void S_MUL_I32(const GcnInst& inst);
void S_BFE_U32(const GcnInst& inst);
void S_BFE_I32(const GcnInst& inst);
void S_ABSDIFF_I32(const GcnInst& inst);
void S_NOT_B32(const GcnInst& inst);

View File

@ -251,6 +251,10 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
"Non immediate offset not supported");
}
if (info.stage == Stage::Hull) {
// printf("here\n"); // break
}
IR::Value address = [&] -> IR::Value {
if (is_ring) {
return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);

View File

@ -104,6 +104,8 @@ std::string NameOf(Attribute attribute) {
return "VertexId";
case Attribute::InstanceId:
return "InstanceId";
case Attribute::PrimitiveId:
return "PrimitiveId";
case Attribute::FragCoord:
return "FragCoord";
case Attribute::IsFrontFace:
@ -114,6 +116,12 @@ std::string NameOf(Attribute attribute) {
return "LocalInvocationId";
case Attribute::LocalInvocationIndex:
return "LocalInvocationIndex";
case Attribute::InvocationId:
return "InvocationId";
case Attribute::PackedHullInvocationInfo:
return "PackedHullInvocationInfo";
case Attribute::PatchVertices:
return "PatchVertices";
case Attribute::TessellationEvaluationPointU:
return "TessellationEvaluationPointU";
case Attribute::TessellationEvaluationPointV:

View File

@ -72,10 +72,14 @@ enum class Attribute : u64 {
LocalInvocationId = 75,
LocalInvocationIndex = 76,
FragCoord = 77,
InstanceId0 = 78, // step rate 0
InstanceId1 = 79, // step rate 1
TessellationEvaluationPointU = 80,
TessellationEvaluationPointV = 81,
InstanceId0 = 78, // step rate 0
InstanceId1 = 79, // step rate 1
InvocationId = 80, // TCS id in output patch and instanced geometry shader id
PackedHullInvocationInfo =
81, // PrimitiveId (patch id) and InvocationId (output control point id)
PatchVertices = 82,
TessellationEvaluationPointU = 83,
TessellationEvaluationPointV = 84,
Max,
};

View File

@ -134,6 +134,10 @@ void IREmitter::DeviceMemoryBarrier() {
Inst(Opcode::DeviceMemoryBarrier);
}
void IREmitter::TcsOutputBarrier() {
Inst(Opcode::TcsOutputBarrier);
}
U32 IREmitter::GetUserData(IR::ScalarReg reg) {
ASSERT(static_cast<u32>(reg) < IR::NumScalarRegs);
return Inst<U32>(Opcode::GetUserData, reg);

View File

@ -10,8 +10,8 @@
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/condition.h"
#include "shader_recompiler/ir/value.h"
#include "shader_recompiler/ir/patch.h"
#include "shader_recompiler/ir/value.h"
namespace Shader::IR {
@ -50,6 +50,7 @@ public:
void Barrier();
void WorkgroupMemoryBarrier();
void DeviceMemoryBarrier();
void TcsOutputBarrier();
[[nodiscard]] U32 GetUserData(IR::ScalarReg reg);
[[nodiscard]] U1 GetThreadBitScalarReg(IR::ScalarReg reg);

View File

@ -44,6 +44,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
case Opcode::Barrier:
case Opcode::WorkgroupMemoryBarrier:
case Opcode::DeviceMemoryBarrier:
case Opcode::TcsOutputBarrier:
case Opcode::ConditionRef:
case Opcode::Reference:
case Opcode::PhiMove:

View File

@ -24,6 +24,7 @@ OPCODE(ReadConstBuffer, U32, Opaq
OPCODE(Barrier, Void, )
OPCODE(WorkgroupMemoryBarrier, Void, )
OPCODE(DeviceMemoryBarrier, Void, )
OPCODE(TcsOutputBarrier, Void, )
// Geometry shader specific
OPCODE(EmitVertex, Void, )

View File

@ -1,6 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma clang optimize off
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/program.h"
@ -60,9 +59,14 @@ namespace Shader::Optimization {
* NOTE: This pass must be run before constant propagation as it relies on relatively specific
* pattern matching that might be mutated by that optimization pass.
*
* TODO: need to be careful about reading from output arrays at idx other than InvocationID
* Need SPIRV OpControlBarrier
* "Wait for all active invocations within the specified Scope to reach the current point of
* execution."
* Must be placed in uniform control flow
*/
void HullShaderTransform(const IR::Program& program) {
void HullShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_info) {
LOG_INFO(Render_Vulkan, "{}", IR::DumpProgram(program));
for (IR::Block* block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {

View File

@ -16,8 +16,7 @@ void FlattenExtendedUserdataPass(IR::Program& program);
void ResourceTrackingPass(IR::Program& program);
void CollectShaderInfoPass(IR::Program& program);
void LowerSharedMemToRegisters(IR::Program& program);
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
Stage stage);
void HullShaderTransform(const IR::Program& program);
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info);
void HullShaderTransform(const IR::Program& program, const RuntimeInfo& runtime_info);
} // namespace Shader::Optimization

View File

@ -1,18 +1,89 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/assert.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/opcodes.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h"
namespace {
// TODO clean this up. Maybe remove
// from https://github.com/chaotic-cx/mesa-mirror/blob/main/src/amd/compiler/README.md
// basically logical stage x hw stage permutations
enum class SwHwStagePerm {
vertex_vs,
fragment_fs,
vertex_ls,
tess_control_hs,
tess_eval_vs,
vertex_es,
geometry_gs,
gs_copy_vs,
tess_eval_es,
compute_cs,
};
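// e.g. tess_eval_vs is the tessellation evaluation (domain) shader running on the hardware VS
// stage (tessellation without a GS); tess_eval_es is the same logical stage feeding a GS via
// the export (ES) stage. See GetSwHwStagePerm below.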
static SwHwStagePerm GetSwHwStagePerm(Shader::Stage hw_stage, Shader::LogicalStage sw_stage) {
using namespace Shader;
switch (sw_stage) {
case LogicalStage::Fragment:
ASSERT(hw_stage == Stage::Fragment);
return SwHwStagePerm::fragment_fs;
case LogicalStage::Vertex: {
switch (hw_stage) {
case Stage::Vertex:
return SwHwStagePerm::vertex_vs;
case Stage::Export:
return SwHwStagePerm::vertex_es;
case Stage::Local:
return SwHwStagePerm::vertex_ls;
default:
UNREACHABLE();
}
} break;
case LogicalStage::TessellationControl:
ASSERT(hw_stage == Stage::Hull);
return SwHwStagePerm::tess_control_hs;
case LogicalStage::TessellationEval: {
switch (hw_stage) {
case Stage::Vertex:
return SwHwStagePerm::tess_eval_vs;
case Stage::Export:
return SwHwStagePerm::tess_eval_es;
default:
UNREACHABLE();
}
}
case LogicalStage::Geometry:
ASSERT(hw_stage == Stage::Geometry);
return SwHwStagePerm::geometry_gs;
case LogicalStage::GsCopy:
ASSERT(hw_stage == Stage::Vertex);
return SwHwStagePerm::gs_copy_vs;
case LogicalStage::Compute:
ASSERT(hw_stage == Stage::Compute);
return SwHwStagePerm::compute_cs;
default:
UNREACHABLE();
}
}
}; // namespace
namespace Shader::Optimization {
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
Stage stage) {
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info) {
auto& info = program.info;
Stage stage = info.stage;
LogicalStage l_stage = info.l_stage;
SwHwStagePerm stage_perm = GetSwHwStagePerm(stage, l_stage);
const auto& ForEachInstruction = [&](auto func) {
for (IR::Block* block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
@ -22,38 +93,40 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
}
};
switch (stage) {
case Stage::Local: {
switch (stage_perm) {
case SwHwStagePerm::vertex_ls: {
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
const auto opcode = inst.GetOpcode();
switch (opcode) {
case IR::Opcode::WriteSharedU64: {
case IR::Opcode::WriteSharedU64:
case IR::Opcode::WriteSharedU32: {
bool is_composite = opcode == IR::Opcode::WriteSharedU64;
u32 num_components = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2;
u32 offset = 0;
const auto* addr = inst.Arg(0).InstRecursive();
if (addr->GetOpcode() == IR::Opcode::IAdd32) {
ASSERT(addr->Arg(1).IsImmediate());
offset = addr->Arg(1).U32();
}
const IR::Inst* pair = inst.Arg(1).InstRecursive();
for (s32 i = 0; i < 2; i++) {
IR::Value data = inst.Arg(1).Resolve();
for (s32 i = 0; i < num_components; i++) {
const auto attrib = IR::Attribute::Param0 + (offset / 16);
const auto comp = (offset / 4) % 4;
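// e.g. a dword written at LDS offset 20 maps to Param1, component 1 (16-byte params, 4-byte comps)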
const IR::U32 value = IR::U32{pair->Arg(i)};
const IR::U32 value = IR::U32{is_composite ? data.Inst()->Arg(i) : data};
ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp);
offset += 4;
}
inst.Invalidate();
break;
}
case IR::Opcode::WriteSharedU32:
UNREACHABLE();
default:
break;
}
});
break;
}
case Stage::Export: {
case SwHwStagePerm::vertex_es: {
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
const auto opcode = inst.GetOpcode();
switch (opcode) {
@ -84,7 +157,7 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
});
break;
}
case Stage::Geometry: {
case SwHwStagePerm::geometry_gs: {
const auto& gs_info = runtime_info.gs_info;
info.gs_copy_data = Shader::ParseCopyShader(gs_info.vs_copy);
@ -112,8 +185,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
break;
}
case IR::Opcode::StoreBufferU32: {
const auto info = inst.Flags<IR::BufferInstInfo>();
if (!info.system_coherent || !info.globally_coherent) {
const auto buffer_info = inst.Flags<IR::BufferInstInfo>();
if (!buffer_info.system_coherent || !buffer_info.globally_coherent) {
break;
}

View File

@ -1,6 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h"
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/frontend/control_flow_graph.h"
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/structured_control_flow.h"
@ -61,12 +64,45 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
// Run optimization passes
const auto stage = program.info.stage;
bool dump_ir = true;
bool extra_id_removal = true; // TODO remove all this stuff
auto dumpMatchingIR = [&](std::string phase) {
if (dump_ir) {
if (Config::dumpShaders()) {
std::string s = IR::DumpProgram(program);
using namespace Common::FS;
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
if (!std::filesystem::exists(dump_dir)) {
std::filesystem::create_directories(dump_dir);
}
const auto filename =
fmt::format("{}_{:#018x}.{}.ir.txt", info.stage, info.pgm_hash, phase);
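// e.g. yields names like "<stage>_0x0000000012345678.pre_hull.ir.txt"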
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteString(s);
}
}
};
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
if (extra_id_removal) {
Shader::Optimization::IdentityRemovalPass(program.blocks);
}
if (stage == Stage::Hull) {
Shader::Optimization::HullShaderTransform(program);
dumpMatchingIR("pre_hull");
Shader::Optimization::HullShaderTransform(program, runtime_info);
dumpMatchingIR("post_hull");
}
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
Shader::Optimization::RingAccessElimination(program, runtime_info, stage);
if (extra_id_removal) {
Shader::Optimization::IdentityRemovalPass(program.blocks);
}
dumpMatchingIR("pre_ring");
Shader::Optimization::RingAccessElimination(program, runtime_info);
if (extra_id_removal) {
Shader::Optimization::IdentityRemovalPass(program.blocks);
}
dumpMatchingIR("post_ring");
if (stage != Stage::Compute) {
Shader::Optimization::LowerSharedMemToRegisters(program);
}

View File

@ -29,6 +29,7 @@ enum class LogicalStage : u32 {
TessellationControl,
TessellationEval,
Geometry,
GsCopy,
Compute,
};
@ -86,8 +87,18 @@ struct VertexRuntimeInfo {
struct HullRuntimeInfo {
u32 output_control_points;
// trying to debug TODO probably delete this
u32 input_control_points;
u32 num_patches;
u32 num_instances;
u64 tess_factor_memory_base;
AmdGpu::TessellationType tess_type;
AmdGpu::TessellationTopology tess_topology;
AmdGpu::TessellationPartitioning tess_partitioning;
auto operator<=>(const HullRuntimeInfo&) const noexcept = default;
bool operator==(const HullRuntimeInfo& other) const noexcept {
return output_control_points == other.output_control_points;
}
};
static constexpr auto GsMaxOutputStreams = 4u;

View File

@ -591,6 +591,16 @@ struct Liverpool {
BitField<2, 2, IndexSwapMode> swap_mode;
};
union MultiVgtParam {
u32 raw;
BitField<0, 16, u32> primgroup_size;
BitField<16, 1, u32> partial_vs_wave_on;
BitField<17, 1, u32> switch_on_eop;
BitField<18, 1, u32> partial_es_wave_on;
BitField<19, 1, u32> switch_on_eoi;
BitField<20, 1, u32> wd_switch_on_eop;
};
union VgtNumInstances {
u32 num_instances;
@ -1074,6 +1084,17 @@ struct Liverpool {
BitField<5, 3, TessellationTopology> topology;
};
union TessFactorMemoryBase {
// TODO: was going to use this to check against the UD used in the TCS shader,
// but it has only been seen set to 0.
// Remove this and other added regs if they end up unused
u32 base;
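// 256-byte granularity: e.g. base = 0x001234 corresponds to address 0x123400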
u64 MemoryBase() const {
return static_cast<u64>(base) << 8;
}
};
union Eqaa {
u32 raw;
BitField<0, 1, u32> max_anchor_samples;
@ -1208,7 +1229,7 @@ struct Liverpool {
INSERT_PADDING_WORDS(0xA2A8 - 0xA2A5 - 1);
u32 vgt_instance_step_rate_0;
u32 vgt_instance_step_rate_1;
INSERT_PADDING_WORDS(0xA2AB - 0xA2A9 - 1);
MultiVgtParam ia_multi_vgt_param;
u32 vgt_esgs_ring_itemsize;
u32 vgt_gsvs_ring_itemsize;
INSERT_PADDING_WORDS(0xA2CE - 0xA2AC - 1);
@ -1232,6 +1253,8 @@ struct Liverpool {
INSERT_PADDING_WORDS(0xC24C - 0xC243);
u32 num_indices;
VgtNumInstances num_instances;
INSERT_PADDING_WORDS(0xC250 - 0xC24D - 1);
TessFactorMemoryBase vgt_tf_memory_base;
};
std::array<u32, NumRegs> reg_array{};
@ -1456,6 +1479,7 @@ static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1);
static_assert(GFX6_3D_REG_INDEX(enable_primitive_restart) == 0xA2A5);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8);
static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9);
static_assert(GFX6_3D_REG_INDEX(ia_multi_vgt_param) == 0xA2AA);
static_assert(GFX6_3D_REG_INDEX(vgt_esgs_ring_itemsize) == 0xA2AB);
static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE);
@ -1473,6 +1497,7 @@ static_assert(GFX6_3D_REG_INDEX(color_buffers[0].slice) == 0xA31A);
static_assert(GFX6_3D_REG_INDEX(color_buffers[7].base_address) == 0xA381);
static_assert(GFX6_3D_REG_INDEX(primitive_type) == 0xC242);
static_assert(GFX6_3D_REG_INDEX(num_instances) == 0xC24D);
static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xC250);
#undef GFX6_3D_REG_INDEX

View File

@ -3,6 +3,8 @@
#pragma once
#include <string_view>
#include <fmt/format.h>
#include "common/types.h"
namespace AmdGpu {
@ -27,6 +29,19 @@ enum class TessellationType : u32 {
Quad = 2,
};
constexpr std::string_view NameOf(TessellationType type) {
switch (type) {
case TessellationType::Isoline:
return "Isoline";
case TessellationType::Triangle:
return "Triangle";
case TessellationType::Quad:
return "Quad";
default:
return "Unknown";
}
}
enum class TessellationPartitioning : u32 {
Integer = 0,
Pow2 = 1,
@ -34,6 +49,21 @@ enum class TessellationPartitioning : u32 {
FracEven = 3,
};
constexpr std::string_view NameOf(TessellationPartitioning partitioning) {
switch (partitioning) {
case TessellationPartitioning::Integer:
return "Integer";
case TessellationPartitioning::Pow2:
return "Pow2";
case TessellationPartitioning::FracOdd:
return "FracOdd";
case TessellationPartitioning::FracEven:
return "FracEven";
default:
return "Unknown";
}
}
enum class TessellationTopology : u32 {
Point = 0,
Line = 1,
@ -41,6 +71,21 @@ enum class TessellationTopology : u32 {
TriangleCcw = 3,
};
constexpr std::string_view NameOf(TessellationTopology topology) {
switch (topology) {
case TessellationTopology::Point:
return "Point";
case TessellationTopology::Line:
return "Line";
case TessellationTopology::TriangleCw:
return "TriangleCw";
case TessellationTopology::TriangleCcw:
return "TriangleCcw";
default:
return "Unknown";
}
}
// See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide]
enum class PrimitiveType : u32 {
None = 0,
@ -138,3 +183,33 @@ enum class NumberFormat : u32 {
};
} // namespace AmdGpu
template <>
struct fmt::formatter<AmdGpu::TessellationType> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(AmdGpu::TessellationType type, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
}
};
template <>
struct fmt::formatter<AmdGpu::TessellationPartitioning> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(AmdGpu::TessellationPartitioning type, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
}
};
template <>
struct fmt::formatter<AmdGpu::TessellationTopology> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(AmdGpu::TessellationTopology type, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(type));
}
};

View File

@ -106,6 +106,11 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
key.primitive_restart_index == 0xFFFFFFFF,
"Primitive restart index other than -1 is not supported yet");
const vk::PipelineTessellationStateCreateInfo tessellation_state = {
// TODO how to handle optional member of graphics key when dynamic state not supported?
//.patchControlPoints = key.
};
const vk::PipelineRasterizationStateCreateInfo raster_state = {
.depthClampEnable = false,
.rasterizerDiscardEnable = false,
@ -168,6 +173,10 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
} else {
dynamic_states.push_back(vk::DynamicState::eVertexInputBindingStrideEXT);
}
ASSERT(instance.IsPatchControlPointsDynamicState()); // TODO remove
if (instance.IsPatchControlPointsDynamicState()) {
dynamic_states.push_back(vk::DynamicState::ePatchControlPointsEXT);
}
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
.dynamicStateCount = static_cast<u32>(dynamic_states.size()),
@ -317,6 +326,8 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
.pStages = shader_stages.data(),
.pVertexInputState = !instance.IsVertexInputDynamicState() ? &vertex_input_info : nullptr,
.pInputAssemblyState = &input_assembly,
.pTessellationState =
!instance.IsPatchControlPointsDynamicState() ? &tessellation_state : nullptr,
.pViewportState = &viewport_info,
.pRasterizationState = &raster_state,
.pMultisampleState = &multisampling,

View File

@ -258,6 +258,7 @@ bool Instance::CreateDevice() {
add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME);
extended_dynamic_state_2 = add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME);
// The next two extensions are required to be available together in order to support write masks
color_write_en = add_extension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME);

View File

@ -133,6 +133,10 @@ public:
return vertex_input_dynamic_state;
}
bool IsPatchControlPointsDynamicState() const {
return extended_dynamic_state_2;
}
/// Returns true when the nullDescriptor feature of VK_EXT_robustness2 is supported.
bool IsNullDescriptorSupported() const {
return null_descriptor;
@ -333,6 +337,7 @@ private:
bool debug_utils_supported{};
bool has_nsight_graphics{};
bool has_renderdoc{};
bool extended_dynamic_state_2{};
};
} // namespace Vulkan

View File

@ -98,6 +98,14 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_
case Stage::Hull: {
BuildCommon(regs.hs_program);
info.hs_info.output_control_points = regs.ls_hs_config.hs_output_control_points.Value();
info.hs_info.input_control_points = regs.ls_hs_config.hs_input_control_points;
info.hs_info.num_patches = regs.ls_hs_config.num_patches;
// Suspicious about this in the apparently "passthrough" hull shader. Probably not relevant
info.hs_info.num_instances = regs.num_instances.NumInstances();
info.hs_info.tess_factor_memory_base = regs.vgt_tf_memory_base.MemoryBase();
info.hs_info.tess_type = regs.tess_config.type;
info.hs_info.tess_topology = regs.tess_config.topology;
info.hs_info.tess_partitioning = regs.tess_config.partitioning;
break;
}
case Stage::Export: {
@ -236,6 +244,27 @@ const ComputePipeline* PipelineCache::GetComputePipeline() {
return it->second.get();
}
bool ShouldSkipShader(u64 shader_hash, const char* shader_type) {
static std::vector<u64> skip_hashes = {
0xbc234799 /* passthrough */,
0x8453cd1c /* passthrough */,
0xd67db0ef /* passthrough */,
0x34121ac6 /* passthrough*/,
0xa26750c1 /* passthrough, warp */,
0xbb88db5f /* passthrough */,
0x90c6fb05 /* passthrough */,
0x9fd272d7 /* forbidden woods (not PS) */,
0x2807dd6c /* forbidden woods, down elevator (not PS) */,
0x627ac5b9 /* ayyylmao*, passthrough */,
0xb5fb5174 /* rom (not PS) */,
};
if (std::ranges::contains(skip_hashes, shader_hash)) {
LOG_WARNING(Render_Vulkan, "Skipped {} shader hash {:#x}.", shader_type, shader_hash);
return true;
}
return false;
}
bool PipelineCache::RefreshGraphicsKey() {
std::memset(&graphics_key, 0, sizeof(GraphicsPipelineKey));
@ -344,6 +373,10 @@ bool PipelineCache::RefreshGraphicsKey() {
return false;
}
if (ShouldSkipShader(bininfo->shader_hash, "graphics")) {
return false;
}
auto params = Liverpool::GetParams(*pgm);
std::optional<Shader::Gcn::FetchShaderData> fetch_shader_;
std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_,
@ -453,7 +486,7 @@ bool PipelineCache::RefreshGraphicsKey() {
key.num_samples = num_samples;
return true;
}
} // namespace Vulkan
bool PipelineCache::RefreshComputeKey() {
Shader::Backend::Bindings binding{};

View File

@ -4,6 +4,7 @@
#include "common/config.h"
#include "common/debug.h"
#include "core/memory.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
@ -214,7 +215,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
return;
}
const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
const auto& fetch_shader = pipeline->GetFetchShader();
buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset);
@ -271,7 +272,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
return;
}
const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
const auto& fetch_shader = pipeline->GetFetchShader();
buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
buffer_cache.BindIndexBuffer(is_indexed, 0);
@ -932,6 +933,11 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) {
cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back.stencil_mask);
}
}
if (instance.IsPatchControlPointsDynamicState()) {
if (regs.primitive_type == AmdGpu::PrimitiveType::PatchPrimitive) {
cmdbuf.setPatchControlPointsEXT(regs.ls_hs_config.hs_input_control_points);
}
}
}
void Rasterizer::UpdateViewportScissorState() {