mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-04 16:32:39 +00:00
shader_recompiler: Tessellation WIP
This commit is contained in:
parent
715ac8a279
commit
c9f0771c0f
@ -664,6 +664,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
||||
src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
|
||||
src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp
|
||||
src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
|
||||
src/shader_recompiler/ir/passes/hull_shader_transform.cpp
|
||||
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
|
||||
src/shader_recompiler/ir/passes/ir_passes.h
|
||||
src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp
|
||||
@ -683,6 +684,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
||||
src/shader_recompiler/ir/opcodes.cpp
|
||||
src/shader_recompiler/ir/opcodes.h
|
||||
src/shader_recompiler/ir/opcodes.inc
|
||||
src/shader_recompiler/ir/patch.cpp
|
||||
src/shader_recompiler/ir/patch.h
|
||||
src/shader_recompiler/ir/post_order.cpp
|
||||
src/shader_recompiler/ir/post_order.h
|
||||
src/shader_recompiler/ir/program.cpp
|
||||
|
@ -1,6 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma clang optimize off
|
||||
#include <span>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
@ -72,7 +72,10 @@ ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
|
||||
return arg.VectorReg();
|
||||
} else if constexpr (std::is_same_v<ArgType, const char*>) {
|
||||
return arg.StringLiteral();
|
||||
} else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
|
||||
return arg.Patch();
|
||||
}
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
template <auto func, bool is_first_arg_inst, size_t... I>
|
||||
@ -206,6 +209,32 @@ Id DefineMain(EmitContext& ctx, const IR::Program& program) {
|
||||
return main;
|
||||
}
|
||||
|
||||
spv::ExecutionMode ExecutionMode(AmdGpu::TessellationType primitive) {
|
||||
switch (primitive) {
|
||||
case AmdGpu::TessellationType::Isoline:
|
||||
return spv::ExecutionMode::Isolines;
|
||||
case AmdGpu::TessellationType::Triangle:
|
||||
return spv::ExecutionMode::Triangles;
|
||||
case AmdGpu::TessellationType::Quad:
|
||||
return spv::ExecutionMode::Quads;
|
||||
}
|
||||
UNREACHABLE_MSG("Tessellation primitive {}", primitive);
|
||||
}
|
||||
|
||||
spv::ExecutionMode ExecutionMode(AmdGpu::TessellationPartitioning spacing) {
|
||||
switch (spacing) {
|
||||
case AmdGpu::TessellationPartitioning::Integer:
|
||||
return spv::ExecutionMode::SpacingEqual;
|
||||
case AmdGpu::TessellationPartitioning::FracOdd:
|
||||
return spv::ExecutionMode::SpacingFractionalOdd;
|
||||
case AmdGpu::TessellationPartitioning::FracEven:
|
||||
return spv::ExecutionMode::SpacingFractionalEven;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
UNREACHABLE_MSG("Tessellation spacing {}", spacing);
|
||||
}
|
||||
|
||||
void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ctx) {
|
||||
ctx.AddCapability(spv::Capability::Image1D);
|
||||
ctx.AddCapability(spv::Capability::Sampled1D);
|
||||
@ -244,11 +273,12 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
|
||||
if (info.uses_group_ballot) {
|
||||
ctx.AddCapability(spv::Capability::GroupNonUniformBallot);
|
||||
}
|
||||
if (info.stage == Stage::Export || info.stage == Stage::Vertex) {
|
||||
const auto stage = info.l_stage;
|
||||
if (stage == LogicalStage::Vertex) {
|
||||
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
|
||||
ctx.AddCapability(spv::Capability::DrawParameters);
|
||||
}
|
||||
if (info.stage == Stage::Geometry) {
|
||||
if (stage == LogicalStage::Geometry) {
|
||||
ctx.AddCapability(spv::Capability::Geometry);
|
||||
}
|
||||
if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) {
|
||||
@ -257,23 +287,39 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
|
||||
}
|
||||
}
|
||||
|
||||
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
||||
const auto& info = program.info;
|
||||
void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
|
||||
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
|
||||
spv::ExecutionModel execution_model{};
|
||||
switch (program.info.stage) {
|
||||
case Stage::Compute: {
|
||||
switch (info.l_stage) {
|
||||
case LogicalStage::Compute: {
|
||||
const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
|
||||
execution_model = spv::ExecutionModel::GLCompute;
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
|
||||
workgroup_size[1], workgroup_size[2]);
|
||||
break;
|
||||
}
|
||||
case Stage::Export:
|
||||
case Stage::Vertex:
|
||||
case LogicalStage::Vertex:
|
||||
execution_model = spv::ExecutionModel::Vertex;
|
||||
break;
|
||||
case Stage::Fragment:
|
||||
case LogicalStage::TessellationControl:
|
||||
execution_model = spv::ExecutionModel::TessellationControl;
|
||||
ctx.AddCapability(spv::Capability::Tessellation);
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
|
||||
ctx.runtime_info.hs_info.output_control_points);
|
||||
break;
|
||||
case LogicalStage::TessellationEval: {
|
||||
execution_model = spv::ExecutionModel::TessellationEvaluation;
|
||||
ctx.AddCapability(spv::Capability::Tessellation);
|
||||
const auto& vs_info = ctx.runtime_info.vs_info;
|
||||
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type));
|
||||
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning));
|
||||
ctx.AddExecutionMode(main,
|
||||
vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw
|
||||
? spv::ExecutionMode::VertexOrderCcw
|
||||
: spv::ExecutionMode::VertexOrderCw);
|
||||
break;
|
||||
}
|
||||
case LogicalStage::Fragment:
|
||||
execution_model = spv::ExecutionModel::Fragment;
|
||||
if (ctx.profile.lower_left_origin_mode) {
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft);
|
||||
@ -288,7 +334,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
|
||||
}
|
||||
break;
|
||||
case Stage::Geometry:
|
||||
case LogicalStage::Geometry:
|
||||
execution_model = spv::ExecutionModel::Geometry;
|
||||
ctx.AddExecutionMode(main, GetInputPrimitiveType(ctx.runtime_info.gs_info.in_primitive));
|
||||
ctx.AddExecutionMode(main,
|
||||
@ -299,7 +345,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
||||
ctx.runtime_info.gs_info.num_invocations);
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Stage {}", u32(program.info.stage));
|
||||
UNREACHABLE_MSG("Stage {}", u32(info.stage));
|
||||
}
|
||||
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
|
||||
}
|
||||
@ -345,7 +391,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
|
||||
const IR::Program& program, Bindings& binding) {
|
||||
EmitContext ctx{profile, runtime_info, program.info, binding};
|
||||
const Id main{DefineMain(ctx, program)};
|
||||
DefineEntryPoint(program, ctx, main);
|
||||
DefineEntryPoint(program.info, ctx, main);
|
||||
SetupCapabilities(program.info, profile, ctx);
|
||||
SetupFloatMode(ctx, profile, runtime_info, main);
|
||||
PatchPhiNodes(program, ctx);
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include "common/assert.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
|
||||
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
|
||||
#include "shader_recompiler/ir/patch.h"
|
||||
|
||||
#include <magic_enum/magic_enum.hpp>
|
||||
|
||||
@ -242,8 +243,14 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
|
||||
}
|
||||
return coord;
|
||||
}
|
||||
case IR::Attribute::TessellationEvaluationPointU:
|
||||
return ctx.OpLoad(ctx.F32[1],
|
||||
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value));
|
||||
case IR::Attribute::TessellationEvaluationPointV:
|
||||
return ctx.OpLoad(ctx.F32[1],
|
||||
ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U)));
|
||||
default:
|
||||
throw NotImplementedException("Read attribute {}", attr);
|
||||
UNREACHABLE_MSG("Read attribute {}", attr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -269,7 +276,7 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
|
||||
ASSERT(ctx.info.stage == Stage::Geometry);
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
|
||||
default:
|
||||
throw NotImplementedException("Read U32 attribute {}", attr);
|
||||
UNREACHABLE_MSG("Read U32 attribute {}", attr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -287,6 +294,42 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
|
||||
}
|
||||
}
|
||||
|
||||
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) {
|
||||
const u32 index{IR::GenericPatchIndex(patch)};
|
||||
const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
|
||||
const Id type{ctx.stage == Stage::Hull ? ctx.output_f32 : ctx.input_f32};
|
||||
const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)};
|
||||
return ctx.OpLoad(ctx.F32[1], pointer);
|
||||
}
|
||||
|
||||
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
|
||||
const Id pointer{[&] {
|
||||
if (IR::IsGeneric(patch)) {
|
||||
const u32 index{IR::GenericPatchIndex(patch)};
|
||||
const Id element{ctx.ConstU32(IR::GenericPatchElement(patch))};
|
||||
return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element);
|
||||
}
|
||||
switch (patch) {
|
||||
case IR::Patch::TessellationLodLeft:
|
||||
case IR::Patch::TessellationLodRight:
|
||||
case IR::Patch::TessellationLodTop:
|
||||
case IR::Patch::TessellationLodBottom: {
|
||||
const u32 index{static_cast<u32>(patch) - u32(IR::Patch::TessellationLodLeft)};
|
||||
const Id index_id{ctx.ConstU32(index)};
|
||||
return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id);
|
||||
}
|
||||
case IR::Patch::TessellationLodInteriorU:
|
||||
return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner,
|
||||
ctx.u32_zero_value);
|
||||
case IR::Patch::TessellationLodInteriorV:
|
||||
return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.ConstU32(1u));
|
||||
default:
|
||||
UNREACHABLE_MSG("Patch {}", u32(patch));
|
||||
}
|
||||
}()};
|
||||
ctx.OpStore(pointer, value);
|
||||
}
|
||||
|
||||
template <u32 N>
|
||||
static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) {
|
||||
auto& buffer = ctx.buffers[handle];
|
||||
|
@ -9,6 +9,7 @@
|
||||
namespace Shader::IR {
|
||||
enum class Attribute : u64;
|
||||
enum class ScalarReg : u32;
|
||||
enum class Patch : u64;
|
||||
class Inst;
|
||||
class Value;
|
||||
} // namespace Shader::IR
|
||||
@ -88,6 +89,8 @@ Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index);
|
||||
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
|
||||
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);
|
||||
void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value);
|
||||
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
|
||||
void EmitSetSampleMask(EmitContext& ctx, Id value);
|
||||
void EmitSetFragDepth(EmitContext& ctx, Id value);
|
||||
|
@ -34,7 +34,7 @@ std::string_view StageName(Stage stage) {
|
||||
case Stage::Compute:
|
||||
return "cs";
|
||||
}
|
||||
throw InvalidArgument("Invalid stage {}", u32(stage));
|
||||
UNREACHABLE_MSG("Invalid hw stage {}", u32(stage));
|
||||
}
|
||||
|
||||
static constexpr u32 NumVertices(AmdGpu::PrimitiveType type) {
|
||||
@ -65,7 +65,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
|
||||
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
|
||||
const Info& info_, Bindings& binding_)
|
||||
: Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
|
||||
profile{profile_}, stage{info.stage}, binding{binding_} {
|
||||
profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
|
||||
AddCapability(spv::Capability::Shader);
|
||||
DefineArithmeticTypes();
|
||||
DefineInterfaces();
|
||||
@ -268,9 +268,8 @@ void EmitContext::DefineInputs() {
|
||||
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||
}
|
||||
switch (stage) {
|
||||
case Stage::Export:
|
||||
case Stage::Vertex: {
|
||||
switch (l_stage) {
|
||||
case LogicalStage::Vertex: {
|
||||
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
|
||||
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
|
||||
instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input);
|
||||
@ -311,12 +310,11 @@ void EmitContext::DefineInputs() {
|
||||
}
|
||||
input_params[attrib.semantic] =
|
||||
GetAttributeInfo(sharp.GetNumberFmt(), id, 4, false);
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Stage::Fragment:
|
||||
case LogicalStage::Fragment:
|
||||
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
||||
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
||||
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
||||
@ -351,15 +349,14 @@ void EmitContext::DefineInputs() {
|
||||
}
|
||||
input_params[semantic] =
|
||||
GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false);
|
||||
interfaces.push_back(attr_id);
|
||||
}
|
||||
break;
|
||||
case Stage::Compute:
|
||||
case LogicalStage::Compute:
|
||||
workgroup_id = DefineVariable(U32[3], spv::BuiltIn::WorkgroupId, spv::StorageClass::Input);
|
||||
local_invocation_id =
|
||||
DefineVariable(U32[3], spv::BuiltIn::LocalInvocationId, spv::StorageClass::Input);
|
||||
break;
|
||||
case Stage::Geometry: {
|
||||
case LogicalStage::Geometry: {
|
||||
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
|
||||
const auto gl_per_vertex =
|
||||
Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))),
|
||||
@ -389,15 +386,18 @@ void EmitContext::DefineInputs() {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case LogicalStage::TessellationEval: {
|
||||
tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineOutputs() {
|
||||
switch (stage) {
|
||||
case Stage::Export:
|
||||
case Stage::Vertex: {
|
||||
switch (l_stage) {
|
||||
case LogicalStage::Vertex: {
|
||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
|
||||
info.stores.Get(IR::Attribute::Position2) ||
|
||||
@ -419,11 +419,33 @@ void EmitContext::DefineOutputs() {
|
||||
Name(id, fmt::format("out_attr{}", i));
|
||||
output_params[i] =
|
||||
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Stage::Fragment:
|
||||
case LogicalStage::TessellationControl: {
|
||||
if (info.stores_tess_level_outer) {
|
||||
const Id type{TypeArray(F32[1], ConstU32(4U))};
|
||||
output_tess_level_outer =
|
||||
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelOuter);
|
||||
Decorate(output_tess_level_outer, spv::Decoration::Patch);
|
||||
}
|
||||
if (info.stores_tess_level_inner) {
|
||||
const Id type{TypeArray(F32[1], ConstU32(2U))};
|
||||
output_tess_level_inner =
|
||||
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner);
|
||||
Decorate(output_tess_level_inner, spv::Decoration::Patch);
|
||||
}
|
||||
for (size_t index = 0; index < 30; ++index) {
|
||||
if (!(info.uses_patches & (1U << index))) {
|
||||
continue;
|
||||
}
|
||||
const Id id{DefineOutput(F32[4], index)};
|
||||
Decorate(id, spv::Decoration::Patch);
|
||||
patches[index] = id;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case LogicalStage::Fragment:
|
||||
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
|
||||
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
|
||||
if (!info.stores.GetAny(mrt)) {
|
||||
@ -435,22 +457,22 @@ void EmitContext::DefineOutputs() {
|
||||
const Id id{DefineOutput(type, i)};
|
||||
Name(id, fmt::format("frag_color{}", i));
|
||||
frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
break;
|
||||
case Stage::Geometry: {
|
||||
case LogicalStage::Geometry: {
|
||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||
|
||||
for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) {
|
||||
const Id id{DefineOutput(F32[4], attr_id)};
|
||||
Name(id, fmt::format("out_attr{}", attr_id));
|
||||
output_params[attr_id] = {id, output_f32, F32[1], 4u};
|
||||
interfaces.push_back(id);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
case LogicalStage::Compute:
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -46,14 +46,18 @@ public:
|
||||
void DefineBufferOffsets();
|
||||
void DefineInterpolatedAttribs();
|
||||
|
||||
[[nodiscard]] Id DefineInput(Id type, u32 location) {
|
||||
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
|
||||
Decorate(input_id, spv::Decoration::Location, location);
|
||||
[[nodiscard]] Id DefineInput(Id type, std::optional<u32> location = std::nullopt,
|
||||
std::optional<spv::BuiltIn> builtin = std::nullopt) {
|
||||
const Id input_id{DefineVariable(type, builtin, spv::StorageClass::Input)};
|
||||
if (location) {
|
||||
Decorate(input_id, spv::Decoration::Location, *location);
|
||||
}
|
||||
return input_id;
|
||||
}
|
||||
|
||||
[[nodiscard]] Id DefineOutput(Id type, std::optional<u32> location = std::nullopt) {
|
||||
const Id output_id{DefineVar(type, spv::StorageClass::Output)};
|
||||
[[nodiscard]] Id DefineOutput(Id type, std::optional<u32> location = std::nullopt,
|
||||
std::optional<spv::BuiltIn> builtin = std::nullopt) {
|
||||
const Id output_id{DefineVariable(type, builtin, spv::StorageClass::Output)};
|
||||
if (location) {
|
||||
Decorate(output_id, spv::Decoration::Location, *location);
|
||||
}
|
||||
@ -131,7 +135,8 @@ public:
|
||||
const Info& info;
|
||||
const RuntimeInfo& runtime_info;
|
||||
const Profile& profile;
|
||||
Stage stage{};
|
||||
Stage stage;
|
||||
LogicalStage l_stage{};
|
||||
|
||||
Id void_id{};
|
||||
Id U8{};
|
||||
@ -188,6 +193,11 @@ public:
|
||||
Id clip_distances{};
|
||||
Id cull_distances{};
|
||||
|
||||
Id output_tess_level_outer{};
|
||||
Id output_tess_level_inner{};
|
||||
Id tess_coord;
|
||||
std::array<Id, 30> patches{};
|
||||
|
||||
Id workgroup_id{};
|
||||
Id local_invocation_id{};
|
||||
Id subgroup_local_invocation_id{};
|
||||
|
@ -1,6 +1,6 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma clang optimize off
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
|
||||
|
@ -34,9 +34,8 @@ void Translator::EmitPrologue() {
|
||||
}
|
||||
|
||||
IR::VectorReg dst_vreg = IR::VectorReg::V0;
|
||||
switch (info.stage) {
|
||||
case Stage::Vertex:
|
||||
case Stage::Export:
|
||||
switch (info.l_stage) {
|
||||
case LogicalStage::Vertex:
|
||||
// v0: vertex ID, always present
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
|
||||
// v1: instance ID, step rate 0
|
||||
@ -122,7 +121,17 @@ void Translator::EmitPrologue() {
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Stage::Compute:
|
||||
case LogicalStage::TessellationControl:
|
||||
ir.SetVectorReg(IR::VectorReg::V0, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
|
||||
break;
|
||||
case LogicalStage::TessellationEval:
|
||||
ir.SetVectorReg(IR::VectorReg::V0,
|
||||
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU));
|
||||
ir.SetVectorReg(IR::VectorReg::V1,
|
||||
ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV));
|
||||
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
|
||||
break;
|
||||
case LogicalStage::Compute:
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0));
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));
|
||||
@ -137,7 +146,7 @@ void Translator::EmitPrologue() {
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
|
||||
}
|
||||
break;
|
||||
case Stage::Geometry:
|
||||
case LogicalStage::Geometry:
|
||||
switch (runtime_info.gs_info.out_primitive[0]) {
|
||||
case AmdGpu::GsOutputPrimitiveType::TriangleStrip:
|
||||
ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2
|
||||
@ -152,7 +161,7 @@ void Translator::EmitPrologue() {
|
||||
ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId));
|
||||
break;
|
||||
default:
|
||||
throw NotImplementedException("Unknown shader stage");
|
||||
UNREACHABLE_MSG("Unknown shader stage");
|
||||
}
|
||||
}
|
||||
|
||||
@ -503,7 +512,8 @@ void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Inf
|
||||
|
||||
// Special case for emitting fetch shader.
|
||||
if (inst.opcode == Opcode::S_SWAPPC_B64) {
|
||||
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export);
|
||||
ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export ||
|
||||
info.stage == Stage::Local);
|
||||
translator.EmitFetch(inst);
|
||||
continue;
|
||||
}
|
||||
|
@ -217,7 +217,7 @@ public:
|
||||
|
||||
// VOP3a
|
||||
void V_MAD_F32(const GcnInst& inst);
|
||||
void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = false);
|
||||
void V_MAD_I32_I24(const GcnInst& inst, bool is_signed = true);
|
||||
void V_MAD_U32_U24(const GcnInst& inst);
|
||||
void V_CUBEID_F32(const GcnInst& inst);
|
||||
void V_CUBESC_F32(const GcnInst& inst);
|
||||
|
@ -1060,8 +1060,14 @@ void Translator::V_CUBEMA_F32(const GcnInst& inst) {
|
||||
|
||||
void Translator::V_BFE_U32(bool is_signed, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{ir.BitwiseAnd(GetSrc(inst.src[1]), ir.Imm32(0x1F))};
|
||||
const IR::U32 src2{ir.BitwiseAnd(GetSrc(inst.src[2]), ir.Imm32(0x1F))};
|
||||
IR::U32 src1{GetSrc(inst.src[1])};
|
||||
IR::U32 src2{GetSrc(inst.src[2])};
|
||||
if (!src1.IsImmediate()) {
|
||||
src1 = ir.BitwiseAnd(src1, ir.Imm32(0x1F));
|
||||
}
|
||||
if (!src2.IsImmediate()) {
|
||||
src2 = ir.BitwiseAnd(src2, ir.Imm32(0x1F));
|
||||
}
|
||||
SetDst(inst.dst[0], ir.BitFieldExtract(src0, src1, src2, is_signed));
|
||||
}
|
||||
|
||||
|
@ -187,7 +187,8 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
|
||||
buffer_info.index_enable.Assign(mtbuf.idxen);
|
||||
buffer_info.offset_enable.Assign(mtbuf.offen);
|
||||
buffer_info.inst_offset.Assign(mtbuf.offset);
|
||||
buffer_info.ring_access.Assign(is_ring);
|
||||
buffer_info.globally_coherent.Assign(mtbuf.glc);
|
||||
buffer_info.system_coherent.Assign(mtbuf.slc);
|
||||
if (is_typed) {
|
||||
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
|
||||
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
|
||||
@ -245,7 +246,7 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
|
||||
const IR::ScalarReg sharp{inst.src[2].code * 4};
|
||||
const IR::Value soffset{GetSrc(inst.src[3])};
|
||||
|
||||
if (info.stage != Stage::Export && info.stage != Stage::Geometry) {
|
||||
if (info.stage != Stage::Export && info.stage != Stage::Hull && info.stage != Stage::Geometry) {
|
||||
ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0,
|
||||
"Non immediate offset not supported");
|
||||
}
|
||||
@ -267,7 +268,8 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
|
||||
buffer_info.index_enable.Assign(mtbuf.idxen);
|
||||
buffer_info.offset_enable.Assign(mtbuf.offen);
|
||||
buffer_info.inst_offset.Assign(mtbuf.offset);
|
||||
buffer_info.ring_access.Assign(is_ring);
|
||||
buffer_info.globally_coherent.Assign(mtbuf.glc);
|
||||
buffer_info.system_coherent.Assign(mtbuf.slc);
|
||||
if (is_typed) {
|
||||
const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
|
||||
const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
|
||||
|
@ -163,6 +163,7 @@ struct Info {
|
||||
UserDataMask ud_mask{};
|
||||
|
||||
CopyShaderData gs_copy_data;
|
||||
u32 uses_patches{};
|
||||
|
||||
BufferResourceList buffers;
|
||||
TextureBufferResourceList texture_buffers;
|
||||
@ -175,6 +176,7 @@ struct Info {
|
||||
|
||||
std::span<const u32> user_data;
|
||||
Stage stage;
|
||||
LogicalStage l_stage;
|
||||
|
||||
u64 pgm_hash{};
|
||||
VAddr pgm_base;
|
||||
@ -190,14 +192,16 @@ struct Info {
|
||||
bool uses_shared{};
|
||||
bool uses_fp16{};
|
||||
bool uses_fp64{};
|
||||
bool stores_tess_level_outer{};
|
||||
bool stores_tess_level_inner{};
|
||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||
bool has_readconst{};
|
||||
u8 mrt_mask{0u};
|
||||
bool has_fetch_shader{false};
|
||||
u32 fetch_shader_sgpr_base{0u};
|
||||
|
||||
explicit Info(Stage stage_, ShaderParams params)
|
||||
: stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
|
||||
explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
|
||||
: stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
|
||||
user_data{params.user_data} {}
|
||||
|
||||
template <typename T>
|
||||
|
@ -114,6 +114,10 @@ std::string NameOf(Attribute attribute) {
|
||||
return "LocalInvocationId";
|
||||
case Attribute::LocalInvocationIndex:
|
||||
return "LocalInvocationIndex";
|
||||
case Attribute::TessellationEvaluationPointU:
|
||||
return "TessellationEvaluationPointU";
|
||||
case Attribute::TessellationEvaluationPointV:
|
||||
return "TessellationEvaluationPointV";
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -74,6 +74,8 @@ enum class Attribute : u64 {
|
||||
FragCoord = 77,
|
||||
InstanceId0 = 78, // step rate 0
|
||||
InstanceId1 = 79, // step rate 1
|
||||
TessellationEvaluationPointU = 80,
|
||||
TessellationEvaluationPointV = 81,
|
||||
Max,
|
||||
};
|
||||
|
||||
|
@ -94,6 +94,8 @@ static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size
|
||||
return fmt::format("{}", arg.VectorReg());
|
||||
case Type::Attribute:
|
||||
return fmt::format("{}", arg.Attribute());
|
||||
case Type::Patch:
|
||||
return fmt::format("{}", arg.Patch());
|
||||
default:
|
||||
return "<unknown immediate type>";
|
||||
}
|
||||
|
@ -278,6 +278,14 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, u32 comp
|
||||
Inst(Opcode::SetAttribute, attribute, value, Imm32(comp));
|
||||
}
|
||||
|
||||
F32 IREmitter::GetPatch(Patch patch) {
|
||||
return Inst<F32>(Opcode::GetPatch, patch);
|
||||
}
|
||||
|
||||
void IREmitter::SetPatch(Patch patch, const F32& value) {
|
||||
Inst(Opcode::SetPatch, patch, value);
|
||||
}
|
||||
|
||||
Value IREmitter::LoadShared(int bit_size, bool is_signed, const U32& offset) {
|
||||
switch (bit_size) {
|
||||
case 32:
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/condition.h"
|
||||
#include "shader_recompiler/ir/value.h"
|
||||
#include "shader_recompiler/ir/patch.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
@ -84,6 +85,9 @@ public:
|
||||
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
|
||||
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
|
||||
|
||||
[[nodiscard]] F32 GetPatch(Patch patch);
|
||||
void SetPatch(Patch patch, const F32& value);
|
||||
|
||||
[[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
|
||||
void WriteShared(int bit_size, const Value& value, const U32& offset);
|
||||
|
||||
|
@ -52,6 +52,7 @@ bool Inst::MayHaveSideEffects() const noexcept {
|
||||
case Opcode::Discard:
|
||||
case Opcode::DiscardCond:
|
||||
case Opcode::SetAttribute:
|
||||
case Opcode::SetPatch:
|
||||
case Opcode::StoreBufferU32:
|
||||
case Opcode::StoreBufferU32x2:
|
||||
case Opcode::StoreBufferU32x3:
|
||||
|
@ -30,7 +30,7 @@ constexpr Type Opaque{Type::Opaque};
|
||||
constexpr Type ScalarReg{Type::ScalarReg};
|
||||
constexpr Type VectorReg{Type::VectorReg};
|
||||
constexpr Type Attribute{Type::Attribute};
|
||||
constexpr Type SystemValue{Type::SystemValue};
|
||||
constexpr Type Patch{Type::Patch};
|
||||
constexpr Type U1{Type::U1};
|
||||
constexpr Type U8{Type::U8};
|
||||
constexpr Type U16{Type::U16};
|
||||
|
@ -60,6 +60,8 @@ OPCODE(SetGotoVariable, Void, U32,
|
||||
OPCODE(GetAttribute, F32, Attribute, U32, U32, )
|
||||
OPCODE(GetAttributeU32, U32, Attribute, U32, )
|
||||
OPCODE(SetAttribute, Void, Attribute, F32, U32, )
|
||||
OPCODE(GetPatch, F32, Patch, )
|
||||
OPCODE(SetPatch, Void, Patch, F32, )
|
||||
|
||||
// Flags
|
||||
OPCODE(GetScc, U1, Void, )
|
||||
|
157
src/shader_recompiler/ir/passes/hull_shader_transform.cpp
Normal file
157
src/shader_recompiler/ir/passes/hull_shader_transform.cpp
Normal file
@ -0,0 +1,157 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
#pragma clang optimize off
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
||||
/**
|
||||
* Tessellation shaders pass outputs to the next shader using LDS.
|
||||
* The Hull shader stage receives input control points stored in LDS.
|
||||
*
|
||||
* The LDS layout is:
|
||||
* - TCS inputs for patch 0
|
||||
* - TCS inputs for patch 1
|
||||
* - TCS inputs for patch 2
|
||||
* - ...
|
||||
* - TCS outputs for patch 0
|
||||
* - TCS outputs for patch 1
|
||||
* - TCS outputs for patch 2
|
||||
* - ...
|
||||
* - Per-patch TCS outputs for patch 0
|
||||
* - Per-patch TCS outputs for patch 1
|
||||
* - Per-patch TCS outputs for patch 2
|
||||
*
|
||||
* If the Hull stage does not write any new control points the driver will
|
||||
* optimize LDS layout so input and output control point spaces overlap.
|
||||
*
|
||||
* Tessellation factors are stored in the per-patch TCS output block
|
||||
* as well as a factor V# that is automatically bound by the driver.
|
||||
*
|
||||
* This pass attempts to resolve LDS accesses to attribute accesses and correctly
|
||||
* write to the tessellation factor tables. For the latter we replace the
|
||||
* buffer store instruction to factor writes according to their offset.
|
||||
*
|
||||
* LDS stores can either be output control point writes or per-patch data writes.
|
||||
* This is detected by looking at how the address is formed. In any case the calculation
|
||||
* will be of the form a * b + c. For output control points a = output_control_point_id
|
||||
* while for per-patch writes a = patch_id.
|
||||
*
|
||||
* Both patch_id and output_control_point_id are packed in VGPR1 by the driver and shader
|
||||
* uses V_BFE_U32 to extract them. We use the starting bit_pos to determine which is which.
|
||||
*
|
||||
* LDS reads are more tricky as amount of different calculations performed can vary.
|
||||
* The final result, if output control point space is distinct, is of the form:
|
||||
* patch_id * input_control_point_stride * num_control_points_per_input_patch + a
|
||||
* The value "a" can be anything in the range of [0, input_control_point_stride]
|
||||
*
|
||||
* This pass does not attempt to deduce the exact attribute referenced by "a" but rather
|
||||
* only using "a" itself index into input attributes. Those are defined as an array in the shader
|
||||
* layout (location = 0) in vec4[num_control_points_per_input_patch] attr[];
|
||||
* ...
|
||||
* float value = attr[a / in_stride][(a % in_stride) >> 4][(a & 0xF) >> 2];
|
||||
*
|
||||
* This requires knowing in_stride which is not provided to us by the guest.
|
||||
* To deduce it we perform a breadth first search on the arguments of a DS_READ*
|
||||
* looking for a buffer load with offset = 0. This will be the buffer holding tessellation
|
||||
* constants and it contains the value of in_stride we can read at compile time.
|
||||
*
|
||||
* NOTE: This pass must be run before constant propagation as it relies on relatively specific
|
||||
* pattern matching that might be mutated that that optimization pass.
|
||||
*
|
||||
*/
|
||||
|
||||
void HullShaderTransform(const IR::Program& program) {
|
||||
LOG_INFO(Render_Vulkan, "{}", IR::DumpProgram(program));
|
||||
for (IR::Block* block : program.blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
IR::IREmitter ir{*block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
const auto opcode = inst.GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::StoreBufferU32:
|
||||
case IR::Opcode::StoreBufferU32x2:
|
||||
case IR::Opcode::StoreBufferU32x3:
|
||||
case IR::Opcode::StoreBufferU32x4: {
|
||||
const auto info = inst.Flags<IR::BufferInstInfo>();
|
||||
if (!info.globally_coherent) {
|
||||
break;
|
||||
}
|
||||
const auto GetValue = [&](IR::Value data) -> IR::F32 {
|
||||
if (auto* inst = data.TryInstRecursive();
|
||||
inst && inst->GetOpcode() == IR::Opcode::BitCastU32F32) {
|
||||
return IR::F32{inst->Arg(0)};
|
||||
}
|
||||
return ir.BitCast<IR::F32, IR::U32>(IR::U32{data});
|
||||
};
|
||||
const u32 num_dwords = u32(opcode) - u32(IR::Opcode::StoreBufferU32) + 1;
|
||||
const auto factor_idx = info.inst_offset.Value() >> 2;
|
||||
const IR::Value data = inst.Arg(2);
|
||||
inst.Invalidate();
|
||||
if (num_dwords == 1) {
|
||||
ir.SetPatch(IR::PatchFactor(factor_idx), GetValue(data));
|
||||
break;
|
||||
}
|
||||
auto* inst = data.TryInstRecursive();
|
||||
ASSERT(inst && (inst->GetOpcode() == IR::Opcode::CompositeConstructU32x2 ||
|
||||
inst->GetOpcode() == IR::Opcode::CompositeConstructU32x3 ||
|
||||
inst->GetOpcode() == IR::Opcode::CompositeConstructU32x4));
|
||||
for (s32 i = 0; i < num_dwords; i++) {
|
||||
ir.SetPatch(IR::PatchFactor(factor_idx + i), GetValue(inst->Arg(i)));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
case IR::Opcode::WriteSharedU64: {
|
||||
const u32 num_dwords = opcode == IR::Opcode::WriteSharedU32 ? 1 : 2;
|
||||
const IR::Value data = inst.Arg(1);
|
||||
const auto [data_lo, data_hi] = [&] -> std::pair<IR::U32, IR::U32> {
|
||||
if (num_dwords == 1) {
|
||||
return {IR::U32{data}, IR::U32{}};
|
||||
}
|
||||
const auto* prod = data.InstRecursive();
|
||||
return {IR::U32{prod->Arg(0)}, IR::U32{prod->Arg(1)}};
|
||||
}();
|
||||
const IR::Inst* ds_offset = inst.Arg(0).InstRecursive();
|
||||
const u32 offset_dw = ds_offset->Arg(1).U32() >> 4;
|
||||
IR::Inst* prod = ds_offset->Arg(0).TryInstRecursive();
|
||||
ASSERT(prod && (prod->GetOpcode() == IR::Opcode::IAdd32 ||
|
||||
prod->GetOpcode() == IR::Opcode::IMul32));
|
||||
if (prod->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
prod = prod->Arg(0).TryInstRecursive();
|
||||
ASSERT(prod && prod->GetOpcode() == IR::Opcode::IMul32);
|
||||
}
|
||||
prod = prod->Arg(0).TryInstRecursive();
|
||||
ASSERT(prod && prod->GetOpcode() == IR::Opcode::BitFieldSExtract &&
|
||||
prod->Arg(2).IsImmediate() && prod->Arg(2).U32() == 24);
|
||||
prod = prod->Arg(0).TryInstRecursive();
|
||||
ASSERT(prod && prod->GetOpcode() == IR::Opcode::BitFieldUExtract);
|
||||
const u32 bit_pos = prod->Arg(1).U32();
|
||||
const auto SetOutput = [&ir](IR::U32 value, u32 offset_dw, bool is_patch_const) {
|
||||
const IR::F32 data = ir.BitCast<IR::F32, IR::U32>(value);
|
||||
if (!is_patch_const) {
|
||||
const u32 param = offset_dw >> 2;
|
||||
const u32 comp = offset_dw & 3;
|
||||
ir.SetAttribute(IR::Attribute::Param0 + param, data, comp);
|
||||
} else {
|
||||
ir.SetPatch(IR::PatchGeneric(offset_dw), data);
|
||||
}
|
||||
};
|
||||
ASSERT_MSG(bit_pos == 0 || bit_pos == 8, "Unknown bit extract pos {}", bit_pos);
|
||||
const bool is_patch_const = bit_pos == 0;
|
||||
SetOutput(data_lo, offset_dw, is_patch_const);
|
||||
if (num_dwords > 1) {
|
||||
SetOutput(data_hi, offset_dw + 1, is_patch_const);
|
||||
}
|
||||
inst.Invalidate();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
LOG_INFO(Render_Vulkan, "{}", IR::DumpProgram(program));
|
||||
}
|
||||
|
||||
} // namespace Shader::Optimization
|
@ -18,5 +18,6 @@ void CollectShaderInfoPass(IR::Program& program);
|
||||
void LowerSharedMemToRegisters(IR::Program& program);
|
||||
void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtime_info,
|
||||
Stage stage);
|
||||
void HullShaderTransform(const IR::Program& program);
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
|
@ -23,12 +23,43 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
||||
};
|
||||
|
||||
switch (stage) {
|
||||
case Stage::Local: {
|
||||
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
|
||||
const auto opcode = inst.GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::WriteSharedU64: {
|
||||
u32 offset = 0;
|
||||
const auto* addr = inst.Arg(0).InstRecursive();
|
||||
if (addr->GetOpcode() == IR::Opcode::IAdd32) {
|
||||
ASSERT(addr->Arg(1).IsImmediate());
|
||||
offset = addr->Arg(1).U32();
|
||||
}
|
||||
const IR::Inst* pair = inst.Arg(1).InstRecursive();
|
||||
for (s32 i = 0; i < 2; i++) {
|
||||
const auto attrib = IR::Attribute::Param0 + (offset / 16);
|
||||
const auto comp = (offset / 4) % 4;
|
||||
const IR::U32 value = IR::U32{pair->Arg(i)};
|
||||
ir.SetAttribute(attrib, ir.BitCast<IR::F32, IR::U32>(value), comp);
|
||||
offset += 4;
|
||||
}
|
||||
inst.Invalidate();
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
UNREACHABLE();
|
||||
default:
|
||||
break;
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
case Stage::Export: {
|
||||
ForEachInstruction([=](IR::IREmitter& ir, IR::Inst& inst) {
|
||||
const auto opcode = inst.GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::StoreBufferU32: {
|
||||
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
|
||||
const auto info = inst.Flags<IR::BufferInstInfo>();
|
||||
if (!info.system_coherent || !info.globally_coherent) {
|
||||
break;
|
||||
}
|
||||
|
||||
@ -61,7 +92,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
||||
const auto opcode = inst.GetOpcode();
|
||||
switch (opcode) {
|
||||
case IR::Opcode::LoadBufferU32: {
|
||||
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
|
||||
const auto info = inst.Flags<IR::BufferInstInfo>();
|
||||
if (!info.system_coherent || !info.globally_coherent) {
|
||||
break;
|
||||
}
|
||||
|
||||
@ -80,7 +112,8 @@ void RingAccessElimination(const IR::Program& program, const RuntimeInfo& runtim
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::StoreBufferU32: {
|
||||
if (!inst.Flags<IR::BufferInstInfo>().ring_access) {
|
||||
const auto info = inst.Flags<IR::BufferInstInfo>();
|
||||
if (!info.system_coherent || !info.globally_coherent) {
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,22 @@ void Visit(Info& info, IR::Inst& inst) {
|
||||
case IR::Opcode::GetUserData:
|
||||
info.ud_mask.Set(inst.Arg(0).ScalarReg());
|
||||
break;
|
||||
case IR::Opcode::SetPatch: {
|
||||
const auto patch = inst.Arg(0).Patch();
|
||||
if (patch <= IR::Patch::TessellationLodBottom) {
|
||||
info.stores_tess_level_outer = true;
|
||||
} else if (patch <= IR::Patch::TessellationLodInteriorV) {
|
||||
info.stores_tess_level_inner = true;
|
||||
} else {
|
||||
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::GetPatch: {
|
||||
const auto patch = inst.Arg(0).Patch();
|
||||
info.uses_patches |= 1U << IR::GenericPatchIndex(patch);
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::LoadSharedU32:
|
||||
case IR::Opcode::LoadSharedU64:
|
||||
case IR::Opcode::WriteSharedU32:
|
||||
|
28
src/shader_recompiler/ir/patch.cpp
Normal file
28
src/shader_recompiler/ir/patch.cpp
Normal file
@ -0,0 +1,28 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/ir/patch.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
std::string NameOf(Patch patch) {
|
||||
switch (patch) {
|
||||
case Patch::TessellationLodLeft:
|
||||
return "TessellationLodLeft";
|
||||
case Patch::TessellationLodTop:
|
||||
return "TessellationLodTop";
|
||||
case Patch::TessellationLodRight:
|
||||
return "TessellationLodRight";
|
||||
case Patch::TessellationLodBottom:
|
||||
return "TessellationLodBottom";
|
||||
case Patch::TessellationLodInteriorU:
|
||||
return "TessellationLodInteriorU";
|
||||
case Patch::TessellationLodInteriorV:
|
||||
return "TessellationLodInteriorV";
|
||||
default:
|
||||
const u32 index = u32(patch) - u32(Patch::Component0);
|
||||
return fmt::format("Component{}", index);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::IR
|
173
src/shader_recompiler/ir/patch.h
Normal file
173
src/shader_recompiler/ir/patch.h
Normal file
@ -0,0 +1,173 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include "common/types.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
enum class Patch : u64 {
|
||||
TessellationLodLeft,
|
||||
TessellationLodTop,
|
||||
TessellationLodRight,
|
||||
TessellationLodBottom,
|
||||
TessellationLodInteriorU,
|
||||
TessellationLodInteriorV,
|
||||
Component0,
|
||||
Component1,
|
||||
Component2,
|
||||
Component3,
|
||||
Component4,
|
||||
Component5,
|
||||
Component6,
|
||||
Component7,
|
||||
Component8,
|
||||
Component9,
|
||||
Component10,
|
||||
Component11,
|
||||
Component12,
|
||||
Component13,
|
||||
Component14,
|
||||
Component15,
|
||||
Component16,
|
||||
Component17,
|
||||
Component18,
|
||||
Component19,
|
||||
Component20,
|
||||
Component21,
|
||||
Component22,
|
||||
Component23,
|
||||
Component24,
|
||||
Component25,
|
||||
Component26,
|
||||
Component27,
|
||||
Component28,
|
||||
Component29,
|
||||
Component30,
|
||||
Component31,
|
||||
Component32,
|
||||
Component33,
|
||||
Component34,
|
||||
Component35,
|
||||
Component36,
|
||||
Component37,
|
||||
Component38,
|
||||
Component39,
|
||||
Component40,
|
||||
Component41,
|
||||
Component42,
|
||||
Component43,
|
||||
Component44,
|
||||
Component45,
|
||||
Component46,
|
||||
Component47,
|
||||
Component48,
|
||||
Component49,
|
||||
Component50,
|
||||
Component51,
|
||||
Component52,
|
||||
Component53,
|
||||
Component54,
|
||||
Component55,
|
||||
Component56,
|
||||
Component57,
|
||||
Component58,
|
||||
Component59,
|
||||
Component60,
|
||||
Component61,
|
||||
Component62,
|
||||
Component63,
|
||||
Component64,
|
||||
Component65,
|
||||
Component66,
|
||||
Component67,
|
||||
Component68,
|
||||
Component69,
|
||||
Component70,
|
||||
Component71,
|
||||
Component72,
|
||||
Component73,
|
||||
Component74,
|
||||
Component75,
|
||||
Component76,
|
||||
Component77,
|
||||
Component78,
|
||||
Component79,
|
||||
Component80,
|
||||
Component81,
|
||||
Component82,
|
||||
Component83,
|
||||
Component84,
|
||||
Component85,
|
||||
Component86,
|
||||
Component87,
|
||||
Component88,
|
||||
Component89,
|
||||
Component90,
|
||||
Component91,
|
||||
Component92,
|
||||
Component93,
|
||||
Component94,
|
||||
Component95,
|
||||
Component96,
|
||||
Component97,
|
||||
Component98,
|
||||
Component99,
|
||||
Component100,
|
||||
Component101,
|
||||
Component102,
|
||||
Component103,
|
||||
Component104,
|
||||
Component105,
|
||||
Component106,
|
||||
Component107,
|
||||
Component108,
|
||||
Component109,
|
||||
Component110,
|
||||
Component111,
|
||||
Component112,
|
||||
Component113,
|
||||
Component114,
|
||||
Component115,
|
||||
Component116,
|
||||
Component117,
|
||||
Component118,
|
||||
Component119,
|
||||
};
|
||||
static_assert(static_cast<u64>(Patch::Component119) == 125);
|
||||
|
||||
constexpr bool IsGeneric(Patch patch) noexcept {
|
||||
return patch >= Patch::Component0 && patch <= Patch::Component119;
|
||||
}
|
||||
|
||||
constexpr Patch PatchFactor(u32 index) {
|
||||
return static_cast<Patch>(index);
|
||||
}
|
||||
|
||||
constexpr Patch PatchGeneric(u32 index) {
|
||||
return static_cast<Patch>(static_cast<u32>(Patch::Component0) + index);
|
||||
}
|
||||
|
||||
constexpr u32 GenericPatchIndex(Patch patch) {
|
||||
return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) / 4;
|
||||
}
|
||||
|
||||
constexpr u32 GenericPatchElement(Patch patch) {
|
||||
return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) % 4;
|
||||
}
|
||||
|
||||
[[nodiscard]] std::string NameOf(Patch patch);
|
||||
|
||||
} // namespace Shader::IR
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<Shader::IR::Patch> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
auto format(const Shader::IR::Patch patch, format_context& ctx) const {
|
||||
return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(patch));
|
||||
}
|
||||
};
|
@ -49,7 +49,8 @@ union BufferInstInfo {
|
||||
BitField<0, 1, u32> index_enable;
|
||||
BitField<1, 1, u32> offset_enable;
|
||||
BitField<2, 12, u32> inst_offset;
|
||||
BitField<14, 1, u32> ring_access; // global + system coherency
|
||||
BitField<14, 1, u32> system_coherent;
|
||||
BitField<15, 1, u32> globally_coherent;
|
||||
};
|
||||
|
||||
enum class ScalarReg : u32 {
|
||||
|
@ -15,7 +15,7 @@ enum class Type {
|
||||
ScalarReg = 1 << 1,
|
||||
VectorReg = 1 << 2,
|
||||
Attribute = 1 << 3,
|
||||
SystemValue = 1 << 4,
|
||||
Patch = 1 << 4,
|
||||
U1 = 1 << 5,
|
||||
U8 = 1 << 6,
|
||||
U16 = 1 << 7,
|
||||
|
@ -16,6 +16,8 @@ Value::Value(IR::VectorReg reg) noexcept : type{Type::VectorReg}, vreg{reg} {}
|
||||
|
||||
Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}
|
||||
|
||||
Value::Value(IR::Patch patch) noexcept : type{Type::Patch}, patch{patch} {}
|
||||
|
||||
Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}
|
||||
|
||||
Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "shader_recompiler/ir/opcodes.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/ir/type.h"
|
||||
#include "shader_recompiler/ir/patch.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
@ -34,6 +35,7 @@ public:
|
||||
explicit Value(IR::ScalarReg reg) noexcept;
|
||||
explicit Value(IR::VectorReg reg) noexcept;
|
||||
explicit Value(IR::Attribute value) noexcept;
|
||||
explicit Value(IR::Patch patch) noexcept;
|
||||
explicit Value(bool value) noexcept;
|
||||
explicit Value(u8 value) noexcept;
|
||||
explicit Value(u16 value) noexcept;
|
||||
@ -56,6 +58,7 @@ public:
|
||||
[[nodiscard]] IR::ScalarReg ScalarReg() const;
|
||||
[[nodiscard]] IR::VectorReg VectorReg() const;
|
||||
[[nodiscard]] IR::Attribute Attribute() const;
|
||||
[[nodiscard]] IR::Patch Patch() const;
|
||||
[[nodiscard]] bool U1() const;
|
||||
[[nodiscard]] u8 U8() const;
|
||||
[[nodiscard]] u16 U16() const;
|
||||
@ -75,6 +78,7 @@ private:
|
||||
IR::ScalarReg sreg;
|
||||
IR::VectorReg vreg;
|
||||
IR::Attribute attribute;
|
||||
IR::Patch patch;
|
||||
bool imm_u1;
|
||||
u8 imm_u8;
|
||||
u16 imm_u16;
|
||||
@ -330,6 +334,11 @@ inline IR::Attribute Value::Attribute() const {
|
||||
return attribute;
|
||||
}
|
||||
|
||||
inline IR::Patch Value::Patch() const {
|
||||
DEBUG_ASSERT(type == Type::Patch);
|
||||
return patch;
|
||||
}
|
||||
|
||||
inline bool Value::U1() const {
|
||||
if (IsIdentity()) {
|
||||
return inst->Arg(0).U1();
|
||||
|
@ -60,9 +60,14 @@ IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info
|
||||
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
|
||||
|
||||
// Run optimization passes
|
||||
const auto stage = program.info.stage;
|
||||
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
|
||||
if (stage == Stage::Hull) {
|
||||
Shader::Optimization::HullShaderTransform(program);
|
||||
}
|
||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||
if (program.info.stage != Stage::Compute) {
|
||||
Shader::Optimization::RingAccessElimination(program, runtime_info, stage);
|
||||
if (stage != Stage::Compute) {
|
||||
Shader::Optimization::LowerSharedMemToRegisters(program);
|
||||
}
|
||||
Shader::Optimization::RingAccessElimination(program, runtime_info, program.info.stage);
|
||||
|
@ -23,6 +23,15 @@ enum class Stage : u32 {
|
||||
};
|
||||
constexpr u32 MaxStageTypes = 7;
|
||||
|
||||
enum class LogicalStage : u32 {
|
||||
Fragment,
|
||||
Vertex,
|
||||
TessellationControl,
|
||||
TessellationEval,
|
||||
Geometry,
|
||||
Compute,
|
||||
};
|
||||
|
||||
[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
|
||||
return static_cast<Stage>(index);
|
||||
}
|
||||
@ -64,12 +73,23 @@ struct VertexRuntimeInfo {
|
||||
u32 num_outputs;
|
||||
std::array<VsOutputMap, 3> outputs;
|
||||
bool emulate_depth_negative_one_to_one{};
|
||||
AmdGpu::TessellationType tess_type;
|
||||
AmdGpu::TessellationTopology tess_topology;
|
||||
AmdGpu::TessellationPartitioning tess_partitioning;
|
||||
|
||||
bool operator==(const VertexRuntimeInfo& other) const noexcept {
|
||||
return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one;
|
||||
return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one &&
|
||||
tess_type == other.tess_type && tess_topology == other.tess_topology &&
|
||||
tess_partitioning == other.tess_partitioning;
|
||||
}
|
||||
};
|
||||
|
||||
struct HullRuntimeInfo {
|
||||
u32 output_control_points;
|
||||
|
||||
auto operator<=>(const HullRuntimeInfo&) const noexcept = default;
|
||||
};
|
||||
|
||||
static constexpr auto GsMaxOutputStreams = 4u;
|
||||
using GsOutputPrimTypes = std::array<AmdGpu::GsOutputPrimitiveType, GsMaxOutputStreams>;
|
||||
struct GeometryRuntimeInfo {
|
||||
@ -152,6 +172,7 @@ struct RuntimeInfo {
|
||||
union {
|
||||
ExportRuntimeInfo es_info;
|
||||
VertexRuntimeInfo vs_info;
|
||||
HullRuntimeInfo hs_info;
|
||||
GeometryRuntimeInfo gs_info;
|
||||
FragmentRuntimeInfo fs_info;
|
||||
ComputeRuntimeInfo cs_info;
|
||||
@ -174,6 +195,8 @@ struct RuntimeInfo {
|
||||
return es_info == other.es_info;
|
||||
case Stage::Geometry:
|
||||
return gs_info == other.gs_info;
|
||||
case Stage::Hull:
|
||||
return hs_info == other.hs_info;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
@ -956,6 +956,7 @@ struct Liverpool {
|
||||
enum VgtStages : u32 {
|
||||
Vs = 0u, // always enabled
|
||||
EsGs = 0xB0u,
|
||||
LsHs = 0x45u,
|
||||
};
|
||||
|
||||
VgtStages raw;
|
||||
@ -1059,6 +1060,20 @@ struct Liverpool {
|
||||
};
|
||||
};
|
||||
|
||||
union LsHsConfig {
|
||||
u32 raw;
|
||||
BitField<0, 8, u32> num_patches;
|
||||
BitField<8, 6, u32> hs_input_control_points;
|
||||
BitField<14, 6, u32> hs_output_control_points;
|
||||
};
|
||||
|
||||
union TessellationConfig {
|
||||
u32 raw;
|
||||
BitField<0, 2, TessellationType> type;
|
||||
BitField<2, 3, TessellationPartitioning> partitioning;
|
||||
BitField<5, 3, TessellationTopology> topology;
|
||||
};
|
||||
|
||||
union Eqaa {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> max_anchor_samples;
|
||||
@ -1200,9 +1215,10 @@ struct Liverpool {
|
||||
BitField<0, 11, u32> vgt_gs_max_vert_out;
|
||||
INSERT_PADDING_WORDS(0xA2D5 - 0xA2CE - 1);
|
||||
ShaderStageEnable stage_enable;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
LsHsConfig ls_hs_config;
|
||||
u32 vgt_gs_vert_itemsize[4];
|
||||
INSERT_PADDING_WORDS(4);
|
||||
TessellationConfig tess_config;
|
||||
INSERT_PADDING_WORDS(3);
|
||||
PolygonOffset poly_offset;
|
||||
GsInstances vgt_gs_instance_cnt;
|
||||
StreamOutConfig vgt_strmout_config;
|
||||
@ -1445,6 +1461,7 @@ static_assert(GFX6_3D_REG_INDEX(vgt_gsvs_ring_itemsize) == 0xA2AC);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_max_vert_out) == 0xA2CE);
|
||||
static_assert(GFX6_3D_REG_INDEX(stage_enable) == 0xA2D5);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_vert_itemsize[0]) == 0xA2D7);
|
||||
static_assert(GFX6_3D_REG_INDEX(tess_config) == 0xA2DB);
|
||||
static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_gs_instance_cnt) == 0xA2E4);
|
||||
static_assert(GFX6_3D_REG_INDEX(vgt_strmout_config) == 0xA2E5);
|
||||
|
@ -21,6 +21,26 @@ enum class FpDenormMode : u32 {
|
||||
InOutAllow = 3,
|
||||
};
|
||||
|
||||
enum class TessellationType : u32 {
|
||||
Isoline = 0,
|
||||
Triangle = 1,
|
||||
Quad = 2,
|
||||
};
|
||||
|
||||
enum class TessellationPartitioning : u32 {
|
||||
Integer = 0,
|
||||
Pow2 = 1,
|
||||
FracOdd = 2,
|
||||
FracEven = 3,
|
||||
};
|
||||
|
||||
enum class TessellationTopology : u32 {
|
||||
Point = 0,
|
||||
Line = 1,
|
||||
TriangleCw = 2,
|
||||
TriangleCcw = 3,
|
||||
};
|
||||
|
||||
// See `VGT_PRIMITIVE_TYPE` description in [Radeon Sea Islands 3D/Compute Register Reference Guide]
|
||||
enum class PrimitiveType : u32 {
|
||||
None = 0,
|
||||
|
@ -52,7 +52,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||
boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
|
||||
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
|
||||
if (fetch_shader && !instance.IsVertexInputDynamicState()) {
|
||||
const auto& vs_info = GetStage(Shader::Stage::Vertex);
|
||||
const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
|
||||
for (const auto& attrib : fetch_shader->attributes) {
|
||||
if (attrib.UsesStepRates()) {
|
||||
// Skip attribute binding as the data will be pulled by shader
|
||||
@ -204,7 +204,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||
|
||||
boost::container::static_vector<vk::PipelineShaderStageCreateInfo, MaxShaderStages>
|
||||
shader_stages;
|
||||
auto stage = u32(Shader::Stage::Vertex);
|
||||
auto stage = u32(LogicalStage::Vertex);
|
||||
if (infos[stage]) {
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||
.stage = vk::ShaderStageFlagBits::eVertex,
|
||||
@ -212,7 +212,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||
.pName = "main",
|
||||
});
|
||||
}
|
||||
stage = u32(Shader::Stage::Geometry);
|
||||
stage = u32(LogicalStage::Geometry);
|
||||
if (infos[stage]) {
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||
.stage = vk::ShaderStageFlagBits::eGeometry,
|
||||
@ -220,7 +220,23 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
|
||||
.pName = "main",
|
||||
});
|
||||
}
|
||||
stage = u32(Shader::Stage::Fragment);
|
||||
stage = u32(LogicalStage::TessellationControl);
|
||||
if (infos[stage]) {
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||
.stage = vk::ShaderStageFlagBits::eTessellationControl,
|
||||
.module = modules[stage],
|
||||
.pName = "main",
|
||||
});
|
||||
}
|
||||
stage = u32(LogicalStage::TessellationEval);
|
||||
if (infos[stage]) {
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||
.stage = vk::ShaderStageFlagBits::eTessellationEvaluation,
|
||||
.module = modules[stage],
|
||||
.pName = "main",
|
||||
});
|
||||
}
|
||||
stage = u32(LogicalStage::Fragment);
|
||||
if (infos[stage]) {
|
||||
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
|
||||
.stage = vk::ShaderStageFlagBits::eFragment,
|
||||
|
@ -73,7 +73,7 @@ public:
|
||||
|
||||
bool IsEmbeddedVs() const noexcept {
|
||||
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
|
||||
return key.stage_hashes[u32(Shader::Stage::Vertex)] == EmbeddedVsHash;
|
||||
return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash;
|
||||
}
|
||||
|
||||
auto GetWriteMasks() const {
|
||||
|
@ -22,6 +22,8 @@ extern std::unique_ptr<Vulkan::Presenter> presenter;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using Shader::LogicalStage;
|
||||
using Shader::Stage;
|
||||
using Shader::VsOutput;
|
||||
|
||||
constexpr static std::array DescriptorHeapSizes = {
|
||||
@ -78,7 +80,7 @@ void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
|
||||
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
|
||||
}
|
||||
|
||||
Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
||||
Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_stage) {
|
||||
auto info = Shader::RuntimeInfo{stage};
|
||||
const auto& regs = liverpool->regs;
|
||||
const auto BuildCommon = [&](const auto& program) {
|
||||
@ -89,20 +91,34 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
||||
info.fp_round_mode32 = program.settings.fp_round_mode32;
|
||||
};
|
||||
switch (stage) {
|
||||
case Shader::Stage::Export: {
|
||||
case Stage::Local: {
|
||||
BuildCommon(regs.ls_program);
|
||||
break;
|
||||
}
|
||||
case Stage::Hull: {
|
||||
BuildCommon(regs.hs_program);
|
||||
info.hs_info.output_control_points = regs.ls_hs_config.hs_output_control_points.Value();
|
||||
break;
|
||||
}
|
||||
case Stage::Export: {
|
||||
BuildCommon(regs.es_program);
|
||||
info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize;
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Vertex: {
|
||||
case Stage::Vertex: {
|
||||
BuildCommon(regs.vs_program);
|
||||
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
|
||||
info.vs_info.emulate_depth_negative_one_to_one =
|
||||
!instance.IsDepthClipControlSupported() &&
|
||||
regs.clipper_control.clip_space == Liverpool::ClipSpace::MinusWToW;
|
||||
if (l_stage == LogicalStage::TessellationEval) {
|
||||
info.vs_info.tess_type = regs.tess_config.type;
|
||||
info.vs_info.tess_topology = regs.tess_config.topology;
|
||||
info.vs_info.tess_partitioning = regs.tess_config.partitioning;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Geometry: {
|
||||
case Stage::Geometry: {
|
||||
BuildCommon(regs.gs_program);
|
||||
auto& gs_info = info.gs_info;
|
||||
gs_info.output_vertices = regs.vgt_gs_max_vert_out;
|
||||
@ -121,7 +137,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
||||
DumpShader(gs_info.vs_copy, gs_info.vs_copy_hash, Shader::Stage::Vertex, 0, "copy.bin");
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Fragment: {
|
||||
case Stage::Fragment: {
|
||||
BuildCommon(regs.ps_program);
|
||||
info.fs_info.en_flags = regs.ps_input_ena;
|
||||
info.fs_info.addr_flags = regs.ps_input_addr;
|
||||
@ -143,7 +159,7 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Compute: {
|
||||
case Stage::Compute: {
|
||||
const auto& cs_pgm = regs.cs_program;
|
||||
info.num_user_data = cs_pgm.settings.num_user_regs;
|
||||
info.num_allocated_vgprs = regs.cs_program.settings.num_vgprs * 4;
|
||||
@ -304,7 +320,7 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
fetch_shader = std::nullopt;
|
||||
|
||||
Shader::Backend::Bindings binding{};
|
||||
const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool {
|
||||
const auto& TryBindStage = [&](Shader::Stage stage_in, Shader::LogicalStage stage_out) -> bool {
|
||||
const auto stage_in_idx = static_cast<u32>(stage_in);
|
||||
const auto stage_out_idx = static_cast<u32>(stage_out);
|
||||
if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) {
|
||||
@ -331,23 +347,23 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
auto params = Liverpool::GetParams(*pgm);
|
||||
std::optional<Shader::Gcn::FetchShaderData> fetch_shader_;
|
||||
std::tie(infos[stage_out_idx], modules[stage_out_idx], fetch_shader_,
|
||||
key.stage_hashes[stage_out_idx]) = GetProgram(stage_in, params, binding);
|
||||
key.stage_hashes[stage_out_idx]) =
|
||||
GetProgram(stage_in, stage_out, params, binding);
|
||||
if (fetch_shader_) {
|
||||
fetch_shader = fetch_shader_;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
const auto& TryBindStage = [&](Shader::Stage stage) { return TryBindStageRemap(stage, stage); };
|
||||
|
||||
const auto& IsGsFeaturesSupported = [&]() -> bool {
|
||||
// These checks are temporary until all functionality is implemented.
|
||||
return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw;
|
||||
};
|
||||
|
||||
TryBindStage(Shader::Stage::Fragment);
|
||||
infos.fill(nullptr);
|
||||
TryBindStage(Stage::Fragment, LogicalStage::Fragment);
|
||||
|
||||
const auto* fs_info = infos[static_cast<u32>(Shader::Stage::Fragment)];
|
||||
const auto* fs_info = infos[static_cast<u32>(LogicalStage::Fragment)];
|
||||
key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u;
|
||||
|
||||
switch (regs.stage_enable.raw) {
|
||||
@ -355,22 +371,36 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||
if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) {
|
||||
return false;
|
||||
}
|
||||
if (!TryBindStageRemap(Shader::Stage::Export, Shader::Stage::Vertex)) {
|
||||
if (!TryBindStage(Stage::Export, LogicalStage::Vertex)) {
|
||||
return false;
|
||||
}
|
||||
if (!TryBindStage(Shader::Stage::Geometry)) {
|
||||
if (!TryBindStage(Stage::Geometry, LogicalStage::Geometry)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Liverpool::ShaderStageEnable::VgtStages::LsHs: {
|
||||
if (!instance.IsTessellationSupported()) {
|
||||
break;
|
||||
}
|
||||
if (!TryBindStage(Stage::Local, LogicalStage::Vertex)) {
|
||||
return false;
|
||||
}
|
||||
if (!TryBindStage(Stage::Hull, LogicalStage::TessellationControl)) {
|
||||
return false;
|
||||
}
|
||||
if (!TryBindStage(Stage::Vertex, LogicalStage::TessellationEval)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
TryBindStage(Shader::Stage::Vertex);
|
||||
infos[static_cast<u32>(Shader::Stage::Geometry)] = nullptr;
|
||||
TryBindStage(Stage::Vertex, LogicalStage::Vertex);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const auto vs_info = infos[static_cast<u32>(Shader::Stage::Vertex)];
|
||||
const auto vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)];
|
||||
if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) {
|
||||
u32 vertex_binding = 0;
|
||||
for (const auto& attrib : fetch_shader->attributes) {
|
||||
@ -430,7 +460,7 @@ bool PipelineCache::RefreshComputeKey() {
|
||||
const auto* cs_pgm = &liverpool->regs.cs_program;
|
||||
const auto cs_params = Liverpool::GetParams(*cs_pgm);
|
||||
std::tie(infos[0], modules[0], fetch_shader, compute_key.value) =
|
||||
GetProgram(Shader::Stage::Compute, cs_params, binding);
|
||||
GetProgram(Shader::Stage::Compute, LogicalStage::Compute, cs_params, binding);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -466,13 +496,13 @@ vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
|
||||
return module;
|
||||
}
|
||||
|
||||
std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>, u64>
|
||||
PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
|
||||
Shader::Backend::Bindings& binding) {
|
||||
const auto runtime_info = BuildRuntimeInfo(stage);
|
||||
PipelineCache::Result PipelineCache::GetProgram(Stage stage, LogicalStage l_stage,
|
||||
Shader::ShaderParams params,
|
||||
Shader::Backend::Bindings& binding) {
|
||||
const auto runtime_info = BuildRuntimeInfo(stage, l_stage);
|
||||
auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
|
||||
if (new_program) {
|
||||
it_pgm.value() = std::make_unique<Program>(stage, params);
|
||||
it_pgm.value() = std::make_unique<Program>(stage, l_stage, params);
|
||||
auto& program = it_pgm.value();
|
||||
auto start = binding;
|
||||
const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
|
||||
@ -491,7 +521,7 @@ PipelineCache::GetProgram(Shader::Stage stage, Shader::ShaderParams params,
|
||||
|
||||
const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec);
|
||||
if (it == program->modules.end()) {
|
||||
auto new_info = Shader::Info(stage, params);
|
||||
auto new_info = Shader::Info(stage, l_stage, params);
|
||||
module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding);
|
||||
program->AddPermut(module, std::move(spec));
|
||||
} else {
|
||||
|
@ -34,11 +34,13 @@ struct Program {
|
||||
vk::ShaderModule module;
|
||||
Shader::StageSpecialization spec;
|
||||
};
|
||||
using ModuleList = boost::container::small_vector<Module, 8>;
|
||||
|
||||
Shader::Info info;
|
||||
boost::container::small_vector<Module, 8> modules;
|
||||
ModuleList modules;
|
||||
|
||||
explicit Program(Shader::Stage stage, Shader::ShaderParams params) : info{stage, params} {}
|
||||
explicit Program(Shader::Stage stage, Shader::LogicalStage l_stage, Shader::ShaderParams params)
|
||||
: info{stage, l_stage, params} {}
|
||||
|
||||
void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) {
|
||||
modules.emplace_back(module, std::move(spec));
|
||||
@ -55,10 +57,10 @@ public:
|
||||
|
||||
const ComputePipeline* GetComputePipeline();
|
||||
|
||||
std::tuple<const Shader::Info*, vk::ShaderModule, std::optional<Shader::Gcn::FetchShaderData>,
|
||||
u64>
|
||||
GetProgram(Shader::Stage stage, Shader::ShaderParams params,
|
||||
Shader::Backend::Bindings& binding);
|
||||
using Result = std::tuple<const Shader::Info*, vk::ShaderModule,
|
||||
std::optional<Shader::Gcn::FetchShaderData>, u64>;
|
||||
Result GetProgram(Shader::Stage stage, Shader::LogicalStage l_stage,
|
||||
Shader::ShaderParams params, Shader::Backend::Bindings& binding);
|
||||
|
||||
std::optional<vk::ShaderModule> ReplaceShader(vk::ShaderModule module,
|
||||
std::span<const u32> spv_code);
|
||||
@ -74,7 +76,7 @@ private:
|
||||
vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info,
|
||||
std::span<const u32> code, size_t perm_idx,
|
||||
Shader::Backend::Bindings& binding);
|
||||
Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage);
|
||||
Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, Shader::LogicalStage l_stage);
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
|
Loading…
Reference in New Issue
Block a user