From 0eaa7d58599a336898b54eb2a680e13a5c9a2feb Mon Sep 17 00:00:00 2001 From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com> Date: Fri, 24 May 2024 03:02:21 +0300 Subject: [PATCH] shader_recompiler: Implement attribute loads/stores --- CMakeLists.txt | 3 + src/main.cpp | 1 - .../backend/spirv/emit_spirv.cpp | 6 +- .../spirv/emit_spirv_context_get_set.cpp | 17 +- .../backend/spirv/emit_spirv_instructions.h | 6 +- .../backend/spirv/spirv_emit_context.cpp | 105 +++++++++++- .../backend/spirv/spirv_emit_context.h | 18 +-- .../frontend/fetch_shader.cpp | 4 +- src/shader_recompiler/frontend/fetch_shader.h | 3 +- .../frontend/structured_control_flow.cpp | 12 +- .../frontend/structured_control_flow.h | 4 +- .../frontend/translate/translate.cpp | 40 ++++- .../frontend/translate/translate.h | 10 +- .../frontend/translate/vector_alu.cpp | 5 +- .../translate/vector_interpolation.cpp | 4 +- src/shader_recompiler/ir/attribute.h | 8 +- src/shader_recompiler/ir/ir_emitter.cpp | 8 - src/shader_recompiler/ir/ir_emitter.h | 8 +- .../ir/passes/info_collection.cpp | 33 ++++ src/shader_recompiler/ir/passes/passes.h | 1 + src/shader_recompiler/ir/program.h | 13 +- src/shader_recompiler/recompiler.cpp | 18 +-- src/shader_recompiler/recompiler.h | 10 +- src/shader_recompiler/runtime_info.h | 149 +++++++----------- src/video_core/amdgpu/liverpool.h | 21 ++- src/video_core/amdgpu/pixel_format.cpp | 34 ++++ src/video_core/amdgpu/pixel_format.h | 15 ++ .../renderer_vulkan/vk_pipeline_cache.cpp | 40 ++++- 28 files changed, 399 insertions(+), 197 deletions(-) create mode 100644 src/shader_recompiler/ir/passes/info_collection.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index dae1e4f27..150c10013 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -324,6 +324,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/frontend/control_flow_graph.h src/shader_recompiler/frontend/decode.cpp src/shader_recompiler/frontend/decode.h + src/shader_recompiler/frontend/fetch_shader.cpp + src/shader_recompiler/frontend/fetch_shader.h src/shader_recompiler/frontend/format.cpp src/shader_recompiler/frontend/instruction.cpp src/shader_recompiler/frontend/instruction.h @@ -333,6 +335,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp src/shader_recompiler/ir/passes/resource_tracking_pass.cpp src/shader_recompiler/ir/passes/constant_propogation_pass.cpp + src/shader_recompiler/ir/passes/info_collection.cpp src/shader_recompiler/ir/passes/passes.h src/shader_recompiler/ir/abstract_syntax_list.h src/shader_recompiler/ir/attribute.cpp diff --git a/src/main.cpp b/src/main.cpp index 2a7f839eb..431960461 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -20,7 +20,6 @@ #include "core/libraries/libs.h" #include "core/libraries/videoout/video_out.h" #include "core/linker.h" -#include "core/tls.h" #include "input/controller.h" #include "sdl_window.h" diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index f341d465e..fb9c67d62 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -171,7 +171,7 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) { void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); spv::ExecutionModel execution_model{}; - switch (program.stage) { + switch (program.info.stage) { case Stage::Compute: { // const std::array workgroup_size{program.workgroup_size}; // execution_model = spv::ExecutionModel::GLCompute; @@ -194,7 +194,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { // } break; default: - throw NotImplementedException("Stage {}", u32(program.stage)); + throw NotImplementedException("Stage {}", u32(program.info.stage)); } ctx.AddEntryPoint(execution_model, main, "main", interfaces); } @@ -222,7 +222,7 @@ std::vector EmitSPIRV(const Profile& profile, IR::Program& program, Binding EmitContext ctx{profile, program, bindings}; const Id main{DefineMain(ctx, program)}; DefineEntryPoint(program, ctx, main); - if (program.stage == Stage::Vertex) { + if (program.info.stage == Stage::Vertex) { ctx.AddExtension("SPV_KHR_shader_draw_parameters"); ctx.AddCapability(spv::Capability::DrawParameters); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 0ba72f2a6..2ed4e29d6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -10,12 +10,11 @@ namespace { Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { if (IR::IsParam(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; - const auto& info{ctx.output_params.at(index).at(element)}; + const auto& info{ctx.output_params.at(index)}; if (info.num_components == 1) { return info.id; } else { - const u32 index_element{element - info.first_element}; - return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(index_element)); + return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(element)); } } switch (attr) { @@ -68,22 +67,21 @@ Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& binding, const IR:: throw LogicError("Unreachable instruction"); } -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { - const u32 element{static_cast(attr) % 4}; +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) { if (IR::IsParam(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; const auto& param{ctx.input_params.at(index)}; if (!ValidId(param.id)) { // Attribute is disabled or varying component is not written - return ctx.ConstF32(element == 3 ? 1.0f : 0.0f); + return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f); } - const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(element))}; + const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))}; return ctx.OpLoad(param.component_type, pointer); } throw NotImplementedException("Read attribute {}", attr); } -Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { +Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) { switch (attr) { case IR::Attribute::VertexId: return ctx.OpLoad(ctx.U32[1], ctx.vertex_index); @@ -93,9 +91,6 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { } void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) { - if (attr == IR::Attribute::Param0) { - return; - } const Id pointer{OutputAttrPointer(ctx, attr, element)}; ctx.OpStore(pointer, value); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 20d58e908..24685275f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -46,9 +46,9 @@ Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& handle, const IR::Valu const IR::Value& offset); Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& handle, const IR::Value& index, const IR::Value& offset); -Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); -Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id vertex); -void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element); +Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp); +Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); +void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); void EmitSetSampleMask(EmitContext& ctx, Id value); void EmitSetFragDepth(EmitContext& ctx, Id value); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index e9a557669..376175dc9 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -36,7 +36,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar } // Anonymous namespace EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& bindings) - : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} { + : Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.info.stage} { u32& uniform_binding{bindings.unified}; u32& storage_binding{bindings.unified}; u32& texture_binding{bindings.unified}; @@ -98,6 +98,10 @@ void EmitContext::DefineArithmeticTypes() { u32_zero_value = ConstU32(0U); f32_zero_value = ConstF32(0.0f); + input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); + input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); + input_s32 = Name(TypePointer(spv::StorageClass::Input, S32[1]), "input_s32"); + output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32"); } @@ -107,26 +111,119 @@ void EmitContext::DefineInterfaces(const IR::Program& program) { DefineOutputs(program); } +Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { + switch (fmt) { + case AmdGpu::NumberFormat::Float: + case AmdGpu::NumberFormat::Unorm: + return ctx.F32[4]; + case AmdGpu::NumberFormat::Sint: + return ctx.S32[4]; + case AmdGpu::NumberFormat::Uint: + return ctx.U32[4]; + case AmdGpu::NumberFormat::Sscaled: + return ctx.F32[4]; + case AmdGpu::NumberFormat::Uscaled: + return ctx.F32[4]; + default: + break; + } + throw InvalidArgument("Invalid attribute type {}", fmt); +} + +EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) { + switch (fmt) { + case AmdGpu::NumberFormat::Float: + case AmdGpu::NumberFormat::Unorm: + return {id, input_f32, F32[1], 4}; + case AmdGpu::NumberFormat::Uint: + return {id, input_u32, U32[1], 4}; + case AmdGpu::NumberFormat::Sint: + return {id, input_s32, S32[1], 4}; + case AmdGpu::NumberFormat::Sscaled: + return {id, input_f32, F32[1], 4}; + case AmdGpu::NumberFormat::Uscaled: + return {id, input_f32, F32[1], 4}; + default: + break; + } + throw InvalidArgument("Invalid attribute type {}", fmt); +} + +Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { + switch (default_value) { + case 0: + return ctx.ConstF32(0.f, 0.f, 0.f, 0.f); + case 1: + return ctx.ConstF32(0.f, 0.f, 0.f, 1.f); + case 2: + return ctx.ConstF32(1.f, 1.f, 1.f, 0.f); + case 3: + return ctx.ConstF32(1.f, 1.f, 1.f, 1.f); + default: + UNREACHABLE(); + } +} + void EmitContext::DefineInputs(const IR::Program& program) { + const auto& info = program.info; switch (stage) { case Stage::Vertex: vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); + for (const auto& input : info.vs_inputs) { + const Id type{GetAttributeType(*this, input.fmt)}; + const Id id{DefineInput(type, input.binding)}; + Name(id, fmt::format("vs_in_attr{}", input.binding)); + input_params[input.binding] = GetAttributeInfo(input.fmt, id); + } break; + case Stage::Fragment: + for (const auto& input : info.ps_inputs) { + if (input.is_default) { + input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value), input_f32, F32[1]}; + continue; + } + const IR::Attribute param{IR::Attribute::Param0 + input.param_index}; + const u32 num_components = info.loads.NumComponents(param); + const Id type{F32[num_components]}; + const Id id{DefineInput(type, input.semantic)}; + if (input.is_flat) { + Decorate(id, spv::Decoration::Flat); + } + Name(id, fmt::format("fs_in_attr{}", input.semantic)); + input_params[input.semantic] = {id, input_f32, F32[1], num_components}; + } default: break; } } void EmitContext::DefineOutputs(const IR::Program& program) { + const auto& info = program.info; switch (stage) { case Stage::Vertex: output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); + for (u32 i = 0; i < IR::NumParams; i++) { + const IR::Attribute param{IR::Attribute::Param0 + i}; + if (!info.stores.GetAny(param)) { + continue; + } + const u32 num_components = info.stores.NumComponents(param); + const Id id{DefineOutput(F32[num_components], i)}; + Name(id, fmt::format("out_attr{}", i)); + output_params[i] = {id, output_f32, F32[1], num_components}; + } break; case Stage::Fragment: - frag_color[0] = DefineOutput(F32[4], 0); - Name(frag_color[0], fmt::format("frag_color{}", 0)); - interfaces.push_back(frag_color[0]); + for (u32 i = 0; i < IR::NumRenderTargets; i++) { + const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i}; + if (!info.stores.GetAny(mrt)) { + continue; + } + frag_color[i] = DefineOutput(F32[4], i); + Name(frag_color[i], fmt::format("frag_color{}", i)); + interfaces.push_back(frag_color[i]); + } break; default: break; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index bf78a4459..26298e386 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -135,6 +135,9 @@ public: Id u32_zero_value{}; Id f32_zero_value{}; + Id input_u32{}; + Id input_f32{}; + Id input_s32{}; Id output_u32{}; Id output_f32{}; @@ -145,25 +148,22 @@ public: Id base_vertex{}; std::array frag_color{}; - struct InputParamInfo { + struct SpirvAttribute { Id id; Id pointer_type; Id component_type; + u32 num_components; }; - std::array input_params{}; - - struct ParamElementInfo { - Id id{}; - u32 first_element{}; - u32 num_components{}; - }; - std::array, 32> output_params{}; + std::array input_params{}; + std::array output_params{}; private: void DefineArithmeticTypes(); void DefineInterfaces(const IR::Program& program); void DefineInputs(const IR::Program& program); void DefineOutputs(const IR::Program& program); + + SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp index 1ae8c8944..b17fbc522 100644 --- a/src/shader_recompiler/frontend/fetch_shader.cpp +++ b/src/shader_recompiler/frontend/fetch_shader.cpp @@ -32,9 +32,9 @@ namespace Shader::Gcn { * We take the reverse way, extract the original input semantics from these instructions. **/ -std::vector ParseFetchShader(std::span code) { +std::vector ParseFetchShader(const u32* code) { std::vector attributes; - GcnCodeSlice code_slice(code.data(), code.data() + std::numeric_limits::max()); + GcnCodeSlice code_slice(code, code + std::numeric_limits::max()); GcnDecodeContext decoder; struct VsharpLoad { diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h index 636cd5e79..627e19aa0 100644 --- a/src/shader_recompiler/frontend/fetch_shader.h +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -3,7 +3,6 @@ #pragma once -#include #include #include "common/types.h" @@ -17,6 +16,6 @@ struct VertexAttribute { u8 dword_offset; ///< The dword offset of the V# that describes this attribute. }; -std::vector ParseFetchShader(std::span code); +std::vector ParseFetchShader(const u32* code); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index 3464a88e5..2064c6a58 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -600,9 +600,9 @@ public: TranslatePass(ObjectPool& inst_pool_, ObjectPool& block_pool_, ObjectPool& stmt_pool_, Statement& root_stmt, IR::AbstractSyntaxList& syntax_list_, std::span inst_list_, - Stage stage_) + Info& info_) : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, - syntax_list{syntax_list_}, inst_list{inst_list_}, stage{stage_} { + syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_} { Visit(root_stmt, nullptr, nullptr); IR::Block& first_block{*syntax_list.front().data.block}; @@ -633,7 +633,7 @@ private: ensure_block(); const u32 start = stmt.block->begin_index; const u32 size = stmt.block->end_index - start + 1; - Translate(current_block, stage, inst_list.subspan(start, size)); + Translate(current_block, inst_list.subspan(start, size), info); break; } case StatementType::SetVariable: { @@ -811,17 +811,17 @@ private: IR::AbstractSyntaxList& syntax_list; const Block dummy_flow_block{}; std::span inst_list; - Stage stage; + Info& info; }; } // Anonymous namespace IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, - CFG& cfg, Stage stage) { + CFG& cfg, Info& info) { ObjectPool stmt_pool{64}; GotoPass goto_pass{cfg, stmt_pool}; Statement& root{goto_pass.RootStatement()}; IR::AbstractSyntaxList syntax_list; - TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, stage}; + TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info}; return syntax_list; } diff --git a/src/shader_recompiler/frontend/structured_control_flow.h b/src/shader_recompiler/frontend/structured_control_flow.h index fa7b6738e..09814349c 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.h +++ b/src/shader_recompiler/frontend/structured_control_flow.h @@ -10,13 +10,13 @@ #include "shader_recompiler/object_pool.h" namespace Shader { -enum class Stage : u32; +struct Info; } namespace Shader::Gcn { [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool& inst_pool, ObjectPool& block_pool, CFG& cfg, - Stage stage); + Info& info); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 002351ca8..6dc85d168 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -3,13 +3,15 @@ #include "shader_recompiler/exception.h" #include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/resource.h" namespace Shader::Gcn { -Translator::Translator(IR::Block* block_, Stage stage) : block{block_}, ir{*block} { +Translator::Translator(IR::Block* block_, Info& info_) : block{block_}, ir{*block}, info{info_} { IR::VectorReg dst_vreg = IR::VectorReg::V0; - switch (stage) { + switch (info.stage) { case Stage::Vertex: // https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L146C1-L146C23 ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); @@ -92,11 +94,38 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) { } } -void Translate(IR::Block* block, Stage stage, std::span inst_list) { +void Translator::EmitFetch(const GcnInst& inst) { + // Read the pointer to the fetch shader assembly. + const u32 sgpr_base = inst.src[0].code; + const u32* code; + std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code)); + + // Parse the assembly to generate a list of attributes. + const auto attribs = ParseFetchShader(code); + for (const auto& attrib : attribs) { + IR::VectorReg dst_reg{attrib.dest_vgpr}; + const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic}; + for (u32 i = 0; i < attrib.num_elements; i++) { + ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i)); + } + + // Read the V# of the attribute to figure out component number and type. + const auto buffer = info.ReadUd(attrib.sgpr_base, + attrib.dword_offset); + const u32 num_components = AmdGpu::NumComponents(buffer.data_format); + info.vs_inputs.push_back({ + .fmt = buffer.num_format, + .binding = attrib.semantic, + .num_components = std::min(attrib.num_elements, num_components), + }); + } +} + +void Translate(IR::Block* block, std::span inst_list, Info& info) { if (inst_list.empty()) { return; } - Translator translator{block, stage}; + Translator translator{block, info}; for (const auto& inst : inst_list) { switch (inst.opcode) { case Opcode::S_MOV_B32: @@ -115,6 +144,9 @@ void Translate(IR::Block* block, Stage stage, std::span inst_list translator.V_MUL_F32(inst); break; case Opcode::S_SWAPPC_B64: + ASSERT(info.stage == Stage::Vertex); + translator.EmitFetch(inst); + break; case Opcode::S_WAITCNT: break; // Ignore for now. case Opcode::S_BUFFER_LOAD_DWORDX16: diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 2f972bef9..7e0186f3b 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -7,9 +7,10 @@ #include "shader_recompiler/frontend/instruction.h" #include "shader_recompiler/ir/basic_block.h" #include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/runtime_info.h" namespace Shader { -enum class Stage : u32; +struct Info; } namespace Shader::Gcn { @@ -25,7 +26,9 @@ enum class ConditionOp : u32 { class Translator { public: - explicit Translator(IR::Block* block_, Stage stage); + explicit Translator(IR::Block* block_, Info& info); + + void EmitFetch(const GcnInst& inst); // Scalar ALU void S_MOV(const GcnInst& inst); @@ -66,8 +69,9 @@ private: private: IR::Block* block; IR::IREmitter ir; + Info& info; }; -void Translate(IR::Block* block, Stage stage, std::span inst_list); +void Translate(IR::Block* block, std::span inst_list, Info& info); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index cbb3268c8..99cebdd22 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -20,9 +20,8 @@ void Translator::V_MAC_F32(const GcnInst& inst) { void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; - const IR::Value vec_f32 = ir.CompositeConstruct(ir.FPConvert(16, GetSrc(inst.src[0])), - ir.FPConvert(16, GetSrc(inst.src[1]))); - ir.SetVectorReg(dst_reg, ir.PackFloat2x16(vec_f32)); + const IR::Value vec_f32 = ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1])); + ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32)); } void Translator::V_MUL_F32(const GcnInst& inst) { diff --git a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp index 47c98cd7d..7d41d4306 100644 --- a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp +++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp @@ -7,7 +7,9 @@ namespace Shader::Gcn { void Translator::V_INTERP_P2_F32(const GcnInst& inst) { const IR::VectorReg dst_reg{inst.dst[0].code}; - const IR::Attribute attrib{IR::Attribute::Param0 + inst.control.vintrp.attr}; + auto& attr = info.ps_inputs.at(inst.control.vintrp.attr); + attr.semantic = inst.control.vintrp.attr; + const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan)); } diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index a4d76dbf0..687d3ad44 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -72,10 +72,12 @@ enum class Attribute : u64 { LocalInvocationId = 75, LocalInvocationIndex = 76, FragCoord = 77, + Max, }; -constexpr size_t EXP_NUM_POS = 4; -constexpr size_t EXP_NUM_PARAM = 32; +constexpr size_t NumAttributes = static_cast(Attribute::Max); +constexpr size_t NumRenderTargets = 8; +constexpr size_t NumParams = 32; [[nodiscard]] bool IsParam(Attribute attribute) noexcept; @@ -86,7 +88,7 @@ constexpr size_t EXP_NUM_PARAM = 32; if (result > static_cast(Attribute::Param31)) { throw LogicError("Overflow on register arithmetic"); } - if (result < static_cast(Attribute::Param0)) { + if (result < static_cast(Attribute::RenderTarget0)) { throw LogicError("Underflow on register arithmetic"); } return static_cast(result); diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index aae23ef64..8bea18e07 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -174,18 +174,10 @@ void IREmitter::SetVcc(const U1& value) { Inst(Opcode::SetVcc, value); } -F32 IREmitter::GetAttribute(IR::Attribute attribute) { - return GetAttribute(attribute, 0); -} - F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) { return Inst(Opcode::GetAttribute, attribute, Imm32(comp)); } -U32 IREmitter::GetAttributeU32(IR::Attribute attribute) { - return GetAttributeU32(attribute, 0); -} - U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) { return Inst(Opcode::GetAttributeU32, attribute, Imm32(comp)); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 8c8f657ee..f6bc8807a 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -58,11 +58,9 @@ public: [[nodiscard]] U1 Condition(IR::Condition cond); - [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); - [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, u32 comp); - [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute); - [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, u32 comp); - void SetAttribute(IR::Attribute attribute, const F32& value, u32 comp); + [[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0); + [[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0); + void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0); [[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset); void WriteShared(int bit_size, const Value& value, const U32& offset); diff --git a/src/shader_recompiler/ir/passes/info_collection.cpp b/src/shader_recompiler/ir/passes/info_collection.cpp new file mode 100644 index 000000000..99aedbc4f --- /dev/null +++ b/src/shader_recompiler/ir/passes/info_collection.cpp @@ -0,0 +1,33 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/ir/program.h" + +namespace Shader::Optimization { + +void Visit(Info& info, IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::GetAttribute: + case IR::Opcode::GetAttributeU32: { + info.loads.Set(inst.Arg(0).Attribute(), inst.Arg(1).U32()); + break; + } + case IR::Opcode::SetAttribute: { + info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32()); + break; + } + default: + break; + } +} + +void CollectShaderInfoPass(IR::Program& program) { + Info& info{program.info}; + for (IR::Block* const block : program.post_order_blocks) { + for (IR::Inst& inst : block->Instructions()) { + Visit(info, inst); + } + } +} + +} // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/passes.h b/src/shader_recompiler/ir/passes/passes.h index e4baae92b..915bb80e2 100644 --- a/src/shader_recompiler/ir/passes/passes.h +++ b/src/shader_recompiler/ir/passes/passes.h @@ -13,5 +13,6 @@ void IdentityRemovalPass(IR::BlockList& program); void DeadCodeEliminationPass(IR::BlockList& program); void ConstantPropagationPass(IR::BlockList& program); void ResourceTrackingPass(IR::Program& program); +void CollectShaderInfoPass(IR::Program& program); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/program.h b/src/shader_recompiler/ir/program.h index 2efb6f507..27e33b119 100644 --- a/src/shader_recompiler/ir/program.h +++ b/src/shader_recompiler/ir/program.h @@ -3,27 +3,22 @@ #pragma once -#include #include #include "shader_recompiler/frontend/instruction.h" #include "shader_recompiler/ir/abstract_syntax_list.h" #include "shader_recompiler/ir/basic_block.h" - -namespace Shader { -enum class Stage : u32; -} +#include "shader_recompiler/runtime_info.h" namespace Shader::IR { -static constexpr size_t NumUserDataRegs = 16; - struct Program { + explicit Program(const Info&& info_) : info{info_} {} + AbstractSyntaxList syntax_list; BlockList blocks; BlockList post_order_blocks; std::vector ins_list; - std::array user_data; - Stage stage; + Info info; }; [[nodiscard]] std::string DumpProgram(const Program& program); diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 3215ed6dd..86173b5b8 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -2,7 +2,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/frontend/control_flow_graph.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/structured_control_flow.h" @@ -30,10 +29,8 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { return blocks; } -std::vector TranslateProgram(ObjectPool& inst_pool, - ObjectPool& block_pool, Stage stage, - std::span ud_regs, - std::span token) { +IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, + std::span token, const Info&& info) { // Ensure first instruction is expected. constexpr u32 token_mov_vcchi = 0xBEEB03FF; ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm"); @@ -47,7 +44,7 @@ std::vector TranslateProgram(ObjectPool& inst_pool, file.close(); // Decode and save instructions - IR::Program program; + IR::Program program{std::move(info)}; program.ins_list.reserve(token.size()); while (!slice.atEnd()) { program.ins_list.emplace_back(decoder.decodeInstruction(slice)); @@ -58,11 +55,9 @@ std::vector TranslateProgram(ObjectPool& inst_pool, Gcn::CFG cfg{gcn_block_pool, program.ins_list}; // Structurize control flow graph and create program. - program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, stage); + program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info); program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); - program.stage = stage; - std::ranges::copy(ud_regs, program.user_data.begin()); // Run optimization passes Shader::Optimization::SsaRewritePass(program.post_order_blocks); @@ -70,14 +65,13 @@ std::vector TranslateProgram(ObjectPool& inst_pool, Shader::Optimization::IdentityRemovalPass(program.blocks); Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::DeadCodeEliminationPass(program.blocks); + Shader::Optimization::CollectShaderInfoPass(program); for (const auto& block : program.blocks) { fmt::print("{}\n", IR::DumpBlock(*block)); } - // TODO: Pass profile from vulkan backend - const auto code = Backend::SPIRV::EmitSPIRV(Profile{}, program); - return code; + return program; } } // namespace Shader diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index c746c3d8f..c3a5c7023 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -3,6 +3,8 @@ #pragma once +#include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/object_pool.h" #include "shader_recompiler/ir/program.h" namespace Shader { @@ -26,9 +28,9 @@ struct BinaryInfo { u32 crc32; }; -[[nodiscard]] std::vector TranslateProgram(ObjectPool& inst_pool, - ObjectPool& block_pool, Stage stage, - std::span ud_regs, - std::span code); +[[nodiscard]] IR::Program TranslateProgram(ObjectPool& inst_pool, + ObjectPool& block_pool, + std::span code, + const Info&& info); } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 052108b7c..959768af2 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -3,39 +3,16 @@ #pragma once -#include -#include -#include "shader_recompiler/ir/type.h" +#include +#include +#include "common/assert.h" +#include "common/types.h" +#include "shader_recompiler/ir/attribute.h" +#include "video_core/amdgpu/pixel_format.h" namespace Shader { -enum class AttributeType : u8 { - Float, - SignedInt, - UnsignedInt, - SignedScaled, - UnsignedScaled, - Disabled, -}; - -enum class InputTopology { - Points, - Lines, - LinesAdjacency, - Triangles, - TrianglesAdjacency, -}; - -enum class CompareFunction { - Never, - Less, - Equal, - LessThanEqual, - Greater, - NotEqual, - GreaterThanEqual, - Always, -}; +static constexpr size_t NumUserDataRegs = 16; enum class Stage : u32 { Vertex, @@ -62,78 +39,60 @@ enum class TextureType : u32 { }; constexpr u32 NUM_TEXTURE_TYPES = 7; -enum class Interpolation { - Smooth, - Flat, - NoPerspective, -}; - -struct ConstantBufferDescriptor { - u32 index; - u32 count; - - auto operator<=>(const ConstantBufferDescriptor&) const = default; -}; - -struct TextureDescriptor { - TextureType type; - bool is_eud; - bool is_depth; - bool is_multisample; - bool is_storage; - u32 count; - u32 eud_offset_dwords; - u32 ud_index_dwords; - - auto operator<=>(const TextureDescriptor&) const = default; -}; -using TextureDescriptors = boost::container::small_vector; - struct Info { - bool uses_workgroup_id{}; - bool uses_local_invocation_id{}; - bool uses_invocation_id{}; - bool uses_invocation_info{}; - bool uses_sample_id{}; + explicit Info(std::span user_data_) : user_data{user_data_} {} - std::array interpolation{}; - // VaryingState loads; - // VaryingState stores; - // VaryingState passthrough; + struct VsInput { + AmdGpu::NumberFormat fmt; + u16 binding; + u16 num_components; + }; + boost::container::static_vector vs_inputs{}; - std::array stores_frag_color{}; - bool stores_sample_mask{}; - bool stores_frag_depth{}; + struct PsInput { + u32 param_index; + u32 semantic; + bool is_default; + bool is_flat; + u32 default_value; + }; + boost::container::static_vector ps_inputs{}; - bool uses_fp16{}; - bool uses_fp64{}; - bool uses_fp16_denorms_flush{}; - bool uses_fp16_denorms_preserve{}; - bool uses_fp32_denorms_flush{}; - bool uses_fp32_denorms_preserve{}; - bool uses_int8{}; - bool uses_int16{}; - bool uses_int64{}; - bool uses_image_1d{}; - bool uses_sampled_1d{}; - bool uses_subgroup_vote{}; - bool uses_subgroup_mask{}; - bool uses_derivatives{}; + struct AttributeFlags { + bool Get(IR::Attribute attrib, u32 comp = 0) const { + return flags[static_cast(attrib)] & (1 << comp); + } - IR::Type used_constant_buffer_types{}; - IR::Type used_storage_buffer_types{}; - IR::Type used_indirect_cbuf_types{}; + bool GetAny(IR::Attribute attrib) const { + return flags[static_cast(attrib)]; + } - // std::array constant_buffer_used_sizes{}; - u32 used_clip_distances{}; + void Set(IR::Attribute attrib, u32 comp = 0) { + flags[static_cast(attrib)] |= (1 << comp); + } - // boost::container::static_vector - // constant_buffer_descriptors; - // boost::container::static_vector - // storage_buffers_descriptors; TextureBufferDescriptors texture_buffer_descriptors; - // ImageBufferDescriptors image_buffer_descriptors; - // TextureDescriptors texture_descriptors; - // ImageDescriptors image_descriptors; + u32 NumComponents(IR::Attribute attrib) const { + const u8 mask = flags[static_cast(attrib)]; + ASSERT(mask != 0b1011 || mask != 0b1101); + return std::popcount(mask); + } + + std::array flags; + }; + AttributeFlags loads{}; + AttributeFlags stores{}; + + std::span user_data; + Stage stage; + + template + T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept { + T data; + u32* base; + std::memcpy(&base, &user_data[ptr_index], sizeof(base)); + std::memcpy(&data, base + dword_offset, sizeof(T)); + return data; + } }; } // namespace Shader diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index c93d019b2..a4e9df376 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -33,13 +33,13 @@ struct Liverpool { static constexpr u32 NumColorBuffers = 8; static constexpr u32 NumViewports = 16; static constexpr u32 NumClipPlanes = 6; - static constexpr u32 NumWordsShaderUserData = 16; + static constexpr u32 NumShaderUserData = 16; static constexpr u32 UconfigRegWordOffset = 0xC000; static constexpr u32 ContextRegWordOffset = 0xA000; static constexpr u32 ShRegWordOffset = 0x2C00; static constexpr u32 NumRegs = 0xD000; - using UserData = std::array; + using UserData = std::array; struct ShaderProgram { u32 address_lo; @@ -58,6 +58,14 @@ struct Liverpool { } }; + union PsInputControl { + u32 raw; + BitField<0, 5, u32> input_offset; + BitField<5, 1, u32> use_default; + BitField<8, 2, u32> default_value; + BitField<10, 1, u32> flat_shade; + }; + enum class ShaderExportComp : u32 { None = 0, OneComp = 1, @@ -552,9 +560,12 @@ struct Liverpool { INSERT_PADDING_WORDS(1); std::array viewports; std::array clip_user_data; - INSERT_PADDING_WORDS(0xA1B1 - 0xA187); + INSERT_PADDING_WORDS(0xA191 - 0xA187); + std::array ps_inputs; VsOutputConfig vs_output_config; - INSERT_PADDING_WORDS(0xA1C3 - 0xA1B1 - 1); + INSERT_PADDING_WORDS(4); + BitField<0, 6, u32> num_interp; + INSERT_PADDING_WORDS(0xA1C3 - 0xA1B6 - 1); ShaderPosFormat shader_pos_format; ShaderExportFormat z_export_format; ColorExportFormat color_export_format; @@ -631,7 +642,9 @@ static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094); static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B); static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F); static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F); +static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191); static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1); +static_assert(GFX6_3D_REG_INDEX(num_interp) == 0xA1B6); static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3); static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4); static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5); diff --git a/src/video_core/amdgpu/pixel_format.cpp b/src/video_core/amdgpu/pixel_format.cpp index 5bb8f0fbf..f963370db 100644 --- a/src/video_core/amdgpu/pixel_format.cpp +++ b/src/video_core/amdgpu/pixel_format.cpp @@ -2,10 +2,44 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include "common/assert.h" #include "video_core/amdgpu/pixel_format.h" namespace AmdGpu { +std::string_view NameOf(NumberFormat fmt) { + switch (fmt) { + case NumberFormat::Unorm: + return "Unorm"; + case NumberFormat::Snorm: + return "Snorm"; + case NumberFormat::Uscaled: + return "Uscaled"; + case NumberFormat::Sscaled: + return "Sscaled"; + case NumberFormat::Uint: + return "Uint"; + case NumberFormat::Sint: + return "Sint"; + case NumberFormat::SnormNz: + return "SnormNz"; + case NumberFormat::Float: + return "Float"; + case NumberFormat::Srgb: + return "Srgb"; + case NumberFormat::Ubnorm: + return "Ubnorm"; + case NumberFormat::UbnromNz: + return "UbnormNz"; + case NumberFormat::Ubint: + return "Ubint"; + case NumberFormat::Ubscaled: + return "Unscaled"; + default: + UNREACHABLE(); + } +} + u32 NumComponents(DataFormat format) { constexpr std::array numComponentsPerElement = { 0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2, diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index f28e42356..fb0c27517 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -3,6 +3,8 @@ #pragma once +#include +#include #include "common/types.h" namespace AmdGpu { @@ -59,6 +61,19 @@ enum class NumberFormat : u32 { Ubscaled = 13, }; +[[nodiscard]] std::string_view NameOf(NumberFormat fmt); + u32 NumComponents(DataFormat format); } // namespace AmdGpu + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + auto format(AmdGpu::NumberFormat fmt, format_context& ctx) const { + return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt)); + } +}; + diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 23281c786..e0134442f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1,7 +1,9 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "common/scope_exit.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -11,9 +13,31 @@ namespace Vulkan { +Shader::Info MakeShaderInfo(Shader::Stage stage, std::span user_data, + AmdGpu::Liverpool::Regs& regs) { + Shader::Info info{user_data}; + info.stage = stage; + switch (stage) { + case Shader::Stage::Fragment: { + for (u32 i = 0; i < regs.num_interp; i++) { + info.ps_inputs.push_back({ + .param_index = regs.ps_inputs[i].input_offset.Value(), + .is_default = bool(regs.ps_inputs[i].use_default), + .is_flat = bool(regs.ps_inputs[i].flat_shade), + .default_value = regs.ps_inputs[i].default_value, + }); + } + break; + } + default: + break; + } + return info; +} + PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_) - : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{4096}, + : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192}, block_pool{512} { const vk::PipelineLayoutCreateInfo layout_info = { .setLayoutCount = 0U, @@ -50,8 +74,18 @@ void PipelineCache::BindPipeline() { // Compile and cache shader. const auto data = std::span{token, bininfo.length / sizeof(u32)}; - const auto program = Shader::TranslateProgram(inst_pool, block_pool, stage, pgm.user_data, data); - return CompileSPV(program, instance.GetDevice()); + block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); + const auto info = MakeShaderInfo(stage, pgm.user_data, liverpool->regs); + auto program = Shader::TranslateProgram(inst_pool, block_pool, data, std::move(info)); + const auto code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, program); + + static int counter = 0; + std::ofstream file(fmt::format("shader{}.spv", counter++), std::ios::out | std::ios::binary); + file.write((const char*)code.data(), code.size() * sizeof(u32)); + file.close(); + + return CompileSPV(code, instance.GetDevice()); }; // Retrieve shader stage modules.