diff --git a/externals/sirit b/externals/sirit index b4eccb336..282083a59 160000 --- a/externals/sirit +++ b/externals/sirit @@ -1 +1 @@ -Subproject commit b4eccb336f1b1169af48dac1e04015985af86e3e +Subproject commit 282083a595dcca86814dedab2f2b0363ef38f1ec diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index c4c310586..baf9ced25 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -293,9 +293,17 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct if (stage == LogicalStage::Geometry) { ctx.AddCapability(spv::Capability::Geometry); } - if (info.stage == Stage::Fragment && profile.needs_manual_interpolation) { - ctx.AddExtension("SPV_KHR_fragment_shader_barycentric"); - ctx.AddCapability(spv::Capability::FragmentBarycentricKHR); + if (info.stage == Stage::Fragment) { + if (profile.supports_amd_shader_explicit_vertex_parameter) { + ctx.AddExtension("SPV_AMD_shader_explicit_vertex_parameter"); + } else if (profile.supports_fragment_shader_barycentric) { + ctx.AddExtension("SPV_KHR_fragment_shader_barycentric"); + ctx.AddCapability(spv::Capability::FragmentBarycentricKHR); + } + if (info.loads.GetAny(IR::Attribute::BaryCoordSmoothSample) || + info.loads.GetAny(IR::Attribute::BaryCoordNoPerspSample)) { + ctx.AddCapability(spv::Capability::SampleRateShading); + } } if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) { ctx.AddCapability(spv::Capability::Tessellation); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 40f8d307c..ead2a2825 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -45,7 +45,7 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) { return ctx.OpAccessChain(ctx.output_f32, ctx.cull_distances, cull_num); } default: - UNREACHABLE(); + UNREACHABLE_MSG("Vertex output {}", u32(output)); } } @@ -88,7 +88,7 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) { case IR::Attribute::Depth: return ctx.frag_depth; default: - throw NotImplementedException("Write attribute {}", attr); + UNREACHABLE_MSG("Write attribute {}", attr); } } @@ -111,7 +111,7 @@ std::pair OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr case IR::Attribute::Depth: return {ctx.F32[1], false}; default: - throw NotImplementedException("Write attribute {}", attr); + UNREACHABLE_MSG("Write attribute {}", attr); } } } // Anonymous namespace @@ -159,81 +159,61 @@ Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) { return result; } -static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { - if (IR::IsPosition(attr)) { - ASSERT(attr == IR::Attribute::Position0); - const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ - ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index), ctx.ConstU32(0u))}; - const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); - } - - if (IR::IsParam(attr)) { - const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)}; - const auto param = ctx.input_params.at(param_id).id; - const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]); - const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))}; - const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - return ctx.OpLoad(ctx.F32[1], - ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp))); - } - UNREACHABLE(); -} - Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) { - if (ctx.info.l_stage == LogicalStage::Geometry) { - return EmitGetAttributeForGeometry(ctx, attr, comp, index); - } else if (ctx.info.l_stage == LogicalStage::TessellationControl || - ctx.info.l_stage == LogicalStage::TessellationEval) { - if (IR::IsTessCoord(attr)) { - const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1; - const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]); - const auto pointer{ - ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))}; - return ctx.OpLoad(ctx.F32[1], pointer); - } - UNREACHABLE(); - } - if (IR::IsParam(attr)) { const u32 param_index{u32(attr) - u32(IR::Attribute::Param0)}; const auto& param{ctx.input_params.at(param_index)}; - Id result; - if (param.is_loaded) { - // Attribute is either default or manually interpolated. The id points to an already - // loaded vector. - result = ctx.OpCompositeExtract(param.component_type, param.id, comp); - } else if (param.num_components > 1) { - // Attribute is a vector and we need to access a specific component. - const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))}; - result = ctx.OpLoad(param.component_type, pointer); - } else { - // Attribute is a single float or interger, simply load it. - result = ctx.OpLoad(param.component_type, param.id); - } - if (param.is_integer) { - result = ctx.OpBitcast(ctx.F32[1], result); - } - return result; + const Id value = [&] { + if (param.is_array) { + ASSERT(param.num_components > 1); + if (param.is_loaded) { + return ctx.OpCompositeExtract(param.component_type, param.id_array[index], + comp); + } else { + return ctx.OpLoad(param.component_type, + ctx.OpAccessChain(param.pointer_type, param.id, + ctx.ConstU32(index), ctx.ConstU32(comp))); + } + } else { + ASSERT(!param.is_loaded); + if (param.num_components > 1) { + return ctx.OpLoad( + param.component_type, + ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))); + } else { + return ctx.OpLoad(param.component_type, param.id); + } + } + }(); + return param.is_integer ? ctx.OpBitcast(ctx.F32[1], value) : value; + } + if (IR::IsBarycentricCoord(attr) && ctx.profile.supports_fragment_shader_barycentric) { + ++comp; } - switch (attr) { - case IR::Attribute::FragCoord: { - const Id coord = ctx.OpLoad( - ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.frag_coord, ctx.ConstU32(comp))); - if (comp == 3) { - return ctx.OpFDiv(ctx.F32[1], ctx.ConstF32(1.f), coord); - } - return coord; - } + case IR::Attribute::Position0: + ASSERT(ctx.l_stage == LogicalStage::Geometry); + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.gl_in, ctx.ConstU32(index), + ctx.ConstU32(0U), ctx.ConstU32(comp))); + case IR::Attribute::FragCoord: + return ctx.OpLoad(ctx.F32[1], + ctx.OpAccessChain(ctx.input_f32, ctx.frag_coord, ctx.ConstU32(comp))); case IR::Attribute::TessellationEvaluationPointU: return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); case IR::Attribute::TessellationEvaluationPointV: return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.ConstU32(1U))); + case IR::Attribute::BaryCoordSmooth: + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.bary_coord_smooth, + ctx.ConstU32(comp))); + case IR::Attribute::BaryCoordSmoothSample: + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.bary_coord_smooth_sample, + ctx.ConstU32(comp))); + case IR::Attribute::BaryCoordNoPersp: + return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.bary_coord_nopersp, + ctx.ConstU32(comp))); default: UNREACHABLE_MSG("Read attribute {}", attr); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index fe7bd3356..70a44cbe4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -9,7 +9,7 @@ namespace Shader::Backend::SPIRV { void EmitPrologue(EmitContext& ctx) { if (ctx.stage == Stage::Fragment) { - ctx.DefineInterpolatedAttribs(); + ctx.DefineAmdPerVertexAttribs(); } if (ctx.info.loads.Get(IR::Attribute::WorkgroupIndex)) { ctx.DefineWorkgroupIndex(); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index e16bba755..f373808d9 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -196,14 +196,15 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) { } EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, - u32 num_components, bool output) { + u32 num_components, bool output, + bool loaded, bool array) { switch (GetNumberClass(fmt)) { case AmdGpu::NumberClass::Float: - return {id, output ? output_f32 : input_f32, F32[1], num_components, false}; + return {id, output ? output_f32 : input_f32, F32[1], num_components, false, loaded, array}; case AmdGpu::NumberClass::Uint: - return {id, output ? output_u32 : input_u32, U32[1], num_components, true}; + return {id, output ? output_u32 : input_u32, U32[1], num_components, true, loaded, array}; case AmdGpu::NumberClass::Sint: - return {id, output ? output_s32 : input_s32, S32[1], num_components, true}; + return {id, output ? output_s32 : input_s32, S32[1], num_components, true, loaded, array}; default: break; } @@ -298,33 +299,24 @@ void EmitContext::DefineBufferProperties() { } } -void EmitContext::DefineInterpolatedAttribs() { - if (!profile.needs_manual_interpolation) { +void EmitContext::DefineAmdPerVertexAttribs() { + if (!profile.supports_amd_shader_explicit_vertex_parameter) { return; } - // Iterate all input attributes, load them and manually interpolate. for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) { const auto& input = runtime_info.fs_info.inputs[i]; - auto& params = input_params[i]; - if (input.is_flat || params.is_loaded) { + if (input.IsDefault() || info.fs_interpolation[i].primary != Qualifier::PerVertex) { continue; } - const Id p_array{OpLoad(TypeArray(F32[4], ConstU32(3U)), params.id)}; - const Id p0{OpCompositeExtract(F32[4], p_array, 0U)}; - const Id p1{OpCompositeExtract(F32[4], p_array, 1U)}; - const Id p2{OpCompositeExtract(F32[4], p_array, 2U)}; - const Id p10{OpFSub(F32[4], p1, p0)}; - const Id p20{OpFSub(F32[4], p2, p0)}; - const Id bary_coord{OpLoad(F32[3], IsLinear(info.interp_qualifiers[i]) - ? bary_coord_linear_id - : bary_coord_persp_id)}; - const Id bary_coord_y{OpCompositeExtract(F32[1], bary_coord, 1)}; - const Id bary_coord_z{OpCompositeExtract(F32[1], bary_coord, 2)}; - const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)}; - const Id p20_z{OpVectorTimesScalar(F32[4], p20, bary_coord_z)}; - params.id = OpFAdd(F32[4], p0, OpFAdd(F32[4], p10_y, p20_z)); - Name(params.id, fmt::format("fs_in_attr{}", i)); - params.is_loaded = true; + auto& param = input_params[i]; + const Id pointer = param.id; + param.id_array[0] = + OpInterpolateAtVertexAMD(F32[param.num_components], pointer, ConstU32(0U)); + param.id_array[1] = + OpInterpolateAtVertexAMD(F32[param.num_components], pointer, ConstU32(1U)); + param.id_array[2] = + OpInterpolateAtVertexAMD(F32[param.num_components], pointer, ConstU32(2U)); + param.is_loaded = true; } } @@ -342,21 +334,6 @@ void EmitContext::DefineWorkgroupIndex() { Name(workgroup_index_id, "workgroup_index"); } -Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { - switch (default_value) { - case 0: - return ctx.ConstF32(0.f, 0.f, 0.f, 0.f); - case 1: - return ctx.ConstF32(0.f, 0.f, 0.f, 1.f); - case 2: - return ctx.ConstF32(1.f, 1.f, 1.f, 0.f); - case 3: - return ctx.ConstF32(1.f, 1.f, 1.f, 1.f); - default: - UNREACHABLE(); - } -} - void EmitContext::DefineInputs() { if (info.uses_lane_id) { subgroup_local_invocation_id = DefineVariable( @@ -398,49 +375,71 @@ void EmitContext::DefineInputs() { front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input); } - if (profile.needs_manual_interpolation) { - if (info.has_perspective_interp) { - bary_coord_persp_id = + if (info.loads.GetAny(IR::Attribute::BaryCoordSmooth)) { + if (profile.supports_amd_shader_explicit_vertex_parameter) { + bary_coord_smooth = DefineVariable(F32[2], spv::BuiltIn::BaryCoordSmoothAMD, + spv::StorageClass::Input); + } else if (profile.supports_fragment_shader_barycentric) { + bary_coord_smooth = DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input); + } else { + bary_coord_smooth = ConstF32(0.f, 0.f); } - if (info.has_linear_interp) { - bary_coord_linear_id = DefineVariable(F32[3], spv::BuiltIn::BaryCoordNoPerspKHR, - spv::StorageClass::Input); + } + if (info.loads.GetAny(IR::Attribute::BaryCoordSmoothSample)) { + if (profile.supports_amd_shader_explicit_vertex_parameter) { + bary_coord_smooth_sample = DefineVariable( + F32[2], spv::BuiltIn::BaryCoordSmoothSampleAMD, spv::StorageClass::Input); + } else if (profile.supports_fragment_shader_barycentric) { + bary_coord_smooth_sample = + DefineVariable(F32[3], spv::BuiltIn::BaryCoordKHR, spv::StorageClass::Input); + // Decorate(bary_coord_smooth_sample, spv::Decoration::Sample); + } else { + bary_coord_smooth_sample = ConstF32(0.f, 0.f); + } + } + if (info.loads.GetAny(IR::Attribute::BaryCoordNoPersp)) { + if (profile.supports_amd_shader_explicit_vertex_parameter) { + bary_coord_nopersp = DefineVariable(F32[2], spv::BuiltIn::BaryCoordNoPerspAMD, + spv::StorageClass::Input); + } else if (profile.supports_fragment_shader_barycentric) { + bary_coord_nopersp = DefineVariable(F32[3], spv::BuiltIn::BaryCoordNoPerspKHR, + spv::StorageClass::Input); + } else { + bary_coord_nopersp = ConstF32(0.f, 0.f); } } for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) { const auto& input = runtime_info.fs_info.inputs[i]; if (input.IsDefault()) { - input_params[i] = { - .id = MakeDefaultValue(*this, input.default_value), - .pointer_type = input_f32, - .component_type = F32[1], - .num_components = 4, - .is_integer = false, - .is_loaded = true, - }; continue; } - const IR::Attribute param{IR::Attribute::Param0 + i}; + const IR::Attribute param = IR::Attribute::Param0 + i; const u32 num_components = info.loads.NumComponents(param); - const Id type{F32[num_components]}; - Id attr_id{}; - if (profile.needs_manual_interpolation && !input.is_flat) { - attr_id = DefineInput(TypeArray(type, ConstU32(3U)), input.param_index); - Decorate(attr_id, spv::Decoration::PerVertexKHR); - Name(attr_id, fmt::format("fs_in_attr{}_p", i)); - } else { - attr_id = DefineInput(type, input.param_index); - Name(attr_id, fmt::format("fs_in_attr{}", i)); - - if (input.is_flat) { - Decorate(attr_id, spv::Decoration::Flat); - } else if (IsLinear(info.interp_qualifiers[i])) { - Decorate(attr_id, spv::Decoration::NoPerspective); + const auto [primary, auxiliary] = info.fs_interpolation[i]; + const Id type = F32[num_components]; + const Id attr_id = [&] { + if (primary == Qualifier::PerVertex && + profile.supports_fragment_shader_barycentric) { + return Name(DefineInput(TypeArray(type, ConstU32(3U)), input.param_index), + fmt::format("fs_in_attr{}_p", i)); } + return Name(DefineInput(type, input.param_index), fmt::format("fs_in_attr{}", i)); + }(); + if (primary == Qualifier::PerVertex) { + Decorate(attr_id, profile.supports_amd_shader_explicit_vertex_parameter + ? spv::Decoration::ExplicitInterpAMD + : spv::Decoration::PerVertexKHR); + } else if (primary != Qualifier::Smooth) { + Decorate(attr_id, primary == Qualifier::Flat ? spv::Decoration::Flat + : spv::Decoration::NoPerspective); } - input_params[i] = - GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false); + if (auxiliary != Qualifier::None) { + Decorate(attr_id, auxiliary == Qualifier::Centroid ? spv::Decoration::Centroid + : spv::Decoration::Sample); + } + input_params[i] = GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, + false, false, primary == Qualifier::PerVertex); } break; case LogicalStage::Compute: @@ -461,17 +460,16 @@ void EmitContext::DefineInputs() { case LogicalStage::Geometry: { primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); const auto gl_per_vertex = - Name(TypeStruct(TypeVector(F32[1], 4), F32[1], TypeArray(F32[1], ConstU32(1u))), - "gl_PerVertex"); + Name(TypeStruct(F32[4], F32[1], TypeArray(F32[1], ConstU32(1u))), "gl_PerVertex"); MemberName(gl_per_vertex, 0, "gl_Position"); MemberName(gl_per_vertex, 1, "gl_PointSize"); MemberName(gl_per_vertex, 2, "gl_ClipDistance"); MemberDecorate(gl_per_vertex, 0, spv::Decoration::BuiltIn, - static_cast(spv::BuiltIn::Position)); + static_cast(spv::BuiltIn::Position)); MemberDecorate(gl_per_vertex, 1, spv::Decoration::BuiltIn, - static_cast(spv::BuiltIn::PointSize)); + static_cast(spv::BuiltIn::PointSize)); MemberDecorate(gl_per_vertex, 2, spv::Decoration::BuiltIn, - static_cast(spv::BuiltIn::ClipDistance)); + static_cast(spv::BuiltIn::ClipDistance)); Decorate(gl_per_vertex, spv::Decoration::Block); const auto num_verts_in = NumVertices(runtime_info.gs_info.in_primitive); const auto vertices_in = TypeArray(gl_per_vertex, ConstU32(num_verts_in)); @@ -483,7 +481,8 @@ void EmitContext::DefineInputs() { const Id type{TypeArray(F32[4], ConstU32(num_verts_in))}; const Id id{DefineInput(type, param_id)}; Name(id, fmt::format("gs_in_attr{}", param_id)); - input_params[param_id] = {id, input_f32, F32[1], 4}; + input_params[param_id] = + GetAttributeInfo(AmdGpu::NumberFormat::Float, id, 4, false, false, true); } break; } @@ -665,7 +664,7 @@ void EmitContext::DefineOutputs() { for (u32 attr_id = 0; attr_id < info.gs_copy_data.num_attrs; attr_id++) { const Id id{DefineOutput(F32[4], attr_id)}; Name(id, fmt::format("out_attr{}", attr_id)); - output_params[attr_id] = {id, output_f32, F32[1], 4u}; + output_params[attr_id] = GetAttributeInfo(AmdGpu::NumberFormat::Float, id, 4, true); } break; } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 186925706..f57dbebd8 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -45,7 +45,7 @@ public: Id Def(const IR::Value& value); void DefineBufferProperties(); - void DefineInterpolatedAttribs(); + void DefineAmdPerVertexAttribs(); void DefineWorkgroupIndex(); [[nodiscard]] Id DefineInput(Id type, std::optional location = std::nullopt, @@ -279,8 +279,9 @@ public: Id shared_memory_u32_type{}; Id shared_memory_u64_type{}; - Id bary_coord_persp_id{}; - Id bary_coord_linear_id{}; + Id bary_coord_smooth{}; + Id bary_coord_smooth_sample{}; + Id bary_coord_nopersp{}; struct TextureDefinition { const VectorIds* data_types; @@ -355,12 +356,16 @@ public: Id sampler_pointer_type{}; struct SpirvAttribute { - Id id; + union { + Id id; + std::array id_array; + }; Id pointer_type; Id component_type; u32 num_components; bool is_integer{}; bool is_loaded{}; + bool is_array{}; }; Id input_attr_array; Id output_attr_array; @@ -390,7 +395,7 @@ private: void DefineFunctions(); SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id, u32 num_components, - bool output); + bool output, bool loaded = false, bool array = false); BufferSpv DefineBuffer(bool is_storage, bool is_written, u32 elem_shift, BufferType buffer_type, Id data_type); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 310ac9156..578c1f96a 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -21,50 +21,39 @@ namespace Shader::Gcn { -Translator::Translator(Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_) - : info{info_}, runtime_info{runtime_info_}, profile{profile_}, - next_vgpr_num{runtime_info.num_allocated_vgprs} { - if (info.l_stage == LogicalStage::Fragment) { - dst_frag_vreg = GatherInterpQualifiers(); +static IR::VectorReg IterateBarycentrics(const RuntimeInfo& runtime_info, auto&& set_attribute) { + if (runtime_info.stage != Stage::Fragment) { + return IR::VectorReg::V0; } -} - -IR::VectorReg Translator::GatherInterpQualifiers() { u32 dst_vreg{}; if (runtime_info.fs_info.addr_flags.persp_sample_ena) { - vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // I - vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveSample; // J - info.has_perspective_interp = true; + set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothSample, 0); // I + set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothSample, 1); // J } if (runtime_info.fs_info.addr_flags.persp_center_ena) { - vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // I - vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCenter; // J - info.has_perspective_interp = true; + set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmooth, 0); // I + set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmooth, 1); // J } if (runtime_info.fs_info.addr_flags.persp_centroid_ena) { - vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // I - vgpr_to_interp[dst_vreg++] = IR::Interpolation::PerspectiveCentroid; // J - info.has_perspective_interp = true; + set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothCentroid, 0); // I + set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothCentroid, 1); // J } if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) { - ++dst_vreg; // I/W - ++dst_vreg; // J/W - ++dst_vreg; // 1/W + set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 0); // I/W + set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 1); // J/W + set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 2); // 1/W } if (runtime_info.fs_info.addr_flags.linear_sample_ena) { - vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // I - vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearSample; // J - info.has_linear_interp = true; + set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspSample, 0); // I + set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspSample, 1); // J } if (runtime_info.fs_info.addr_flags.linear_center_ena) { - vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // I - vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCenter; // J - info.has_linear_interp = true; + set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPersp, 0); // I + set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPersp, 1); // J } if (runtime_info.fs_info.addr_flags.linear_centroid_ena) { - vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // I - vgpr_to_interp[dst_vreg++] = IR::Interpolation::LinearCentroid; // J - info.has_linear_interp = true; + set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspCentroid, 0); // I + set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspCentroid, 1); // J } if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) { ++dst_vreg; @@ -72,6 +61,14 @@ IR::VectorReg Translator::GatherInterpQualifiers() { return IR::VectorReg(dst_vreg); } +Translator::Translator(Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_) + : info{info_}, runtime_info{runtime_info_}, profile{profile_}, + next_vgpr_num{runtime_info.num_allocated_vgprs} { + IterateBarycentrics(runtime_info, [this](u32 vreg, IR::Attribute attrib, u32) { + vgpr_to_interp[vreg] = attrib; + }); +} + void Translator::EmitPrologue(IR::Block* first_block) { ir = IR::IREmitter(*first_block, first_block->begin()); @@ -127,7 +124,10 @@ void Translator::EmitPrologue(IR::Block* first_block) { } break; case LogicalStage::Fragment: - dst_vreg = dst_frag_vreg; + dst_vreg = + IterateBarycentrics(runtime_info, [this](u32 vreg, IR::Attribute attrib, u32 comp) { + ir.SetVectorReg(IR::VectorReg(vreg), ir.GetAttribute(attrib, comp)); + }); if (runtime_info.fs_info.addr_flags.pos_x_float_ena) { if (runtime_info.fs_info.en_flags.pos_x_float_ena) { ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0)); @@ -151,7 +151,8 @@ void Translator::EmitPrologue(IR::Block* first_block) { } if (runtime_info.fs_info.addr_flags.pos_w_float_ena) { if (runtime_info.fs_info.en_flags.pos_w_float_ena) { - ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 3)); + ir.SetVectorReg(dst_vreg++, + ir.FPRecip(ir.GetAttribute(IR::Attribute::FragCoord, 3))); } else { ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f)); } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 4b5ff827b..a29bdc993 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -265,6 +265,7 @@ public: // Vector interpolation // VINTRP + void V_INTERP_P1_F32(const GcnInst& inst); void V_INTERP_P2_F32(const GcnInst& inst); void V_INTERP_MOV_F32(const GcnInst& inst); @@ -323,7 +324,6 @@ private: void LogMissingOpcode(const GcnInst& inst); IR::VectorReg GetScratchVgpr(u32 offset); - IR::VectorReg GatherInterpQualifiers(); private: IR::IREmitter ir; @@ -332,8 +332,7 @@ private: const Profile& profile; u32 next_vgpr_num; std::unordered_map vgpr_map; - std::array vgpr_to_interp{}; - IR::VectorReg dst_frag_vreg{}; + std::array vgpr_to_interp{}; bool opcode_missing = false; }; diff --git a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp index 5a287dbe2..c32e80815 100644 --- a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp +++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp @@ -5,11 +5,32 @@ namespace Shader::Gcn { +using Interpolation = Info::Interpolation; + +static Interpolation GetInterpolation(IR::Attribute attribute) { + switch (attribute) { + case IR::Attribute::BaryCoordNoPersp: + return {Qualifier::NoPerspective, Qualifier::None}; + case IR::Attribute::BaryCoordNoPerspCentroid: + return {Qualifier::NoPerspective, Qualifier::Centroid}; + case IR::Attribute::BaryCoordNoPerspSample: + return {Qualifier::NoPerspective, Qualifier::Sample}; + case IR::Attribute::BaryCoordSmooth: + return {Qualifier::Smooth, Qualifier::None}; + case IR::Attribute::BaryCoordSmoothCentroid: + return {Qualifier::Smooth, Qualifier::Centroid}; + case IR::Attribute::BaryCoordSmoothSample: + return {Qualifier::Smooth, Qualifier::Sample}; + default: + UNREACHABLE_MSG("Unhandled barycentric attribute {}", NameOf(attribute)); + } +} + void Translator::EmitVectorInterpolation(const GcnInst& inst) { switch (inst.opcode) { // VINTRP case Opcode::V_INTERP_P1_F32: - return; + return V_INTERP_P1_F32(inst); case Opcode::V_INTERP_P2_F32: return V_INTERP_P2_F32(inst); case Opcode::V_INTERP_MOV_F32: @@ -21,19 +42,57 @@ void Translator::EmitVectorInterpolation(const GcnInst& inst) { // VINTRP +void Translator::V_INTERP_P1_F32(const GcnInst& inst) { + if (!profile.needs_manual_interpolation) { + return; + } + // VDST = P10 * VSRC + P0 + const u32 attr_index = inst.control.vintrp.attr; + const IR::Attribute attrib = IR::Attribute::Param0 + attr_index; + const IR::F32 p0 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 0); + const IR::F32 p1 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 1); + const IR::F32 i = GetSrc(inst.src[0]); + const IR::F32 result = ir.FPFma(ir.FPSub(p1, p0), i, p0); + SetDst(inst.dst[0], result); +} + void Translator::V_INTERP_P2_F32(const GcnInst& inst) { const u32 attr_index = inst.control.vintrp.attr; - const auto& attr = runtime_info.fs_info.inputs.at(attr_index); - info.interp_qualifiers[attr_index] = vgpr_to_interp[inst.src[0].code]; - const IR::Attribute attrib{IR::Attribute::Param0 + attr_index}; - SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); + const IR::Attribute attrib = IR::Attribute::Param0 + attr_index; + const auto& attr = runtime_info.fs_info.inputs[attr_index]; + auto& interp = info.fs_interpolation[attr_index]; + ASSERT(!attr.IsDefault() && !attr.is_flat); + if (!profile.needs_manual_interpolation) { + interp = GetInterpolation(vgpr_to_interp[inst.src[0].code]); + SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); + return; + } + // VDST = P20 * VSRC + VDST + const IR::F32 p0 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 0); + const IR::F32 p2 = ir.GetAttribute(attrib, inst.control.vintrp.chan, 2); + const IR::F32 j = GetSrc(inst.src[0]); + const IR::F32 result = ir.FPFma(ir.FPSub(p2, p0), j, GetSrc(inst.dst[0])); + interp.primary = Qualifier::PerVertex; + SetDst(inst.dst[0], result); } void Translator::V_INTERP_MOV_F32(const GcnInst& inst) { const u32 attr_index = inst.control.vintrp.attr; - const auto& attr = runtime_info.fs_info.inputs.at(attr_index); - const IR::Attribute attrib{IR::Attribute::Param0 + attr_index}; - SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); + const IR::Attribute attrib = IR::Attribute::Param0 + attr_index; + const auto& attr = runtime_info.fs_info.inputs[attr_index]; + auto& interp = info.fs_interpolation[attr_index]; + ASSERT(attr.is_flat); + if (profile.supports_amd_shader_explicit_vertex_parameter || + (profile.supports_fragment_shader_barycentric && + !profile.has_incomplete_fragment_shader_barycentric)) { + // VSRC 0=P10, 1=P20, 2=P0 + interp.primary = Qualifier::PerVertex; + SetDst(inst.dst[0], + ir.GetAttribute(attrib, inst.control.vintrp.chan, (inst.src[0].code + 1) % 3)); + } else { + interp.primary = Qualifier::Flat; + SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); + } } } // namespace Shader::Gcn diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 6e12c6816..bb5c88584 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -1,5 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later + #pragma once #include @@ -135,6 +136,16 @@ struct PushData { static_assert(sizeof(PushData) <= 128, "PushData size is greater than minimum size guaranteed by Vulkan spec"); +enum class Qualifier : u8 { + None, + Smooth, + NoPerspective, + PerVertex, + Flat, + Centroid, + Sample, +}; + /** * Contains general information generated by the shader recompiler for an input program. */ @@ -194,7 +205,11 @@ struct Info { PersistentSrtInfo srt_info; std::vector flattened_ud_buf; - std::array interp_qualifiers{}; + struct Interpolation { + Qualifier primary; + Qualifier auxiliary; + }; + std::array fs_interpolation{}; IR::ScalarReg tess_consts_ptr_base = IR::ScalarReg::Max; s32 tess_consts_dword_offset = -1; @@ -209,8 +224,6 @@ struct Info { bool has_discard{}; bool has_image_gather{}; bool has_image_query{}; - bool has_perspective_interp{}; - bool has_linear_interp{}; bool uses_buffer_atomic_float_min_max{}; bool uses_image_atomic_float_min_max{}; bool uses_lane_id{}; diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index b2f11d141..094c34ee8 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -130,6 +130,20 @@ std::string NameOf(Attribute attribute) { return "LocalInvocationIndex"; case Attribute::FragCoord: return "FragCoord"; + case Attribute::BaryCoordNoPersp: + return "BaryCoordNoPersp"; + case Attribute::BaryCoordNoPerspCentroid: + return "BaryCoordNoPerspCentroid"; + case Attribute::BaryCoordNoPerspSample: + return "BaryCoordNoPerspSample"; + case Attribute::BaryCoordSmooth: + return "BaryCoordSmooth"; + case Attribute::BaryCoordSmoothCentroid: + return "BaryCoordSmoothCentroid"; + case Attribute::BaryCoordSmoothSample: + return "BaryCoordSmoothSample"; + case Attribute::BaryCoordPullModel: + return "BaryCoordPullModel"; case Attribute::InvocationId: return "InvocationId"; case Attribute::PatchVertices: diff --git a/src/shader_recompiler/ir/attribute.h b/src/shader_recompiler/ir/attribute.h index b6b1c8b59..00ec6c4b3 100644 --- a/src/shader_recompiler/ir/attribute.h +++ b/src/shader_recompiler/ir/attribute.h @@ -73,24 +73,21 @@ enum class Attribute : u64 { LocalInvocationId = 76, LocalInvocationIndex = 77, FragCoord = 78, - InvocationId = 81, // TCS id in output patch and instanced geometry shader id - PatchVertices = 82, - TessellationEvaluationPointU = 83, - TessellationEvaluationPointV = 84, - PackedHullInvocationInfo = 85, // contains patch id within the VGT and invocation ID + BaryCoordNoPersp = 79, + BaryCoordNoPerspCentroid = 80, + BaryCoordNoPerspSample = 81, + BaryCoordSmooth = 82, + BaryCoordSmoothCentroid = 83, + BaryCoordSmoothSample = 84, + BaryCoordPullModel = 85, + InvocationId = 86, // TCS id in output patch and instanced geometry shader id + PatchVertices = 87, + TessellationEvaluationPointU = 88, + TessellationEvaluationPointV = 89, + PackedHullInvocationInfo = 90, // contains patch id within the VGT and invocation ID Max, }; -enum class Interpolation { - Invalid = 0, - PerspectiveSample = 1, - PerspectiveCenter = 2, - PerspectiveCentroid = 3, - LinearSample = 4, - LinearCenter = 5, - LinearCentroid = 6, -}; - constexpr size_t NumAttributes = static_cast(Attribute::Max); constexpr size_t NumRenderTargets = 8; constexpr size_t NumParams = 32; @@ -112,13 +109,8 @@ constexpr bool IsMrt(Attribute attribute) noexcept { return attribute >= Attribute::RenderTarget0 && attribute <= Attribute::RenderTarget7; } -constexpr bool IsLinear(Interpolation interp) noexcept { - return interp >= Interpolation::LinearSample && interp <= Interpolation::LinearCentroid; -} - -constexpr bool IsPerspective(Interpolation interp) noexcept { - return interp >= Interpolation::PerspectiveSample && - interp <= Interpolation::PerspectiveCentroid; +constexpr bool IsBarycentricCoord(Attribute attribute) noexcept { + return attribute >= Attribute::BaryCoordSmooth && attribute <= Attribute::BaryCoordSmoothSample; } [[nodiscard]] std::string NameOf(Attribute attribute); diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index ad36a2e13..d57e18ff0 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -10,16 +10,10 @@ namespace Shader { struct Profile { u32 supported_spirv{0x00010000}; u32 subgroup_size{}; - bool unified_descriptor_binding{}; - bool support_descriptor_aliasing{}; bool support_int8{}; bool support_int16{}; bool support_int64{}; bool support_float64{}; - bool support_vertex_instance_id{}; - bool support_float_controls{}; - bool support_separate_denorm_behavior{}; - bool support_separate_rounding_mode{}; bool support_fp32_denorm_preserve{}; bool support_fp32_denorm_flush{}; bool support_fp32_round_to_zero{}; @@ -33,6 +27,9 @@ struct Profile { bool supports_buffer_int64_atomics{}; bool supports_shared_int64_atomics{}; bool supports_workgroup_explicit_memory_layout{}; + bool supports_amd_shader_explicit_vertex_parameter{}; + bool supports_fragment_shader_barycentric{}; + bool has_incomplete_fragment_shader_barycentric{}; bool has_broken_spirv_clamp{}; bool lower_left_origin_mode{}; bool needs_manual_interpolation{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 3c8332c10..4d89c83b2 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -137,7 +137,8 @@ GraphicsPipeline::GraphicsPipeline( const vk::PipelineMultisampleStateCreateInfo multisampling = { .rasterizationSamples = LiverpoolToVK::NumSamples(key.num_samples, instance.GetFramebufferSampleCounts()), - .sampleShadingEnable = false, + .sampleShadingEnable = + fs_info.addr_flags.persp_sample_ena || fs_info.addr_flags.linear_sample_ena, }; const vk::PipelineViewportDepthClipControlCreateInfoEXT clip_control = { diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index c0b138fad..119c0a367 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -273,7 +273,12 @@ bool Instance::CreateDevice() { depth_clip_enable = add_extension(VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME); vertex_input_dynamic_state = add_extension(VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME); list_restart = add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME); - fragment_shader_barycentric = add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME); + amd_shader_explicit_vertex_parameter = + add_extension(VK_AMD_SHADER_EXPLICIT_VERTEX_PARAMETER_EXTENSION_NAME); + if (!amd_shader_explicit_vertex_parameter) { + fragment_shader_barycentric = + add_extension(VK_KHR_FRAGMENT_SHADER_BARYCENTRIC_EXTENSION_NAME); + } legacy_vertex_attributes = add_extension(VK_EXT_LEGACY_VERTEX_ATTRIBUTES_EXTENSION_NAME); provoking_vertex = add_extension(VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); shader_stencil_export = add_extension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME); diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index d96abfabe..9be2d9520 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -145,6 +145,11 @@ public: return fragment_shader_barycentric; } + /// Returns true when VK_AMD_shader_explicit_vertex_parameter is supported. + bool IsAmdShaderExplicitVertexParameterSupported() const { + return amd_shader_explicit_vertex_parameter; + } + /// Returns true when VK_EXT_primitive_topology_list_restart is supported. bool IsListRestartSupported() const { return list_restart; @@ -413,6 +418,7 @@ private: u32 queue_family_index{0}; bool custom_border_color{}; bool fragment_shader_barycentric{}; + bool amd_shader_explicit_vertex_parameter{}; bool depth_clip_control{}; bool depth_clip_enable{}; bool depth_range_unrestricted{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 31ede7936..4de8fd73b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -220,6 +220,12 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .supports_shared_int64_atomics = instance_.IsSharedInt64AtomicsSupported(), .supports_workgroup_explicit_memory_layout = instance_.IsWorkgroupMemoryExplicitLayoutSupported(), + .supports_amd_shader_explicit_vertex_parameter = + instance_.IsAmdShaderExplicitVertexParameterSupported(), + .supports_fragment_shader_barycentric = instance_.IsFragmentShaderBarycentricSupported(), + .has_incomplete_fragment_shader_barycentric = + instance_.IsFragmentShaderBarycentricSupported() && + instance.GetDriverID() == vk::DriverId::eMoltenvk, .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||