diff --git a/CMakeLists.txt b/CMakeLists.txt index 9fb3e615f..23a205249 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -918,6 +918,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/ir/opcodes.inc src/shader_recompiler/ir/patch.cpp src/shader_recompiler/ir/patch.h + src/shader_recompiler/ir/position.h src/shader_recompiler/ir/post_order.cpp src/shader_recompiler/ir/post_order.h src/shader_recompiler/ir/program.cpp diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 20d2c6587..f41765a66 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -93,17 +93,24 @@ void Translator::ExportRenderTarget(const GcnInst& inst) { } } + // Metal seems to have an issue where 8-bit unorm/snorm/sRGB outputs to render target + // need a bias applied to round correctly; detect and set the flag for that here. + const auto needs_unorm_fixup = profile.needs_unorm_fixup && + (color_buffer.num_format == AmdGpu::NumberFormat::Unorm || + color_buffer.num_format == AmdGpu::NumberFormat::Snorm || + color_buffer.num_format == AmdGpu::NumberFormat::Srgb) && + (color_buffer.data_format == AmdGpu::DataFormat::Format8 || + color_buffer.data_format == AmdGpu::DataFormat::Format8_8 || + color_buffer.data_format == AmdGpu::DataFormat::Format8_8_8_8); + // Swizzle components and export for (u32 i = 0; i < 4; ++i) { - const u32 comp_swizzle = static_cast(color_buffer.swizzle.array[i]); - constexpr u32 min_swizzle = static_cast(AmdGpu::CompSwizzle::Red); - const auto swizzled_comp = - components[comp_swizzle >= min_swizzle ? comp_swizzle - min_swizzle : i]; + const auto swizzled_comp = components[color_buffer.swizzle.Map(i)]; if (swizzled_comp.IsEmpty()) { continue; } auto converted = ApplyWriteNumberConversion(ir, swizzled_comp, color_buffer.num_conversion); - if (color_buffer.needs_unorm_fixup) { + if (needs_unorm_fixup) { // FIXME: Fix-up for GPUs where float-to-unorm rounding is off from expected. converted = ir.FPSub(converted, ir.Imm32(1.f / 127500.f)); } diff --git a/src/shader_recompiler/ir/position.h b/src/shader_recompiler/ir/position.h index 0fdeb0eb8..eb67634f0 100644 --- a/src/shader_recompiler/ir/position.h +++ b/src/shader_recompiler/ir/position.h @@ -3,6 +3,7 @@ #pragma once +#include "common/logging/log.h" #include "shader_recompiler/ir/ir_emitter.h" #include "shader_recompiler/runtime_info.h" @@ -45,8 +46,12 @@ inline void ExportPosition(IREmitter& ir, const auto& stage, Attribute attribute case Output::GsMrtIndex: ir.SetAttribute(IR::Attribute::RenderTargetId, value); break; + case Output::None: + LOG_WARNING(Render_Recompiler, "The {} component of {} isn't mapped, skipping", + "xyzw"[comp], NameOf(attribute)); + break; default: - UNREACHABLE_MSG("Unhandled output {} on attribute {}", u32(output), u32(attribute)); + UNREACHABLE_MSG("Unhandled output {} on attribute {}", u32(output), NameOf(attribute)); } } diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index d57e18ff0..ba6facff5 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -35,6 +35,7 @@ struct Profile { bool needs_manual_interpolation{}; bool needs_lds_barriers{}; bool needs_buffer_offsets{}; + bool needs_unorm_fixup{}; u64 max_ubo_size{}; u32 max_viewport_width{}; u32 max_viewport_height{}; diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 791f305b0..7ac876c50 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -93,7 +93,8 @@ struct VertexRuntimeInfo { u32 hs_output_cp_stride{}; bool operator==(const VertexRuntimeInfo& other) const noexcept { - return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one && + return num_outputs == other.num_outputs && outputs == other.outputs && + emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one && clip_disable == other.clip_disable && tess_type == other.tess_type && tess_topology == other.tess_topology && tess_partitioning == other.tess_partitioning && @@ -158,8 +159,9 @@ struct GeometryRuntimeInfo { u64 vs_copy_hash; bool operator==(const GeometryRuntimeInfo& other) const noexcept { - return num_invocations && other.num_invocations && - output_vertices == other.output_vertices && in_primitive == other.in_primitive && + return num_outputs == other.num_outputs && outputs == other.outputs && num_invocations && + other.num_invocations && output_vertices == other.output_vertices && + in_primitive == other.in_primitive && std::ranges::equal(out_primitive, other.out_primitive); } }; @@ -177,8 +179,6 @@ struct PsColorBuffer { AmdGpu::NumberFormat num_format : 4; AmdGpu::NumberConversion num_conversion : 3; AmdGpu::Liverpool::ShaderExportFormat export_format : 4; - u32 needs_unorm_fixup : 1; - u32 pad : 20; AmdGpu::CompMapping swizzle; bool operator==(const PsColorBuffer& other) const noexcept = default; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 941a79c2d..aaff3e31f 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -794,6 +794,7 @@ struct Liverpool { ReverseSubtract = 4, }; + u32 raw; BitField<0, 5, BlendFactor> color_src_factor; BitField<5, 3, BlendFunc> color_func; BitField<8, 5, BlendFactor> color_dst_factor; @@ -803,6 +804,10 @@ struct Liverpool { BitField<29, 1, u32> separate_alpha_blend; BitField<30, 1, u32> enable; BitField<31, 1, u32> disable_rop3; + + bool operator==(const BlendControl& other) const { + return raw == other.raw; + } }; union ColorControl { @@ -919,7 +924,7 @@ struct Liverpool { INSERT_PADDING_WORDS(2); operator bool() const { - return info.format != DataFormat::FormatInvalid; + return base_address && info.format != DataFormat::FormatInvalid; } u32 Pitch() const { diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index 84e0fc2ee..af7a69746 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -85,7 +85,7 @@ enum class NumberClass { Uint, }; -enum class CompSwizzle : u32 { +enum class CompSwizzle : u8 { Zero = 0, One = 1, Red = 4, @@ -136,6 +136,12 @@ union CompMapping { return result; } + [[nodiscard]] u32 Map(u32 comp) const { + const u32 swizzled_comp = u32(array[comp]); + constexpr u32 min_comp = u32(AmdGpu::CompSwizzle::Red); + return swizzled_comp >= min_comp ? swizzled_comp - min_comp : comp; + } + private: template T ApplySingle(const std::array& data, const CompSwizzle swizzle) const { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 8094bc260..e4ebb8104 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -168,9 +168,6 @@ GraphicsPipeline::GraphicsPipeline( dynamic_states.push_back(vk::DynamicState::eDepthBoundsTestEnable); dynamic_states.push_back(vk::DynamicState::eDepthBounds); } - if (instance.IsDynamicColorWriteMaskSupported()) { - dynamic_states.push_back(vk::DynamicState::eColorWriteMaskEXT); - } if (instance.IsVertexInputDynamicState()) { dynamic_states.push_back(vk::DynamicState::eVertexInputEXT); } else if (!vertex_bindings.empty()) { @@ -291,11 +288,7 @@ GraphicsPipeline::GraphicsPipeline( .alphaBlendOp = control.separate_alpha_blend ? LiverpoolToVK::BlendOp(control.alpha_func) : color_blend, - .colorWriteMask = - instance.IsDynamicColorWriteMaskSupported() - ? vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | - vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA - : key.write_masks[i], + .colorWriteMask = vk::ColorComponentFlags{key.write_masks[i]}, }; // On GCN GPU there is an additional mask which allows to control color components exported diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 39eb1e2be..e7d4c9463 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -38,7 +38,7 @@ struct GraphicsPipelineKey { u32 num_color_attachments; std::array color_buffers; std::array blend_controls; - std::array write_masks; + std::array write_masks; Liverpool::ColorBufferMask cb_shader_mask; Liverpool::ColorControl::LogicOp logic_op; u32 num_samples; @@ -80,11 +80,7 @@ public: return fetch_shader; } - auto GetWriteMasks() const { - return key.write_masks; - } - - auto GetMrtMask() const { + u32 GetMrtMask() const { return key.mrt_mask; } diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index d13aeec99..2201c6493 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -255,13 +255,6 @@ bool Instance::CreateDevice() { // Optional maintenance_8 = add_extension(VK_KHR_MAINTENANCE_8_EXTENSION_NAME); depth_range_unrestricted = add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME); - dynamic_state_3 = add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); - if (dynamic_state_3) { - dynamic_state_3_features = - feature_chain.get(); - LOG_INFO(Render_Vulkan, "- extendedDynamicState3ColorWriteMask: {}", - dynamic_state_3_features.extendedDynamicState3ColorWriteMask); - } robustness2 = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); if (robustness2) { robustness2_features = feature_chain.get(); @@ -426,10 +419,6 @@ bool Instance::CreateDevice() { .customBorderColors = true, .customBorderColorWithoutFormat = true, }, - vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{ - .extendedDynamicState3ColorWriteMask = - dynamic_state_3_features.extendedDynamicState3ColorWriteMask, - }, vk::PhysicalDeviceDepthClipControlFeaturesEXT{ .depthClipControl = true, }, @@ -505,9 +494,6 @@ bool Instance::CreateDevice() { if (!custom_border_color) { device_chain.unlink(); } - if (!dynamic_state_3) { - device_chain.unlink(); - } if (!depth_clip_control) { device_chain.unlink(); } diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index be316f6e8..3bf7d335d 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -139,12 +139,6 @@ public: return depth_range_unrestricted; } - /// Returns true when the extendedDynamicState3ColorWriteMask feature of - /// VK_EXT_extended_dynamic_state3 is supported. - bool IsDynamicColorWriteMaskSupported() const { - return dynamic_state_3 && dynamic_state_3_features.extendedDynamicState3ColorWriteMask; - } - /// Returns true when VK_EXT_vertex_input_dynamic_state is supported. bool IsVertexInputDynamicState() const { return vertex_input_dynamic_state; @@ -439,7 +433,6 @@ private: vk::PhysicalDeviceFeatures features; vk::PhysicalDeviceVulkan12Features vk12_features; vk::PhysicalDevicePortabilitySubsetFeaturesKHR portability_features; - vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features; vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features; vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT shader_atomic_float2_features; vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR @@ -461,7 +454,6 @@ private: bool depth_clip_control{}; bool depth_clip_enable{}; bool depth_range_unrestricted{}; - bool dynamic_state_3{}; bool vertex_input_dynamic_state{}; bool robustness2{}; bool list_restart{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 56f788ea2..5e98810f9 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -241,6 +241,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary || instance.GetDriverID() == vk::DriverId::eMoltenvk, .needs_buffer_offsets = instance.StorageMinAlignment() > 4, + .needs_unorm_fixup = instance.GetDriverID() == vk::DriverId::eMoltenvk, // When binding a UBO, we calculate its size considering the offset in the larger buffer // cache underlying resource. In some cases, it may produce sizes exceeding the system // maximum allowed UBO range, so we need to reduce the threshold to prevent issues. @@ -297,8 +298,7 @@ const ComputePipeline* PipelineCache::GetComputePipeline() { bool PipelineCache::RefreshGraphicsKey() { std::memset(&graphics_key, 0, sizeof(GraphicsPipelineKey)); - - auto& regs = liverpool->regs; + const auto& regs = liverpool->regs; auto& key = graphics_key; key.z_format = regs.depth_buffer.DepthValid() ? regs.depth_buffer.z_info.format.Value() @@ -312,65 +312,72 @@ bool PipelineCache::RefreshGraphicsKey() { key.provoking_vtx_last = regs.polygon_control.provoking_vtx_last; key.prim_type = regs.primitive_type; key.polygon_mode = regs.polygon_control.PolyMode(); + key.patch_control_points = + regs.stage_enable.hs_en ? regs.ls_hs_config.hs_input_control_points.Value() : 0; key.logic_op = regs.color_control.rop3; key.num_samples = regs.NumSamples(); + key.cb_shader_mask = regs.color_shader_mask; const bool skip_cb_binding = regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; - // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color - // attachments. This might be not a case as HW color buffers can be bound in an arbitrary - // order. We need to do some arrays compaction at this stage - key.num_color_attachments = 0; - key.color_buffers.fill({}); - key.blend_controls.fill({}); - key.write_masks.fill({}); - key.vertex_buffer_formats.fill(vk::Format::eUndefined); - - key.patch_control_points = 0; - if (regs.stage_enable.hs_en.Value()) { - key.patch_control_points = regs.ls_hs_config.hs_input_control_points.Value(); - } - - // First pass of bindings check to idenitfy formats and swizzles and pass them to rhe shader - // recompiler. - for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { - auto const& col_buf = regs.color_buffers[cb]; - if (skip_cb_binding || !col_buf) { - // No attachment bound and no incremented index. + // First pass to fill render target information + for (s32 cb = 0; cb < Liverpool::NumColorBuffers && !skip_cb_binding; ++cb) { + const auto& col_buf = regs.color_buffers[cb]; + const u32 target_mask = regs.color_target_mask.GetMask(cb); + if (!col_buf || !target_mask) { + // No attachment bound or writing to it is disabled. continue; } - const auto remapped_cb = key.num_color_attachments++; - if (!regs.color_target_mask.GetMask(cb)) { - // Bound to null handle, skip over this attachment index. - continue; - } - - // Metal seems to have an issue where 8-bit unorm/snorm/sRGB outputs to render target - // need a bias applied to round correctly; detect and set the flag for that here. - const auto needs_unorm_fixup = instance.GetDriverID() == vk::DriverId::eMoltenvk && - (col_buf.GetNumberFmt() == AmdGpu::NumberFormat::Unorm || - col_buf.GetNumberFmt() == AmdGpu::NumberFormat::Snorm || - col_buf.GetNumberFmt() == AmdGpu::NumberFormat::Srgb) && - (col_buf.GetDataFmt() == AmdGpu::DataFormat::Format8 || - col_buf.GetDataFmt() == AmdGpu::DataFormat::Format8_8 || - col_buf.GetDataFmt() == AmdGpu::DataFormat::Format8_8_8_8); - - key.color_buffers[remapped_cb] = Shader::PsColorBuffer{ + // Fill color target information + key.color_buffers[cb] = Shader::PsColorBuffer{ .data_format = col_buf.GetDataFmt(), .num_format = col_buf.GetNumberFmt(), .num_conversion = col_buf.GetNumberConversion(), .export_format = regs.color_export_format.GetFormat(cb), - .needs_unorm_fixup = needs_unorm_fixup, .swizzle = col_buf.Swizzle(), }; + + // Fill color blending information + key.blend_controls[cb] = regs.blend_control[cb]; + key.blend_controls[cb].enable.Assign(regs.blend_control[cb].enable && + !col_buf.info.blend_bypass); + + // Apply swizzle to target mask + const auto& swizzle = key.color_buffers[cb].swizzle; + for (u32 i = 0; i < 4; ++i) { + key.write_masks[cb] |= ((target_mask >> i) & 1) << swizzle.Map(i); + } } + // Compile and bind shader stages + if (!RefreshGraphicsStages()) { + return false; + } + + // Second pass to mask out render targets not written by fragment shader + for (s32 cb = 0; cb < key.num_color_attachments && !skip_cb_binding; ++cb) { + const auto& col_buf = regs.color_buffers[cb]; + if (!col_buf || !regs.color_target_mask.GetMask(cb)) { + continue; + } + if ((key.mrt_mask & (1u << cb)) == 0) { + // Attachment is bound and mask allows writes but shader does not output to it. + key.color_buffers[cb] = {}; + } + } + + return true; +} + +bool PipelineCache::RefreshGraphicsStages() { + const auto& regs = liverpool->regs; + auto& key = graphics_key; fetch_shader = std::nullopt; Shader::Backend::Bindings binding{}; - const auto& TryBindStage = [&](Shader::Stage stage_in, Shader::LogicalStage stage_out) -> bool { + const auto bind_stage = [&](Shader::Stage stage_in, Shader::LogicalStage stage_out) -> bool { const auto stage_in_idx = static_cast(stage_in); const auto stage_out_idx = static_cast(stage_out); if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) { @@ -405,52 +412,50 @@ bool PipelineCache::RefreshGraphicsKey() { return true; }; - const auto& IsGsFeaturesSupported = [&]() -> bool { - // These checks are temporary until all functionality is implemented. - return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw; - }; - infos.fill(nullptr); - TryBindStage(Stage::Fragment, LogicalStage::Fragment); + bind_stage(Stage::Fragment, LogicalStage::Fragment); const auto* fs_info = infos[static_cast(LogicalStage::Fragment)]; key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u; + key.num_color_attachments = std::bit_width(key.mrt_mask); switch (regs.stage_enable.raw) { - case Liverpool::ShaderStageEnable::VgtStages::EsGs: { - if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) { + case Liverpool::ShaderStageEnable::VgtStages::EsGs: + if (!instance.IsGeometryStageSupported()) { + LOG_WARNING(Render_Vulkan, "Geometry shader stage unsupported, skipping"); return false; } - if (!TryBindStage(Stage::Export, LogicalStage::Vertex)) { + if (regs.vgt_gs_mode.onchip || regs.vgt_strmout_config.raw) { + LOG_WARNING(Render_Vulkan, "Geometry shader features unsupported, skipping"); return false; } - if (!TryBindStage(Stage::Geometry, LogicalStage::Geometry)) { + if (!bind_stage(Stage::Export, LogicalStage::Vertex)) { + return false; + } + if (!bind_stage(Stage::Geometry, LogicalStage::Geometry)) { return false; } break; - } - case Liverpool::ShaderStageEnable::VgtStages::LsHs: { + case Liverpool::ShaderStageEnable::VgtStages::LsHs: if (!instance.IsTessellationSupported() || (regs.tess_config.type == AmdGpu::TessellationType::Isoline && !instance.IsTessellationIsolinesSupported())) { return false; } - if (!TryBindStage(Stage::Hull, LogicalStage::TessellationControl)) { + if (!bind_stage(Stage::Hull, LogicalStage::TessellationControl)) { return false; } - if (!TryBindStage(Stage::Vertex, LogicalStage::TessellationEval)) { + if (!bind_stage(Stage::Vertex, LogicalStage::TessellationEval)) { return false; } - if (!TryBindStage(Stage::Local, LogicalStage::Vertex)) { + if (!bind_stage(Stage::Local, LogicalStage::Vertex)) { return false; } break; - } - default: { - TryBindStage(Stage::Vertex, LogicalStage::Vertex); + default: + bind_stage(Stage::Vertex, LogicalStage::Vertex); break; } - } const auto* vs_info = infos[static_cast(Shader::LogicalStage::Vertex)]; if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) { @@ -465,40 +470,6 @@ bool PipelineCache::RefreshGraphicsKey() { } } - // Second pass to fill remain CB pipeline key data - for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { - auto const& col_buf = regs.color_buffers[cb]; - if (skip_cb_binding || !col_buf) { - // No attachment bound and no incremented index. - continue; - } - - const u32 target_mask = regs.color_target_mask.GetMask(cb); - if (!target_mask || (key.mrt_mask & (1u << cb)) == 0) { - // Attachment is masked out by either color_target_mask or shader mrt_mask. In the case - // of the latter we need to change format to undefined, and either way we need to - // increment the index for the null attachment binding. - key.color_buffers[remapped_cb++] = {}; - continue; - } - - key.blend_controls[remapped_cb] = regs.blend_control[cb]; - key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && - !col_buf.info.blend_bypass); - // Apply swizzle to target mask - for (u32 i = 0; i < 4; i++) { - if (target_mask & (1 << i)) { - const auto swizzled_comp = - static_cast(key.color_buffers[remapped_cb].swizzle.array[i]); - constexpr u32 min_comp = static_cast(AmdGpu::CompSwizzle::Red); - const u32 comp = swizzled_comp >= min_comp ? swizzled_comp - min_comp : i; - key.write_masks[remapped_cb] |= vk::ColorComponentFlagBits{1u << comp}; - } - } - key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb)); - ++remapped_cb; - } - return true; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index ba3407b48..e077f857c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -74,6 +74,7 @@ public: private: bool RefreshGraphicsKey(); + bool RefreshGraphicsStages(); bool RefreshComputeKey(); void DumpShader(std::span code, u64 hash, Shader::Stage stage, size_t perm_idx, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 6829979e3..7fe2ce0cc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -113,6 +113,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { state.width = instance.GetMaxFramebufferWidth(); state.height = instance.GetMaxFramebufferHeight(); state.num_layers = std::numeric_limits::max(); + state.num_color_attachments = std::bit_width(mrt_mask); cb_descs.clear(); db_desc.reset(); @@ -125,29 +126,31 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { const bool skip_cb_binding = regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable; - for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) { - const auto& col_buf = regs.color_buffers[col_buf_id]; - if (skip_cb_binding || !col_buf) { + + for (s32 cb = 0; cb < state.num_color_attachments && !skip_cb_binding; ++cb) { + const auto& col_buf = regs.color_buffers[cb]; + if (!col_buf) { + state.color_attachments[cb].imageView = VK_NULL_HANDLE; continue; } // Skip stale color buffers if shader doesn't output to them. Otherwise it will perform // an unnecessary transition and may result in state conflict if the resource is already // bound for reading. - if ((mrt_mask & (1 << col_buf_id)) == 0) { - state.color_attachments[state.num_color_attachments++].imageView = VK_NULL_HANDLE; + if ((mrt_mask & (1 << cb)) == 0) { + state.color_attachments[cb].imageView = VK_NULL_HANDLE; continue; } // If the color buffer is still bound but rendering to it is disabled by the target // mask, we need to prevent the render area from being affected by unbound render target // extents. - if (!regs.color_target_mask.GetMask(col_buf_id)) { - state.color_attachments[state.num_color_attachments++].imageView = VK_NULL_HANDLE; + if (!regs.color_target_mask.GetMask(cb)) { + state.color_attachments[cb].imageView = VK_NULL_HANDLE; continue; } - const auto& hint = liverpool->last_cb_extent[col_buf_id]; + const auto& hint = liverpool->last_cb_extent[cb]; auto& [image_id, desc] = cb_descs.emplace_back(std::piecewise_construct, std::tuple{}, std::tuple{col_buf, hint}); const auto& image_view = texture_cache.FindRenderTarget(desc); @@ -163,7 +166,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { state.width = std::min(state.width, std::max(image.info.size.width >> mip, 1u)); state.height = std::min(state.height, std::max(image.info.size.height >> mip, 1u)); state.num_layers = std::min(state.num_layers, image_view.info.range.extent.layers); - state.color_attachments[state.num_color_attachments++] = { + state.color_attachments[cb] = { .imageView = *image_view.image_view, .imageLayout = vk::ImageLayout::eUndefined, .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, @@ -1094,7 +1097,6 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) const { auto& dynamic_state = scheduler.GetDynamicState(); dynamic_state.SetBlendConstants(liverpool->regs.blend_constants); - dynamic_state.SetColorWriteMasks(pipeline.GetWriteMasks()); // Commit new dynamic state to the command buffer. dynamic_state.Commit(instance, scheduler.CommandBuffer()); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 910142232..9e5a4b521 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -1,10 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include #include "common/assert.h" #include "common/debug.h" -#include "common/logging/log.h" #include "imgui/renderer/texture_manager.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -325,12 +323,6 @@ void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmd dirty_state.blend_constants = false; cmdbuf.setBlendConstants(blend_constants.data()); } - if (dirty_state.color_write_masks) { - dirty_state.color_write_masks = false; - if (instance.IsDynamicColorWriteMaskSupported()) { - cmdbuf.setColorWriteMaskEXT(0, color_write_masks); - } - } if (dirty_state.line_width) { dirty_state.line_width = false; cmdbuf.setLineWidth(line_width); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 449fa2cca..949dbd6f5 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -5,6 +5,7 @@ #include #include + #include "common/types.h" #include "common/unique_function.h" #include "video_core/amdgpu/liverpool.h" @@ -107,7 +108,6 @@ struct DynamicState { bool front_face : 1; bool blend_constants : 1; - bool color_write_masks : 1; bool line_width : 1; } dirty_state{}; @@ -143,7 +143,6 @@ struct DynamicState { vk::FrontFace front_face{}; std::array blend_constants{}; - ColorWriteMasks color_write_masks{}; float line_width{}; /// Commits the dynamic state to the provided command buffer. @@ -307,13 +306,6 @@ struct DynamicState { } } - void SetColorWriteMasks(const ColorWriteMasks& color_write_masks_) { - if (!std::ranges::equal(color_write_masks, color_write_masks_)) { - color_write_masks = color_write_masks_; - dirty_state.color_write_masks = true; - } - } - void SetLineWidth(const float width) { if (line_width != width) { line_width = width;