Mirror of https://github.com/shadps4-emu/shadPS4.git, synced 2025-12-09 13:19:00 +00:00
vk_pipeline_cache: Cleanup graphics key refresh (#3449)
* vk_pipeline_cache: Cleanup graphics key refresh
* position: Don't assert on None mapping

Also check outputs in runtime info so the shader is recompiled if they change.
@@ -918,6 +918,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
                       src/shader_recompiler/ir/opcodes.inc
                       src/shader_recompiler/ir/patch.cpp
                       src/shader_recompiler/ir/patch.h
+                      src/shader_recompiler/ir/position.h
                       src/shader_recompiler/ir/post_order.cpp
                       src/shader_recompiler/ir/post_order.h
                       src/shader_recompiler/ir/program.cpp

@@ -93,17 +93,24 @@ void Translator::ExportRenderTarget(const GcnInst& inst) {
         }
     }

+    // Metal seems to have an issue where 8-bit unorm/snorm/sRGB outputs to render target
+    // need a bias applied to round correctly; detect and set the flag for that here.
+    const auto needs_unorm_fixup = profile.needs_unorm_fixup &&
+                                   (color_buffer.num_format == AmdGpu::NumberFormat::Unorm ||
+                                    color_buffer.num_format == AmdGpu::NumberFormat::Snorm ||
+                                    color_buffer.num_format == AmdGpu::NumberFormat::Srgb) &&
+                                   (color_buffer.data_format == AmdGpu::DataFormat::Format8 ||
+                                    color_buffer.data_format == AmdGpu::DataFormat::Format8_8 ||
+                                    color_buffer.data_format == AmdGpu::DataFormat::Format8_8_8_8);
+
     // Swizzle components and export
     for (u32 i = 0; i < 4; ++i) {
-        const u32 comp_swizzle = static_cast<u32>(color_buffer.swizzle.array[i]);
-        constexpr u32 min_swizzle = static_cast<u32>(AmdGpu::CompSwizzle::Red);
-        const auto swizzled_comp =
-            components[comp_swizzle >= min_swizzle ? comp_swizzle - min_swizzle : i];
+        const auto swizzled_comp = components[color_buffer.swizzle.Map(i)];
         if (swizzled_comp.IsEmpty()) {
            continue;
        }
        auto converted = ApplyWriteNumberConversion(ir, swizzled_comp, color_buffer.num_conversion);
-        if (color_buffer.needs_unorm_fixup) {
+        if (needs_unorm_fixup) {
            // FIXME: Fix-up for GPUs where float-to-unorm rounding is off from expected.
            converted = ir.FPSub(converted, ir.Imm32(1.f / 127500.f));
        }

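For scale, the bias is tiny relative to the quantization grid: one 8-bit unorm step is 1/255 ≈ 3.9e-3, the round-to-nearest threshold is half that (1/510 ≈ 2.0e-3), and 1.f / 127500.f = 1/(510 * 250) ≈ 7.8e-6 is only 1/250 of a half step. The subtraction therefore presumably only flips values sitting essentially on a rounding boundary and leaves everything else unchanged.
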
@@ -3,6 +3,7 @@

 #pragma once

+#include "common/logging/log.h"
 #include "shader_recompiler/ir/ir_emitter.h"
 #include "shader_recompiler/runtime_info.h"

@@ -45,8 +46,12 @@ inline void ExportPosition(IREmitter& ir, const auto& stage, Attribute attribute
     case Output::GsMrtIndex:
         ir.SetAttribute(IR::Attribute::RenderTargetId, value);
         break;
+    case Output::None:
+        LOG_WARNING(Render_Recompiler, "The {} component of {} isn't mapped, skipping",
+                    "xyzw"[comp], NameOf(attribute));
+        break;
     default:
-        UNREACHABLE_MSG("Unhandled output {} on attribute {}", u32(output), u32(attribute));
+        UNREACHABLE_MSG("Unhandled output {} on attribute {}", u32(output), NameOf(attribute));
     }
 }

@@ -35,6 +35,7 @@ struct Profile {
     bool needs_manual_interpolation{};
     bool needs_lds_barriers{};
     bool needs_buffer_offsets{};
+    bool needs_unorm_fixup{};
     u64 max_ubo_size{};
     u32 max_viewport_width{};
     u32 max_viewport_height{};

@@ -93,7 +93,8 @@ struct VertexRuntimeInfo {
     u32 hs_output_cp_stride{};

     bool operator==(const VertexRuntimeInfo& other) const noexcept {
-        return emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one &&
+        return num_outputs == other.num_outputs && outputs == other.outputs &&
+               emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one &&
                clip_disable == other.clip_disable && tess_type == other.tess_type &&
                tess_topology == other.tess_topology &&
                tess_partitioning == other.tess_partitioning &&

@@ -158,8 +159,9 @@ struct GeometryRuntimeInfo {
     u64 vs_copy_hash;

     bool operator==(const GeometryRuntimeInfo& other) const noexcept {
-        return num_invocations && other.num_invocations &&
-               output_vertices == other.output_vertices && in_primitive == other.in_primitive &&
+        return num_outputs == other.num_outputs && outputs == other.outputs && num_invocations &&
+               other.num_invocations && output_vertices == other.output_vertices &&
+               in_primitive == other.in_primitive &&
                std::ranges::equal(out_primitive, other.out_primitive);
     }
 };

@@ -177,8 +179,6 @@ struct PsColorBuffer {
     AmdGpu::NumberFormat num_format : 4;
     AmdGpu::NumberConversion num_conversion : 3;
     AmdGpu::Liverpool::ShaderExportFormat export_format : 4;
-    u32 needs_unorm_fixup : 1;
-    u32 pad : 20;
     AmdGpu::CompMapping swizzle;

     bool operator==(const PsColorBuffer& other) const noexcept = default;

@@ -794,6 +794,7 @@ struct Liverpool {
             ReverseSubtract = 4,
         };

+        u32 raw;
         BitField<0, 5, BlendFactor> color_src_factor;
         BitField<5, 3, BlendFunc> color_func;
         BitField<8, 5, BlendFactor> color_dst_factor;

@@ -803,6 +804,10 @@ struct Liverpool {
         BitField<29, 1, u32> separate_alpha_blend;
         BitField<30, 1, u32> enable;
         BitField<31, 1, u32> disable_rop3;
+
+        bool operator==(const BlendControl& other) const {
+            return raw == other.raw;
+        }
     };

     union ColorControl {

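The `u32 raw;` member added in the previous hunk gives the BitField views a whole-register alias, so the new operator== can compare every blend field with a single integer comparison, which the pipeline key's comparison relies on now that blend controls are filled during the first pass. A minimal standalone sketch of the idiom, with hypothetical names and plain C++ bit-fields standing in for the BitField<> views:

#include <cstdint>

// Sketch only: `raw` overlays the packed fields, so equality is one compare.
union BlendControlLike {
    std::uint32_t raw{};
    struct {
        std::uint32_t color_src_factor : 5;
        std::uint32_t color_func : 3;
        std::uint32_t color_dst_factor : 5;
        std::uint32_t rest : 19;
    } fields;

    bool operator==(const BlendControlLike& other) const {
        return raw == other.raw;
    }
};

static_assert(sizeof(BlendControlLike) == sizeof(std::uint32_t));
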
@@ -919,7 +924,7 @@ struct Liverpool {
         INSERT_PADDING_WORDS(2);

         operator bool() const {
-            return info.format != DataFormat::FormatInvalid;
+            return base_address && info.format != DataFormat::FormatInvalid;
         }

         u32 Pitch() const {

@@ -85,7 +85,7 @@ enum class NumberClass {
     Uint,
 };

-enum class CompSwizzle : u32 {
+enum class CompSwizzle : u8 {
     Zero = 0,
     One = 1,
     Red = 4,

@@ -136,6 +136,12 @@ union CompMapping {
         return result;
     }

+    [[nodiscard]] u32 Map(u32 comp) const {
+        const u32 swizzled_comp = u32(array[comp]);
+        constexpr u32 min_comp = u32(AmdGpu::CompSwizzle::Red);
+        return swizzled_comp >= min_comp ? swizzled_comp - min_comp : comp;
+    }
+
 private:
     template <typename T>
     T ApplySingle(const std::array<T, 4>& data, const CompSwizzle swizzle) const {

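A standalone sketch of what Map computes. Zero, One, and Red are shown in the enum hunk above; Green, Blue, and Alpha are assumed here to follow Red as 5, 6, 7 (the usual GCN component-select ordering):

#include <array>
#include <cassert>
#include <cstdint>

enum class CompSwizzle : std::uint8_t { Zero = 0, One = 1, Red = 4, Green = 5, Blue = 6, Alpha = 7 };

// Mimic of CompMapping::Map: R/G/B/A selectors become source indices 0..3,
// while the Zero/One constants fall back to the identity index.
std::uint32_t Map(const std::array<CompSwizzle, 4>& array, std::uint32_t comp) {
    const std::uint32_t swizzled_comp = static_cast<std::uint32_t>(array[comp]);
    constexpr std::uint32_t min_comp = static_cast<std::uint32_t>(CompSwizzle::Red);
    return swizzled_comp >= min_comp ? swizzled_comp - min_comp : comp;
}

int main() {
    const std::array<CompSwizzle, 4> bgra{CompSwizzle::Blue, CompSwizzle::Green,
                                          CompSwizzle::Red, CompSwizzle::Alpha};
    assert(Map(bgra, 0) == 2); // component X sources from index 2
    assert(Map(bgra, 2) == 0); // component Z sources from index 0

    const std::array<CompSwizzle, 4> with_const{CompSwizzle::One, CompSwizzle::Green,
                                                CompSwizzle::Blue, CompSwizzle::Alpha};
    assert(Map(with_const, 0) == 0); // constant One: identity fallback
}
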
@@ -168,9 +168,6 @@ GraphicsPipeline::GraphicsPipeline(
         dynamic_states.push_back(vk::DynamicState::eDepthBoundsTestEnable);
         dynamic_states.push_back(vk::DynamicState::eDepthBounds);
     }
-    if (instance.IsDynamicColorWriteMaskSupported()) {
-        dynamic_states.push_back(vk::DynamicState::eColorWriteMaskEXT);
-    }
     if (instance.IsVertexInputDynamicState()) {
         dynamic_states.push_back(vk::DynamicState::eVertexInputEXT);
     } else if (!vertex_bindings.empty()) {

@@ -291,11 +288,7 @@ GraphicsPipeline::GraphicsPipeline(
             .alphaBlendOp = control.separate_alpha_blend
                                 ? LiverpoolToVK::BlendOp(control.alpha_func)
                                 : color_blend,
-            .colorWriteMask =
-                instance.IsDynamicColorWriteMaskSupported()
-                    ? vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
-                          vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA
-                    : key.write_masks[i],
+            .colorWriteMask = vk::ColorComponentFlags{key.write_masks[i]},
         };

         // On GCN GPU there is an additional mask which allows to control color components exported

@@ -38,7 +38,7 @@ struct GraphicsPipelineKey {
     u32 num_color_attachments;
     std::array<Shader::PsColorBuffer, Liverpool::NumColorBuffers> color_buffers;
     std::array<Liverpool::BlendControl, Liverpool::NumColorBuffers> blend_controls;
-    std::array<vk::ColorComponentFlags, Liverpool::NumColorBuffers> write_masks;
+    std::array<u32, Liverpool::NumColorBuffers> write_masks;
     Liverpool::ColorBufferMask cb_shader_mask;
     Liverpool::ColorControl::LogicOp logic_op;
     u32 num_samples;

@@ -80,11 +80,7 @@ public:
         return fetch_shader;
     }

-    auto GetWriteMasks() const {
-        return key.write_masks;
-    }
-
-    auto GetMrtMask() const {
+    u32 GetMrtMask() const {
         return key.mrt_mask;
     }

@@ -255,13 +255,6 @@ bool Instance::CreateDevice() {
     // Optional
     maintenance_8 = add_extension(VK_KHR_MAINTENANCE_8_EXTENSION_NAME);
     depth_range_unrestricted = add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME);
-    dynamic_state_3 = add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
-    if (dynamic_state_3) {
-        dynamic_state_3_features =
-            feature_chain.get<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>();
-        LOG_INFO(Render_Vulkan, "- extendedDynamicState3ColorWriteMask: {}",
-                 dynamic_state_3_features.extendedDynamicState3ColorWriteMask);
-    }
     robustness2 = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
     if (robustness2) {
         robustness2_features = feature_chain.get<vk::PhysicalDeviceRobustness2FeaturesEXT>();

@@ -426,10 +419,6 @@ bool Instance::CreateDevice() {
             .customBorderColors = true,
             .customBorderColorWithoutFormat = true,
         },
-        vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{
-            .extendedDynamicState3ColorWriteMask =
-                dynamic_state_3_features.extendedDynamicState3ColorWriteMask,
-        },
         vk::PhysicalDeviceDepthClipControlFeaturesEXT{
             .depthClipControl = true,
         },

@@ -505,9 +494,6 @@ bool Instance::CreateDevice() {
     if (!custom_border_color) {
         device_chain.unlink<vk::PhysicalDeviceCustomBorderColorFeaturesEXT>();
     }
-    if (!dynamic_state_3) {
-        device_chain.unlink<vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT>();
-    }
     if (!depth_clip_control) {
         device_chain.unlink<vk::PhysicalDeviceDepthClipControlFeaturesEXT>();
     }

@@ -139,12 +139,6 @@ public:
         return depth_range_unrestricted;
     }

-    /// Returns true when the extendedDynamicState3ColorWriteMask feature of
-    /// VK_EXT_extended_dynamic_state3 is supported.
-    bool IsDynamicColorWriteMaskSupported() const {
-        return dynamic_state_3 && dynamic_state_3_features.extendedDynamicState3ColorWriteMask;
-    }
-
     /// Returns true when VK_EXT_vertex_input_dynamic_state is supported.
     bool IsVertexInputDynamicState() const {
         return vertex_input_dynamic_state;

@@ -439,7 +433,6 @@ private:
     vk::PhysicalDeviceFeatures features;
     vk::PhysicalDeviceVulkan12Features vk12_features;
     vk::PhysicalDevicePortabilitySubsetFeaturesKHR portability_features;
-    vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;
     vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features;
     vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT shader_atomic_float2_features;
     vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR

@@ -461,7 +454,6 @@ private:
     bool depth_clip_control{};
     bool depth_clip_enable{};
     bool depth_range_unrestricted{};
-    bool dynamic_state_3{};
     bool vertex_input_dynamic_state{};
     bool robustness2{};
     bool list_restart{};

@@ -241,6 +241,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
         .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
                               instance.GetDriverID() == vk::DriverId::eMoltenvk,
         .needs_buffer_offsets = instance.StorageMinAlignment() > 4,
+        .needs_unorm_fixup = instance.GetDriverID() == vk::DriverId::eMoltenvk,
         // When binding a UBO, we calculate its size considering the offset in the larger buffer
         // cache underlying resource. In some cases, it may produce sizes exceeding the system
         // maximum allowed UBO range, so we need to reduce the threshold to prevent issues.

@@ -297,8 +298,7 @@ const ComputePipeline* PipelineCache::GetComputePipeline() {

 bool PipelineCache::RefreshGraphicsKey() {
     std::memset(&graphics_key, 0, sizeof(GraphicsPipelineKey));
-
-    auto& regs = liverpool->regs;
+    const auto& regs = liverpool->regs;
     auto& key = graphics_key;

     key.z_format = regs.depth_buffer.DepthValid() ? regs.depth_buffer.z_info.format.Value()

@@ -312,65 +312,72 @@ bool PipelineCache::RefreshGraphicsKey() {
     key.provoking_vtx_last = regs.polygon_control.provoking_vtx_last;
     key.prim_type = regs.primitive_type;
     key.polygon_mode = regs.polygon_control.PolyMode();
+    key.patch_control_points =
+        regs.stage_enable.hs_en ? regs.ls_hs_config.hs_input_control_points.Value() : 0;
     key.logic_op = regs.color_control.rop3;
     key.num_samples = regs.NumSamples();
+    key.cb_shader_mask = regs.color_shader_mask;

     const bool skip_cb_binding =
         regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;

-    // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color
-    // attachments. This might be not a case as HW color buffers can be bound in an arbitrary
-    // order. We need to do some arrays compaction at this stage
-    key.num_color_attachments = 0;
-    key.color_buffers.fill({});
-    key.blend_controls.fill({});
-    key.write_masks.fill({});
-    key.vertex_buffer_formats.fill(vk::Format::eUndefined);
-
-    key.patch_control_points = 0;
-    if (regs.stage_enable.hs_en.Value()) {
-        key.patch_control_points = regs.ls_hs_config.hs_input_control_points.Value();
-    }
-
-    // First pass of bindings check to idenitfy formats and swizzles and pass them to rhe shader
-    // recompiler.
-    for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
-        auto const& col_buf = regs.color_buffers[cb];
-        if (skip_cb_binding || !col_buf) {
-            // No attachment bound and no incremented index.
+    // First pass to fill render target information
+    for (s32 cb = 0; cb < Liverpool::NumColorBuffers && !skip_cb_binding; ++cb) {
+        const auto& col_buf = regs.color_buffers[cb];
+        const u32 target_mask = regs.color_target_mask.GetMask(cb);
+        if (!col_buf || !target_mask) {
+            // No attachment bound or writing to it is disabled.
             continue;
         }

-        const auto remapped_cb = key.num_color_attachments++;
-        if (!regs.color_target_mask.GetMask(cb)) {
-            // Bound to null handle, skip over this attachment index.
-            continue;
-        }
-
-        // Metal seems to have an issue where 8-bit unorm/snorm/sRGB outputs to render target
-        // need a bias applied to round correctly; detect and set the flag for that here.
-        const auto needs_unorm_fixup = instance.GetDriverID() == vk::DriverId::eMoltenvk &&
-                                       (col_buf.GetNumberFmt() == AmdGpu::NumberFormat::Unorm ||
-                                        col_buf.GetNumberFmt() == AmdGpu::NumberFormat::Snorm ||
-                                        col_buf.GetNumberFmt() == AmdGpu::NumberFormat::Srgb) &&
-                                       (col_buf.GetDataFmt() == AmdGpu::DataFormat::Format8 ||
-                                        col_buf.GetDataFmt() == AmdGpu::DataFormat::Format8_8 ||
-                                        col_buf.GetDataFmt() == AmdGpu::DataFormat::Format8_8_8_8);
-
-        key.color_buffers[remapped_cb] = Shader::PsColorBuffer{
+        // Fill color target information
+        key.color_buffers[cb] = Shader::PsColorBuffer{
             .data_format = col_buf.GetDataFmt(),
             .num_format = col_buf.GetNumberFmt(),
             .num_conversion = col_buf.GetNumberConversion(),
             .export_format = regs.color_export_format.GetFormat(cb),
-            .needs_unorm_fixup = needs_unorm_fixup,
             .swizzle = col_buf.Swizzle(),
         };
+
+        // Fill color blending information
+        key.blend_controls[cb] = regs.blend_control[cb];
+        key.blend_controls[cb].enable.Assign(regs.blend_control[cb].enable &&
+                                             !col_buf.info.blend_bypass);
+
+        // Apply swizzle to target mask
+        const auto& swizzle = key.color_buffers[cb].swizzle;
+        for (u32 i = 0; i < 4; ++i) {
+            key.write_masks[cb] |= ((target_mask >> i) & 1) << swizzle.Map(i);
+        }
     }

+    // Compile and bind shader stages
+    if (!RefreshGraphicsStages()) {
+        return false;
+    }
+
+    // Second pass to mask out render targets not written by fragment shader
+    for (s32 cb = 0; cb < key.num_color_attachments && !skip_cb_binding; ++cb) {
+        const auto& col_buf = regs.color_buffers[cb];
+        if (!col_buf || !regs.color_target_mask.GetMask(cb)) {
+            continue;
+        }
+        if ((key.mrt_mask & (1u << cb)) == 0) {
+            // Attachment is bound and mask allows writes but shader does not output to it.
+            key.color_buffers[cb] = {};
+        }
+    }
+
+    return true;
+}
+
+bool PipelineCache::RefreshGraphicsStages() {
+    const auto& regs = liverpool->regs;
+    auto& key = graphics_key;
     fetch_shader = std::nullopt;

     Shader::Backend::Bindings binding{};
-    const auto& TryBindStage = [&](Shader::Stage stage_in, Shader::LogicalStage stage_out) -> bool {
+    const auto bind_stage = [&](Shader::Stage stage_in, Shader::LogicalStage stage_out) -> bool {
         const auto stage_in_idx = static_cast<u32>(stage_in);
         const auto stage_out_idx = static_cast<u32>(stage_out);
         if (!regs.stage_enable.IsStageEnabled(stage_in_idx)) {

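A hypothetical worked example of the "Apply swizzle to target mask" loop above: each bit i enabled in target_mask is moved to position Map(i), so the Vulkan-side write mask tracks the component remapping rather than the raw register layout:

#include <cassert>
#include <cstdint>

int main() {
    // Map(i) results for a BGRA swizzle (see the CompMapping::Map hunk): 0 <-> 2 swap.
    const std::uint32_t map[4] = {2, 1, 0, 3};
    const std::uint32_t target_mask = 0b0011; // R and G writes enabled
    std::uint32_t write_mask = 0;
    for (std::uint32_t i = 0; i < 4; ++i) {
        write_mask |= ((target_mask >> i) & 1) << map[i];
    }
    assert(write_mask == 0b0110); // the R bit landed on the blue position
}
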
@@ -405,52 +412,50 @@ bool PipelineCache::RefreshGraphicsKey() {
         return true;
     };

-    const auto& IsGsFeaturesSupported = [&]() -> bool {
-        // These checks are temporary until all functionality is implemented.
-        return !regs.vgt_gs_mode.onchip && !regs.vgt_strmout_config.raw;
-    };
-
     infos.fill(nullptr);
-    TryBindStage(Stage::Fragment, LogicalStage::Fragment);
+    bind_stage(Stage::Fragment, LogicalStage::Fragment);

     const auto* fs_info = infos[static_cast<u32>(LogicalStage::Fragment)];
     key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u;
+    key.num_color_attachments = std::bit_width(key.mrt_mask);

     switch (regs.stage_enable.raw) {
-    case Liverpool::ShaderStageEnable::VgtStages::EsGs: {
-        if (!instance.IsGeometryStageSupported() || !IsGsFeaturesSupported()) {
+    case Liverpool::ShaderStageEnable::VgtStages::EsGs:
+        if (!instance.IsGeometryStageSupported()) {
             LOG_WARNING(Render_Vulkan, "Geometry shader stage unsupported, skipping");
             return false;
         }
-        if (!TryBindStage(Stage::Export, LogicalStage::Vertex)) {
+        if (regs.vgt_gs_mode.onchip || regs.vgt_strmout_config.raw) {
+            LOG_WARNING(Render_Vulkan, "Geometry shader features unsupported, skipping");
             return false;
         }
-        if (!TryBindStage(Stage::Geometry, LogicalStage::Geometry)) {
+        if (!bind_stage(Stage::Export, LogicalStage::Vertex)) {
+            return false;
+        }
+        if (!bind_stage(Stage::Geometry, LogicalStage::Geometry)) {
             return false;
         }
         break;
-    }
-    case Liverpool::ShaderStageEnable::VgtStages::LsHs: {
+    case Liverpool::ShaderStageEnable::VgtStages::LsHs:
         if (!instance.IsTessellationSupported() ||
             (regs.tess_config.type == AmdGpu::TessellationType::Isoline &&
              !instance.IsTessellationIsolinesSupported())) {
             return false;
         }
-        if (!TryBindStage(Stage::Hull, LogicalStage::TessellationControl)) {
+        if (!bind_stage(Stage::Hull, LogicalStage::TessellationControl)) {
             return false;
         }
-        if (!TryBindStage(Stage::Vertex, LogicalStage::TessellationEval)) {
+        if (!bind_stage(Stage::Vertex, LogicalStage::TessellationEval)) {
             return false;
         }
-        if (!TryBindStage(Stage::Local, LogicalStage::Vertex)) {
+        if (!bind_stage(Stage::Local, LogicalStage::Vertex)) {
             return false;
         }
         break;
-    }
-    default: {
-        TryBindStage(Stage::Vertex, LogicalStage::Vertex);
+    default:
+        bind_stage(Stage::Vertex, LogicalStage::Vertex);
         break;
     }
-    }

     const auto* vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)];
     if (vs_info && fetch_shader && !instance.IsVertexInputDynamicState()) {

@@ -465,40 +470,6 @@ bool PipelineCache::RefreshGraphicsKey() {
         }
     }

-    // Second pass to fill remain CB pipeline key data
-    for (auto cb = 0u, remapped_cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
-        auto const& col_buf = regs.color_buffers[cb];
-        if (skip_cb_binding || !col_buf) {
-            // No attachment bound and no incremented index.
-            continue;
-        }
-
-        const u32 target_mask = regs.color_target_mask.GetMask(cb);
-        if (!target_mask || (key.mrt_mask & (1u << cb)) == 0) {
-            // Attachment is masked out by either color_target_mask or shader mrt_mask. In the case
-            // of the latter we need to change format to undefined, and either way we need to
-            // increment the index for the null attachment binding.
-            key.color_buffers[remapped_cb++] = {};
-            continue;
-        }
-
-        key.blend_controls[remapped_cb] = regs.blend_control[cb];
-        key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
-                                                      !col_buf.info.blend_bypass);
-        // Apply swizzle to target mask
-        for (u32 i = 0; i < 4; i++) {
-            if (target_mask & (1 << i)) {
-                const auto swizzled_comp =
-                    static_cast<u32>(key.color_buffers[remapped_cb].swizzle.array[i]);
-                constexpr u32 min_comp = static_cast<u32>(AmdGpu::CompSwizzle::Red);
-                const u32 comp = swizzled_comp >= min_comp ? swizzled_comp - min_comp : i;
-                key.write_masks[remapped_cb] |= vk::ColorComponentFlagBits{1u << comp};
-            }
-        }
-        key.cb_shader_mask.SetMask(remapped_cb, regs.color_shader_mask.GetMask(cb));
-        ++remapped_cb;
-    }
-
     return true;
 }

@@ -74,6 +74,7 @@ public:

 private:
     bool RefreshGraphicsKey();
+    bool RefreshGraphicsStages();
     bool RefreshComputeKey();

     void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, size_t perm_idx,

@@ -113,6 +113,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
     state.width = instance.GetMaxFramebufferWidth();
     state.height = instance.GetMaxFramebufferHeight();
     state.num_layers = std::numeric_limits<u32>::max();
+    state.num_color_attachments = std::bit_width(mrt_mask);

     cb_descs.clear();
     db_desc.reset();

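A small illustration of the std::bit_width relationship used here and in RefreshGraphicsStages: the attachment count becomes the highest shader-written MRT slot plus one, so lower unwritten slots stay in the array as VK_NULL_HANDLE holes instead of being compacted away as before:

#include <bit>
#include <cstdint>

// mrt_mask bit i set <=> fragment shader writes MRT slot i.
static_assert(std::bit_width(std::uint32_t{0b0101}) == 3); // slots 0..2 bound, slot 1 stays null
static_assert(std::bit_width(std::uint32_t{0}) == 0);      // no outputs -> no color attachments
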
@@ -125,29 +126,31 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {

     const bool skip_cb_binding =
         regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
-    for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) {
-        const auto& col_buf = regs.color_buffers[col_buf_id];
-        if (skip_cb_binding || !col_buf) {
+
+    for (s32 cb = 0; cb < state.num_color_attachments && !skip_cb_binding; ++cb) {
+        const auto& col_buf = regs.color_buffers[cb];
+        if (!col_buf) {
+            state.color_attachments[cb].imageView = VK_NULL_HANDLE;
             continue;
         }

         // Skip stale color buffers if shader doesn't output to them. Otherwise it will perform
         // an unnecessary transition and may result in state conflict if the resource is already
         // bound for reading.
-        if ((mrt_mask & (1 << col_buf_id)) == 0) {
-            state.color_attachments[state.num_color_attachments++].imageView = VK_NULL_HANDLE;
+        if ((mrt_mask & (1 << cb)) == 0) {
+            state.color_attachments[cb].imageView = VK_NULL_HANDLE;
             continue;
         }

         // If the color buffer is still bound but rendering to it is disabled by the target
         // mask, we need to prevent the render area from being affected by unbound render target
         // extents.
-        if (!regs.color_target_mask.GetMask(col_buf_id)) {
-            state.color_attachments[state.num_color_attachments++].imageView = VK_NULL_HANDLE;
+        if (!regs.color_target_mask.GetMask(cb)) {
+            state.color_attachments[cb].imageView = VK_NULL_HANDLE;
             continue;
         }

-        const auto& hint = liverpool->last_cb_extent[col_buf_id];
+        const auto& hint = liverpool->last_cb_extent[cb];
         auto& [image_id, desc] = cb_descs.emplace_back(std::piecewise_construct, std::tuple{},
                                                        std::tuple{col_buf, hint});
         const auto& image_view = texture_cache.FindRenderTarget(desc);

@@ -163,7 +166,7 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) {
         state.width = std::min<u32>(state.width, std::max(image.info.size.width >> mip, 1u));
         state.height = std::min<u32>(state.height, std::max(image.info.size.height >> mip, 1u));
         state.num_layers = std::min<u32>(state.num_layers, image_view.info.range.extent.layers);
-        state.color_attachments[state.num_color_attachments++] = {
+        state.color_attachments[cb] = {
             .imageView = *image_view.image_view,
             .imageLayout = vk::ImageLayout::eUndefined,
             .loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,

@@ -1094,7 +1097,6 @@ void Rasterizer::UpdateDynamicState(const GraphicsPipeline& pipeline) const {

     auto& dynamic_state = scheduler.GetDynamicState();
     dynamic_state.SetBlendConstants(liverpool->regs.blend_constants);
-    dynamic_state.SetColorWriteMasks(pipeline.GetWriteMasks());

     // Commit new dynamic state to the command buffer.
     dynamic_state.Commit(instance, scheduler.CommandBuffer());

@@ -1,10 +1,8 @@
 // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

 #include <mutex>
 #include "common/assert.h"
 #include "common/debug.h"
 #include "common/logging/log.h"
 #include "imgui/renderer/texture_manager.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"

@@ -325,12 +323,6 @@ void DynamicState::Commit(const Instance& instance, const vk::CommandBuffer& cmd
         dirty_state.blend_constants = false;
         cmdbuf.setBlendConstants(blend_constants.data());
     }
-    if (dirty_state.color_write_masks) {
-        dirty_state.color_write_masks = false;
-        if (instance.IsDynamicColorWriteMaskSupported()) {
-            cmdbuf.setColorWriteMaskEXT(0, color_write_masks);
-        }
-    }
     if (dirty_state.line_width) {
         dirty_state.line_width = false;
         cmdbuf.setLineWidth(line_width);

@@ -5,6 +5,7 @@

 #include <condition_variable>
 #include <boost/container/static_vector.hpp>

 #include "common/types.h"
 #include "common/unique_function.h"
 #include "video_core/amdgpu/liverpool.h"

@@ -107,7 +108,6 @@ struct DynamicState {
         bool front_face : 1;

         bool blend_constants : 1;
-        bool color_write_masks : 1;
         bool line_width : 1;
     } dirty_state{};

@@ -143,7 +143,6 @@ struct DynamicState {
     vk::FrontFace front_face{};

     std::array<float, 4> blend_constants{};
-    ColorWriteMasks color_write_masks{};
     float line_width{};

     /// Commits the dynamic state to the provided command buffer.

@@ -307,13 +306,6 @@ struct DynamicState {
         }
     }

-    void SetColorWriteMasks(const ColorWriteMasks& color_write_masks_) {
-        if (!std::ranges::equal(color_write_masks, color_write_masks_)) {
-            color_write_masks = color_write_masks_;
-            dirty_state.color_write_masks = true;
-        }
-    }
-
     void SetLineWidth(const float width) {
         if (line_width != width) {
             line_width = width;