diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
index d8ea20b8c..180819b94 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -14,7 +14,8 @@ Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
 
     return AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
-        const Id pointer = ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
+        const Id pointer =
+            ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
         return ctx.OpLoad(ctx.U16, pointer);
     });
 }
@@ -25,7 +26,8 @@ Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
 
     return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
-        const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
+        const Id pointer =
+            ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
         return ctx.OpLoad(ctx.U32[1], pointer);
     });
 }
@@ -36,7 +38,8 @@ Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
 
     return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
-        const Id pointer{ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
+        const Id pointer{
+            ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
         return ctx.OpLoad(ctx.U64, pointer);
     });
 }
@@ -47,7 +50,8 @@ void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
 
     AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
-        const Id pointer = ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
+        const Id pointer =
+            ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
         ctx.OpStore(pointer, value);
         return ctx.OpUndef(ctx.U16);
     });
@@ -59,7 +63,8 @@ void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
 
     AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
-        const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
+        const Id pointer =
+            ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
         ctx.OpStore(pointer, value);
         return ctx.OpUndef(ctx.U32[1]);
     });
@@ -71,7 +76,8 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
     const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
 
     AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
-        const Id pointer{ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
+        const Id pointer{
+            ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
         ctx.OpStore(pointer, value);
         return ctx.OpUndef(ctx.U64);
     });
diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp
index c34393594..4b6a58fd0 100644
--- a/src/shader_recompiler/frontend/translate/data_share.cpp
+++ b/src/shader_recompiler/frontend/translate/data_share.cpp
@@ -219,17 +219,19 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
         if (bit_size == 32) {
             ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
         } else {
-            ir.WriteShared(
-                64, ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1))),
-                addr0);
+            ir.WriteShared(64,
+                           ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data0),
+                                                                 ir.GetVectorReg(data0 + 1))),
+                           addr0);
         }
         const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
         if (bit_size == 32) {
             ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
         } else {
-            ir.WriteShared(
-                64, ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1))),
-                addr1);
+            ir.WriteShared(64,
+                           ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data1),
+                                                                 ir.GetVectorReg(data1 + 1))),
+                           addr1);
         }
     } else if (bit_size == 64) {
         const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));
diff --git a/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp b/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp
index 90e756e61..409c05940 100644
--- a/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp
@@ -37,7 +37,8 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
     // Only perform the transform if the host shared memory is insufficient
     // or the device does not support VK_KHR_workgroup_memory_explicit_layout
     const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
-    if (shared_memory_size <= profile.max_shared_memory_size && profile.supports_workgroup_explicit_memory_layout) {
+    if (shared_memory_size <= profile.max_shared_memory_size &&
+        profile.supports_workgroup_explicit_memory_layout) {
         return;
     }
     // Add buffer binding for shared memory storage buffer.
diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp
index 0135b667f..0591e06ce 100644
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@@ -284,16 +284,19 @@ bool Instance::CreateDevice() {
         LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}",
                  shader_atomic_float2_features.shaderImageFloat32AtomicMinMax);
     }
-    workgroup_memory_explicit_layout = add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
+    workgroup_memory_explicit_layout =
+        add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
     if (workgroup_memory_explicit_layout) {
         workgroup_memory_explicit_layout_features =
             feature_chain.get<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
         LOG_INFO(Render_Vulkan, "- workgroupMemoryExplicitLayout: {}",
                  workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout);
         LOG_INFO(Render_Vulkan, "- workgroupMemoryExplicitLayoutScalarBlockLayout: {}",
-                 workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayoutScalarBlockLayout);
-        LOG_INFO(Render_Vulkan, "- workgroupMemoryExplicitLayout16BitAccess: {}",
-                 workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess);
+                 workgroup_memory_explicit_layout_features
+                     .workgroupMemoryExplicitLayoutScalarBlockLayout);
+        LOG_INFO(
+            Render_Vulkan, "- workgroupMemoryExplicitLayout16BitAccess: {}",
+            workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess);
     }
     const bool calibrated_timestamps =
         TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
@@ -436,7 +439,8 @@ bool Instance::CreateDevice() {
             .workgroupMemoryExplicitLayout =
                 workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout,
             .workgroupMemoryExplicitLayoutScalarBlockLayout =
-                workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayoutScalarBlockLayout,
+                workgroup_memory_explicit_layout_features
+                    .workgroupMemoryExplicitLayoutScalarBlockLayout,
             .workgroupMemoryExplicitLayout16BitAccess =
                 workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess,
         },
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h
index 9b9adb768..c687e6f67 100644
--- a/src/video_core/renderer_vulkan/vk_instance.h
+++ b/src/video_core/renderer_vulkan/vk_instance.h
@@ -173,7 +173,8 @@ public:
 
     /// Returns true when VK_KHR_workgroup_memory_explicit_layout is supported.
     bool IsWorkgroupMemoryExplicitLayoutSupported() const {
-        return workgroup_memory_explicit_layout && workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess;
+        return workgroup_memory_explicit_layout &&
+               workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess;
     }
 
     /// Returns true when geometry shaders are supported by the device
@@ -354,7 +355,8 @@ private:
     vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;
     vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features;
     vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT shader_atomic_float2_features;
-    vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_memory_explicit_layout_features;
+    vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR
+        workgroup_memory_explicit_layout_features;
     vk::DriverIdKHR driver_id;
     vk::UniqueDebugUtilsMessengerEXT debug_callback{};
     std::string vendor_name;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 55f1d3d22..74b27b6a6 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -216,7 +216,8 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
         // TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed.
         .supports_robust_buffer_access = true, // instance_.IsRobustBufferAccess2Supported(),
         .supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(),
-        .supports_workgroup_explicit_memory_layout = instance_.IsWorkgroupMemoryExplicitLayoutSupported(),
+        .supports_workgroup_explicit_memory_layout =
+            instance_.IsWorkgroupMemoryExplicitLayoutSupported(),
         .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
                                       instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
        .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
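
A minimal sketch of the guard shape behind the AccessBoundsCheck<N> calls reformatted above, assuming a hypothetical host-side BoundsCheckedAccess helper; the real helper emits SPIR-V selection/merge control flow around the access rather than executing on the host:

#include <cstdint>
#include <utility>

// Hypothetical illustration only: run the access closure when the element
// index is in range, otherwise yield a default-constructed value (the
// emitted SPIR-V returns an OpUndef result in the out-of-range branch).
template <typename F>
auto BoundsCheckedAccess(uint32_t index, uint32_t num_elements, F&& access) {
    using Result = decltype(access());
    if (index < num_elements) {
        return std::forward<F>(access)();
    }
    return Result{};
}

This also mirrors why the write emitters above return ctx.OpUndef(...) from their lambdas: the same bounds-check wrapper serves both loads and stores, so store closures still produce a (discarded) result value.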