Handle DS_READ_U16, DS_WRITE_B16, DS_ADD_U64 (#3007)

* Handle DS_READ_U16 & DS_WRITE_B16

* Refactor DS translation

* Translate DS_ADD_U64

* format

* Fix RingAccessElimination after changing WriteShared64 type

* Simplify bounds checking in generated SPIR-V
This commit is contained in:
Marcin Mikołajczyk
2025-06-09 21:03:38 +02:00
committed by GitHub
parent a71bfb30a2
commit 217d32b502
19 changed files with 323 additions and 89 deletions

View File

@@ -212,7 +212,8 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
vk::PhysicalDevicePortabilitySubsetFeaturesKHR,
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT,
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
features = feature_chain.get().features;
const vk::StructureChain properties_chain = physical_device.getProperties2<
@@ -283,6 +284,20 @@ bool Instance::CreateDevice() {
LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}",
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax);
}
workgroup_memory_explicit_layout =
add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
if (workgroup_memory_explicit_layout) {
workgroup_memory_explicit_layout_features =
feature_chain.get<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
LOG_INFO(Render_Vulkan, "- workgroupMemoryExplicitLayout: {}",
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout);
LOG_INFO(Render_Vulkan, "- workgroupMemoryExplicitLayoutScalarBlockLayout: {}",
workgroup_memory_explicit_layout_features
.workgroupMemoryExplicitLayoutScalarBlockLayout);
LOG_INFO(
Render_Vulkan, "- workgroupMemoryExplicitLayout16BitAccess: {}",
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess);
}
const bool calibrated_timestamps =
TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
@@ -420,6 +435,15 @@ bool Instance::CreateDevice() {
.shaderImageFloat32AtomicMinMax =
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax,
},
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR{
.workgroupMemoryExplicitLayout =
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout,
.workgroupMemoryExplicitLayoutScalarBlockLayout =
workgroup_memory_explicit_layout_features
.workgroupMemoryExplicitLayoutScalarBlockLayout,
.workgroupMemoryExplicitLayout16BitAccess =
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess,
},
#ifdef __APPLE__
portability_features,
#endif
@@ -452,6 +476,9 @@ bool Instance::CreateDevice() {
if (!shader_atomic_float2) {
device_chain.unlink<vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT>();
}
if (!workgroup_memory_explicit_layout) {
device_chain.unlink<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
}
auto [device_result, dev] = physical_device.createDeviceUnique(device_chain.get());
if (device_result != vk::Result::eSuccess) {

View File

@@ -171,6 +171,12 @@ public:
return shader_atomic_float2 && shader_atomic_float2_features.shaderImageFloat32AtomicMinMax;
}
/// Returns true when VK_KHR_workgroup_memory_explicit_layout is supported.
bool IsWorkgroupMemoryExplicitLayoutSupported() const {
return workgroup_memory_explicit_layout &&
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess;
}
/// Returns true when geometry shaders are supported by the device
bool IsGeometryStageSupported() const {
return features.geometryShader;
@@ -349,6 +355,8 @@ private:
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;
vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features;
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT shader_atomic_float2_features;
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR
workgroup_memory_explicit_layout_features;
vk::DriverIdKHR driver_id;
vk::UniqueDebugUtilsMessengerEXT debug_callback{};
std::string vendor_name;
@@ -374,6 +382,7 @@ private:
bool amd_gcn_shader{};
bool amd_shader_trinary_minmax{};
bool shader_atomic_float2{};
bool workgroup_memory_explicit_layout{};
bool portability_subset{};
};

View File

@@ -210,7 +210,6 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
.support_fp32_denorm_preserve = bool(vk12_props.shaderDenormPreserveFloat32),
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
.support_fp32_round_to_zero = bool(vk12_props.shaderRoundingModeRTZFloat32),
.support_explicit_workgroup_layout = true,
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
.supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
.supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
@@ -218,6 +217,8 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
// TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed.
.supports_robust_buffer_access = true, // instance_.IsRobustBufferAccess2Supported(),
.supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(),
.supports_workgroup_explicit_memory_layout =
instance_.IsWorkgroupMemoryExplicitLayoutSupported(),
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||