This commit is contained in:
Marcin Mikołajczyk 2025-06-05 00:35:40 +01:00
parent 0b2128448a
commit d694b8cf7e
6 changed files with 37 additions and 21 deletions

View File

@ -14,7 +14,8 @@ Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
return AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
const Id pointer = ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
const Id pointer =
ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
return ctx.OpLoad(ctx.U16, pointer);
});
}
@ -25,7 +26,8 @@ Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
return AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
const Id pointer =
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
return ctx.OpLoad(ctx.U32[1], pointer);
});
}
@ -36,7 +38,8 @@ Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
return AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
const Id pointer{ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
const Id pointer{
ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
return ctx.OpLoad(ctx.U64, pointer);
});
}
@ -47,7 +50,8 @@ void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 2u)};
AccessBoundsCheck<16>(ctx, index, ctx.ConstU32(num_elements), [&] {
const Id pointer = ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
const Id pointer =
ctx.OpAccessChain(ctx.shared_u16, ctx.shared_memory_u16, ctx.u32_zero_value, index);
ctx.OpStore(pointer, value);
return ctx.OpUndef(ctx.U16);
});
@ -59,7 +63,8 @@ void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 4u)};
AccessBoundsCheck<32>(ctx, index, ctx.ConstU32(num_elements), [&] {
const Id pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
const Id pointer =
ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index);
ctx.OpStore(pointer, value);
return ctx.OpUndef(ctx.U32[1]);
});
@ -71,7 +76,8 @@ void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
const u32 num_elements{Common::DivCeil(ctx.runtime_info.cs_info.shared_memory_size, 8u)};
AccessBoundsCheck<64>(ctx, index, ctx.ConstU32(num_elements), [&] {
const Id pointer{ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
const Id pointer{
ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)};
ctx.OpStore(pointer, value);
return ctx.OpUndef(ctx.U64);
});

View File

@ -219,17 +219,19 @@ void Translator::DS_WRITE(int bit_size, bool is_signed, bool is_pair, bool strid
if (bit_size == 32) {
ir.WriteShared(32, ir.GetVectorReg(data0), addr0);
} else {
ir.WriteShared(
64, ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data0), ir.GetVectorReg(data0 + 1))),
addr0);
ir.WriteShared(64,
ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data0),
ir.GetVectorReg(data0 + 1))),
addr0);
}
const IR::U32 addr1 = ir.IAdd(addr, ir.Imm32(u32(inst.control.ds.offset1 * adj)));
if (bit_size == 32) {
ir.WriteShared(32, ir.GetVectorReg(data1), addr1);
} else {
ir.WriteShared(
64, ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data1), ir.GetVectorReg(data1 + 1))),
addr1);
ir.WriteShared(64,
ir.PackUint2x32(ir.CompositeConstruct(ir.GetVectorReg(data1),
ir.GetVectorReg(data1 + 1))),
addr1);
}
} else if (bit_size == 64) {
const IR::U32 addr0 = ir.IAdd(addr, ir.Imm32(offset));

View File

@ -37,7 +37,8 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_
// Only perform the transform if the host shared memory is insufficient
// or the device does not support VK_KHR_workgroup_memory_explicit_layout
const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
if (shared_memory_size <= profile.max_shared_memory_size && profile.supports_workgroup_explicit_memory_layout) {
if (shared_memory_size <= profile.max_shared_memory_size &&
profile.supports_workgroup_explicit_memory_layout) {
return;
}
// Add buffer binding for shared memory storage buffer.

View File

@ -284,16 +284,19 @@ bool Instance::CreateDevice() {
LOG_INFO(Render_Vulkan, "- shaderImageFloat32AtomicMinMax: {}",
shader_atomic_float2_features.shaderImageFloat32AtomicMinMax);
}
workgroup_memory_explicit_layout = add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
workgroup_memory_explicit_layout =
add_extension(VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME);
if (workgroup_memory_explicit_layout) {
workgroup_memory_explicit_layout_features =
feature_chain.get<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
LOG_INFO(Render_Vulkan, "- workgroupMemoryExplicitLayout: {}",
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout);
LOG_INFO(Render_Vulkan, "- workgroupMemoryExplicitLayoutScalarBlockLayout: {}",
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayoutScalarBlockLayout);
LOG_INFO(Render_Vulkan, "- workgroupMemoryExplicitLayout16BitAccess: {}",
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess);
workgroup_memory_explicit_layout_features
.workgroupMemoryExplicitLayoutScalarBlockLayout);
LOG_INFO(
Render_Vulkan, "- workgroupMemoryExplicitLayout16BitAccess: {}",
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess);
}
const bool calibrated_timestamps =
TRACY_GPU_ENABLED ? add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) : false;
@ -436,7 +439,8 @@ bool Instance::CreateDevice() {
.workgroupMemoryExplicitLayout =
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout,
.workgroupMemoryExplicitLayoutScalarBlockLayout =
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayoutScalarBlockLayout,
workgroup_memory_explicit_layout_features
.workgroupMemoryExplicitLayoutScalarBlockLayout,
.workgroupMemoryExplicitLayout16BitAccess =
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess,
},

View File

@ -173,7 +173,8 @@ public:
/// Returns true when the VK_KHR_workgroup_memory_explicit_layout extension flag is set and
/// the device reports the workgroupMemoryExplicitLayout16BitAccess feature.
/// Both conditions are required; the base workgroupMemoryExplicitLayout feature bit is not
/// consulted by this accessor.
bool IsWorkgroupMemoryExplicitLayoutSupported() const {
return workgroup_memory_explicit_layout && workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess;
return workgroup_memory_explicit_layout &&
workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess;
}
/// Returns true when geometry shaders are supported by the device
@ -354,7 +355,8 @@ private:
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features;
vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features;
vk::PhysicalDeviceShaderAtomicFloat2FeaturesEXT shader_atomic_float2_features;
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR workgroup_memory_explicit_layout_features;
vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR
workgroup_memory_explicit_layout_features;
vk::DriverIdKHR driver_id;
vk::UniqueDebugUtilsMessengerEXT debug_callback{};
std::string vendor_name;

View File

@ -216,7 +216,8 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
// TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed.
.supports_robust_buffer_access = true, // instance_.IsRobustBufferAccess2Supported(),
.supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(),
.supports_workgroup_explicit_memory_layout = instance_.IsWorkgroupMemoryExplicitLayoutSupported(),
.supports_workgroup_explicit_memory_layout =
instance_.IsWorkgroupMemoryExplicitLayoutSupported(),
.needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() &&
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||