Use a specialization constant and 32-bit atomics

This commit is contained in:
Lander Gallastegi 2025-04-29 17:27:06 +02:00
parent f260a61d26
commit 0bf4e75da8
2 changed files with 22 additions and 15 deletions

View File

@ -82,23 +82,29 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
instance.GetDevice());
Vulkan::SetObjectName(instance.GetDevice(), module, "Fault Buffer Parser");
const vk::SpecializationMapEntry specialization_map_entry = {
.constantID = 0,
.offset = 0,
.size = sizeof(u32),
};
const vk::SpecializationInfo specialization_info = {
.mapEntryCount = 1,
.pMapEntries = &specialization_map_entry,
.dataSize = sizeof(u32),
.pData = &CACHING_PAGEBITS,
};
const vk::PipelineShaderStageCreateInfo shader_ci = {
.stage = vk::ShaderStageFlagBits::eCompute,
.module = module,
.pName = "main",
};
const vk::PushConstantRange push_constants = {
.stageFlags = vk::ShaderStageFlagBits::eCompute,
.offset = 0,
.size = sizeof(u32),
.pSpecializationInfo = &specialization_info,
};
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 1U,
.pSetLayouts = &(*fault_process_desc_layout),
.pushConstantRangeCount = 1,
.pPushConstantRanges = &push_constants,
};
auto [layout_result, layout] =
instance.GetDevice().createPipelineLayoutUnique(layout_info);
@ -675,7 +681,6 @@ void BufferCache::ProcessFaultBuffer() {
});
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline);
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0, writes);
cmdbuf.pushConstants(*fault_process_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32), &CACHING_PAGEBITS);
constexpr u32 num_threads = CACHING_NUMPAGES / 32; // 1 bit per page, 32 pages per workgroup
constexpr u32 num_workgroups = Common::DivCeil(num_threads, 64u);
cmdbuf.dispatch(num_workgroups, 1, 1);

View File

@ -3,7 +3,6 @@
#version 450
#extension GL_ARB_gpu_shader_int64 : enable
#extension GL_EXT_shader_atomic_int64 : enable
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
@ -12,13 +11,16 @@ layout(std430, binding = 0) buffer input_buf {
};
layout(std430, binding = 1) buffer output_buf {
uint64_t parsed_buffer[];
uint64_t download_buffer[];
};
layout(push_constant) uniform parsing_info {
uint caching_pagebits;
// Overlap for 32 bit atomics
layout(std430, binding = 1) buffer output_buf32 {
uint download_buffer32[];
};
layout(constant_id = 0) const uint CACHING_PAGEBITS = 0;
void main() {
uint id = gl_GlobalInvocationID.x;
uint word = fault_buffer[id];
@ -31,10 +33,10 @@ void main() {
uint bit = findLSB(word);
word &= word - 1;
uint page = base_bit + bit;
uint store_index = uint(atomicAdd(parsed_buffer[0], 1u)) + 1u;
uint store_index = atomicAdd(download_buffer32[0], 1u);
// It is very unlikely, but should we check for overflow?
if (store_index < 1024u) { // only support 1024 page faults
parsed_buffer[store_index] = uint64_t(page) << caching_pagebits;
download_buffer[store_index] = uint64_t(page) << CACHING_PAGEBITS;
}
}
}