Use spec const and 32 bit atomic

2025-07-27 12:34:37 +00:00 · 2025-04-29 17:27:06 +02:00 · 2025-04-29 17:27:06 +02:00 · 0bf4e75da8
commit 0bf4e75da8
parent f260a61d26
2 changed files with 22 additions and 15 deletions
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@ -82,23 +82,29 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
        instance.GetDevice());
    Vulkan::SetObjectName(instance.GetDevice(), module, "Fault Buffer Parser");

+    const vk::SpecializationMapEntry specialization_map_entry = {
+        .constantID = 0,
+        .offset = 0,
+        .size = sizeof(u32),
+    };
+
+    const vk::SpecializationInfo specialization_info = {
+        .mapEntryCount = 1,
+        .pMapEntries = &specialization_map_entry,
+        .dataSize = sizeof(u32),
+        .pData = &CACHING_PAGEBITS,
+    };
+
    const vk::PipelineShaderStageCreateInfo shader_ci = {
        .stage = vk::ShaderStageFlagBits::eCompute,
        .module = module,
        .pName = "main",
-    };
-
-    const vk::PushConstantRange push_constants = {
-        .stageFlags = vk::ShaderStageFlagBits::eCompute,
-        .offset = 0,
-        .size = sizeof(u32),
+        .pSpecializationInfo = &specialization_info,
    };

    const vk::PipelineLayoutCreateInfo layout_info = {
        .setLayoutCount = 1U,
        .pSetLayouts = &(*fault_process_desc_layout),
-        .pushConstantRangeCount = 1,
-        .pPushConstantRanges = &push_constants,
    };
    auto [layout_result, layout] =
        instance.GetDevice().createPipelineLayoutUnique(layout_info);
@ -675,7 +681,6 @@ void BufferCache::ProcessFaultBuffer() {
    });
    cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline);
    cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0, writes);
-    cmdbuf.pushConstants(*fault_process_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32), &CACHING_PAGEBITS);
    constexpr u32 num_threads = CACHING_NUMPAGES / 32; // 1 bit per page, 32 pages per workgroup
    constexpr u32 num_workgroups = Common::DivCeil(num_threads, 64u);
    cmdbuf.dispatch(num_workgroups, 1, 1);
--- a/src/video_core/host_shaders/fault_buffer_process.comp
+++ b/src/video_core/host_shaders/fault_buffer_process.comp
@ -3,7 +3,6 @@

 #version 450
 #extension GL_ARB_gpu_shader_int64 : enable
-#extension GL_EXT_shader_atomic_int64 : enable

 layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

@ -12,13 +11,16 @@ layout(std430, binding = 0) buffer input_buf {
 };

 layout(std430, binding = 1) buffer output_buf {
-    uint64_t parsed_buffer[];
+    uint64_t download_buffer[];
 };

-layout(push_constant) uniform parsing_info {
-    uint caching_pagebits;
+// Second view of binding 1 as 32-bit words, so atomicAdd can target download_buffer32[0]
+layout(std430, binding = 1) buffer output_buf32 {
+    uint download_buffer32[];
 };

+layout(constant_id = 0) const uint CACHING_PAGEBITS = 0;
+
 void main() {
    uint id = gl_GlobalInvocationID.x;
    uint word = fault_buffer[id];
@ -31,10 +33,10 @@ void main() {
        uint bit = findLSB(word);
        word &= word - 1;
        uint page = base_bit + bit;
-        uint store_index = uint(atomicAdd(parsed_buffer[0], 1u)) + 1u;
+        uint store_index = atomicAdd(download_buffer32[0], 1u) + 1u; // element 0 is the counter; entries start at 1
        // It is very unlikely, but should we check for overflow?
        if (store_index < 1024u) { // only support 1024 page faults
-            parsed_buffer[store_index] = uint64_t(page) << caching_pagebits;
+            download_buffer[store_index] = uint64_t(page) << CACHING_PAGEBITS;
        }
    }
 }