diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 3e906745b..ba7439273 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -82,23 +82,29 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s instance.GetDevice()); Vulkan::SetObjectName(instance.GetDevice(), module, "Fault Buffer Parser"); + const vk::SpecializationMapEntry specialization_map_entry = { + .constantID = 0, + .offset = 0, + .size = sizeof(u32), + }; + + const vk::SpecializationInfo specialization_info = { + .mapEntryCount = 1, + .pMapEntries = &specialization_map_entry, + .dataSize = sizeof(u32), + .pData = &CACHING_PAGEBITS, + }; + const vk::PipelineShaderStageCreateInfo shader_ci = { .stage = vk::ShaderStageFlagBits::eCompute, .module = module, .pName = "main", - }; - - const vk::PushConstantRange push_constants = { - .stageFlags = vk::ShaderStageFlagBits::eCompute, - .offset = 0, - .size = sizeof(u32), + .pSpecializationInfo = &specialization_info, }; const vk::PipelineLayoutCreateInfo layout_info = { .setLayoutCount = 1U, .pSetLayouts = &(*fault_process_desc_layout), - .pushConstantRangeCount = 1, - .pPushConstantRanges = &push_constants, }; auto [layout_result, layout] = instance.GetDevice().createPipelineLayoutUnique(layout_info); @@ -675,7 +681,6 @@ void BufferCache::ProcessFaultBuffer() { }); cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline); cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0, writes); - cmdbuf.pushConstants(*fault_process_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32), &CACHING_PAGEBITS); constexpr u32 num_threads = CACHING_NUMPAGES / 32; // 1 bit per page, 32 pages per workgroup constexpr u32 num_workgroups = Common::DivCeil(num_threads, 64u); cmdbuf.dispatch(num_workgroups, 1, 1); diff --git 
a/src/video_core/host_shaders/fault_buffer_process.comp b/src/video_core/host_shaders/fault_buffer_process.comp index b381a1299..cdff9f302 100644 --- a/src/video_core/host_shaders/fault_buffer_process.comp +++ b/src/video_core/host_shaders/fault_buffer_process.comp @@ -3,7 +3,6 @@ #version 450 #extension GL_ARB_gpu_shader_int64 : enable -#extension GL_EXT_shader_atomic_int64 : enable layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; @@ -12,13 +11,16 @@ layout(std430, binding = 0) buffer input_buf { }; layout(std430, binding = 1) buffer output_buf { - uint64_t parsed_buffer[]; + uint64_t download_buffer[]; }; -layout(push_constant) uniform parsing_info { - uint caching_pagebits; +// Overlap for 32 bit atomics +layout(std430, binding = 1) buffer output_buf32 { + uint download_buffer32[]; }; +layout(constant_id = 0) const uint CACHING_PAGEBITS = 0; + void main() { uint id = gl_GlobalInvocationID.x; uint word = fault_buffer[id]; @@ -31,10 +33,10 @@ void main() { uint bit = findLSB(word); word &= word - 1; uint page = base_bit + bit; - uint store_index = uint(atomicAdd(parsed_buffer[0], 1u)) + 1u; + uint store_index = atomicAdd(download_buffer32[0], 1u) + 1u; // It is very unlikely, but should we check for overflow? if (store_index < 1024u) { // only support 1024 page faults - parsed_buffer[store_index] = uint64_t(page) << caching_pagebits; + download_buffer[store_index] = uint64_t(page) << CACHING_PAGEBITS; } } } \ No newline at end of file