mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-27 12:34:37 +00:00
Use a specialization constant and a 32-bit atomic
This commit is contained in:
parent
f260a61d26
commit
0bf4e75da8
@ -82,23 +82,29 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
|
||||
instance.GetDevice());
|
||||
Vulkan::SetObjectName(instance.GetDevice(), module, "Fault Buffer Parser");
|
||||
|
||||
const vk::SpecializationMapEntry specialization_map_entry = {
|
||||
.constantID = 0,
|
||||
.offset = 0,
|
||||
.size = sizeof(u32),
|
||||
};
|
||||
|
||||
const vk::SpecializationInfo specialization_info = {
|
||||
.mapEntryCount = 1,
|
||||
.pMapEntries = &specialization_map_entry,
|
||||
.dataSize = sizeof(u32),
|
||||
.pData = &CACHING_PAGEBITS,
|
||||
};
|
||||
|
||||
const vk::PipelineShaderStageCreateInfo shader_ci = {
|
||||
.stage = vk::ShaderStageFlagBits::eCompute,
|
||||
.module = module,
|
||||
.pName = "main",
|
||||
};
|
||||
|
||||
const vk::PushConstantRange push_constants = {
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
||||
.offset = 0,
|
||||
.size = sizeof(u32),
|
||||
.pSpecializationInfo = &specialization_info,
|
||||
};
|
||||
|
||||
const vk::PipelineLayoutCreateInfo layout_info = {
|
||||
.setLayoutCount = 1U,
|
||||
.pSetLayouts = &(*fault_process_desc_layout),
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges = &push_constants,
|
||||
};
|
||||
auto [layout_result, layout] =
|
||||
instance.GetDevice().createPipelineLayoutUnique(layout_info);
|
||||
@ -675,7 +681,6 @@ void BufferCache::ProcessFaultBuffer() {
|
||||
});
|
||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline);
|
||||
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0, writes);
|
||||
cmdbuf.pushConstants(*fault_process_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32), &CACHING_PAGEBITS);
|
||||
constexpr u32 num_threads = CACHING_NUMPAGES / 32; // 1 bit per page, 32 pages per workgroup
|
||||
constexpr u32 num_workgroups = Common::DivCeil(num_threads, 64u);
|
||||
cmdbuf.dispatch(num_workgroups, 1, 1);
|
||||
|
@ -3,7 +3,6 @@
|
||||
|
||||
#version 450
|
||||
#extension GL_ARB_gpu_shader_int64 : enable
|
||||
#extension GL_EXT_shader_atomic_int64 : enable
|
||||
|
||||
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
@ -12,13 +11,16 @@ layout(std430, binding = 0) buffer input_buf {
|
||||
};
|
||||
|
||||
layout(std430, binding = 1) buffer output_buf {
|
||||
uint64_t parsed_buffer[];
|
||||
uint64_t download_buffer[];
|
||||
};
|
||||
|
||||
layout(push_constant) uniform parsing_info {
|
||||
uint caching_pagebits;
|
||||
// Overlap for 32 bit atomics
|
||||
layout(std430, binding = 1) buffer output_buf32 {
|
||||
uint download_buffer32[];
|
||||
};
|
||||
|
||||
layout(constant_id = 0) const uint CACHING_PAGEBITS = 0;
|
||||
|
||||
void main() {
|
||||
uint id = gl_GlobalInvocationID.x;
|
||||
uint word = fault_buffer[id];
|
||||
@ -31,10 +33,10 @@ void main() {
|
||||
uint bit = findLSB(word);
|
||||
word &= word - 1;
|
||||
uint page = base_bit + bit;
|
||||
uint store_index = uint(atomicAdd(parsed_buffer[0], 1u)) + 1u;
|
||||
uint store_index = atomicAdd(download_buffer32[0], 1u);
|
||||
// It is very unlikely, but should we check for overflow?
|
||||
if (store_index < 1024u) { // only support 1024 page faults
|
||||
parsed_buffer[store_index] = uint64_t(page) << caching_pagebits;
|
||||
download_buffer[store_index] = uint64_t(page) << CACHING_PAGEBITS;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user