mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-27 20:44:28 +00:00
Use spec const and 32 bit atomic
This commit is contained in:
parent
f260a61d26
commit
0bf4e75da8
@ -82,23 +82,29 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
|
|||||||
instance.GetDevice());
|
instance.GetDevice());
|
||||||
Vulkan::SetObjectName(instance.GetDevice(), module, "Fault Buffer Parser");
|
Vulkan::SetObjectName(instance.GetDevice(), module, "Fault Buffer Parser");
|
||||||
|
|
||||||
|
const vk::SpecializationMapEntry specialization_map_entry = {
|
||||||
|
.constantID = 0,
|
||||||
|
.offset = 0,
|
||||||
|
.size = sizeof(u32),
|
||||||
|
};
|
||||||
|
|
||||||
|
const vk::SpecializationInfo specialization_info = {
|
||||||
|
.mapEntryCount = 1,
|
||||||
|
.pMapEntries = &specialization_map_entry,
|
||||||
|
.dataSize = sizeof(u32),
|
||||||
|
.pData = &CACHING_PAGEBITS,
|
||||||
|
};
|
||||||
|
|
||||||
const vk::PipelineShaderStageCreateInfo shader_ci = {
|
const vk::PipelineShaderStageCreateInfo shader_ci = {
|
||||||
.stage = vk::ShaderStageFlagBits::eCompute,
|
.stage = vk::ShaderStageFlagBits::eCompute,
|
||||||
.module = module,
|
.module = module,
|
||||||
.pName = "main",
|
.pName = "main",
|
||||||
};
|
.pSpecializationInfo = &specialization_info,
|
||||||
|
|
||||||
const vk::PushConstantRange push_constants = {
|
|
||||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
|
||||||
.offset = 0,
|
|
||||||
.size = sizeof(u32),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const vk::PipelineLayoutCreateInfo layout_info = {
|
const vk::PipelineLayoutCreateInfo layout_info = {
|
||||||
.setLayoutCount = 1U,
|
.setLayoutCount = 1U,
|
||||||
.pSetLayouts = &(*fault_process_desc_layout),
|
.pSetLayouts = &(*fault_process_desc_layout),
|
||||||
.pushConstantRangeCount = 1,
|
|
||||||
.pPushConstantRanges = &push_constants,
|
|
||||||
};
|
};
|
||||||
auto [layout_result, layout] =
|
auto [layout_result, layout] =
|
||||||
instance.GetDevice().createPipelineLayoutUnique(layout_info);
|
instance.GetDevice().createPipelineLayoutUnique(layout_info);
|
||||||
@ -675,7 +681,6 @@ void BufferCache::ProcessFaultBuffer() {
|
|||||||
});
|
});
|
||||||
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline);
|
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline);
|
||||||
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0, writes);
|
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0, writes);
|
||||||
cmdbuf.pushConstants(*fault_process_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32), &CACHING_PAGEBITS);
|
|
||||||
constexpr u32 num_threads = CACHING_NUMPAGES / 32; // 1 bit per page, 32 pages per workgroup
|
constexpr u32 num_threads = CACHING_NUMPAGES / 32; // 1 bit per page, 32 pages per workgroup
|
||||||
constexpr u32 num_workgroups = Common::DivCeil(num_threads, 64u);
|
constexpr u32 num_workgroups = Common::DivCeil(num_threads, 64u);
|
||||||
cmdbuf.dispatch(num_workgroups, 1, 1);
|
cmdbuf.dispatch(num_workgroups, 1, 1);
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
|
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_ARB_gpu_shader_int64 : enable
|
#extension GL_ARB_gpu_shader_int64 : enable
|
||||||
#extension GL_EXT_shader_atomic_int64 : enable
|
|
||||||
|
|
||||||
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
@ -12,13 +11,16 @@ layout(std430, binding = 0) buffer input_buf {
|
|||||||
};
|
};
|
||||||
|
|
||||||
layout(std430, binding = 1) buffer output_buf {
|
layout(std430, binding = 1) buffer output_buf {
|
||||||
uint64_t parsed_buffer[];
|
uint64_t download_buffer[];
|
||||||
};
|
};
|
||||||
|
|
||||||
layout(push_constant) uniform parsing_info {
|
// Overlap for 32 bit atomics
|
||||||
uint caching_pagebits;
|
layout(std430, binding = 1) buffer output_buf32 {
|
||||||
|
uint download_buffer32[];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
layout(constant_id = 0) const uint CACHING_PAGEBITS = 0;
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
uint id = gl_GlobalInvocationID.x;
|
uint id = gl_GlobalInvocationID.x;
|
||||||
uint word = fault_buffer[id];
|
uint word = fault_buffer[id];
|
||||||
@ -31,10 +33,10 @@ void main() {
|
|||||||
uint bit = findLSB(word);
|
uint bit = findLSB(word);
|
||||||
word &= word - 1;
|
word &= word - 1;
|
||||||
uint page = base_bit + bit;
|
uint page = base_bit + bit;
|
||||||
uint store_index = uint(atomicAdd(parsed_buffer[0], 1u)) + 1u;
|
uint store_index = atomicAdd(download_buffer32[0], 1u);
|
||||||
// It is very unlikely, but should we check for overflow?
|
// It is very unlikely, but should we check for overflow?
|
||||||
if (store_index < 1024u) { // only support 1024 page faults
|
if (store_index < 1024u) { // only support 1024 page faults
|
||||||
parsed_buffer[store_index] = uint64_t(page) << caching_pagebits;
|
download_buffer[store_index] = uint64_t(page) << CACHING_PAGEBITS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user