renderer_vulkan: Commonize buffer binding

IndecisiveTurtle 2024-10-17 23:46:06 +03:00
parent ed84d7460c
commit b1cecf6e87
12 changed files with 249 additions and 270 deletions
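
In short: the buffer and texel-buffer descriptor setup that ComputePipeline::BindResources and GraphicsPipeline::BindResources previously duplicated moves into a shared Pipeline::BindBuffers helper, and ObtainBuffer/FindBuffer gain a cached BufferId path so the helper can pre-resolve buffers before binding. A sketch of the commonized interface, copied from the hunks that follow:

    // Declared on the Pipeline base class; both pipelines call it before BindTextures.
    using DescriptorWrites = boost::container::small_vector<vk::WriteDescriptorSet, 16>;
    using BufferBarriers = boost::container::small_vector<vk::BufferMemoryBarrier2, 16>;

    void BindBuffers(VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache,
                     const Shader::Info& stage, Shader::Backend::Bindings& binding,
                     Shader::PushData& push_data, DescriptorWrites& set_writes,
                     BufferBarriers& buffer_barriers) const;

    // Typical call site (compute pipeline; the graphics pipeline passes *stage per shader stage):
    BindBuffers(buffer_cache, texture_cache, *info, binding, push_data, set_writes, buffer_barriers);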

View File

@@ -1075,7 +1075,16 @@ ScePthread PThreadPool::Create(const char* name) {
         }
     }
+#ifdef _WIN64
     auto* ret = new PthreadInternal{};
+#else
+    // TODO: Linux specific hack
+    static u8* hint_address = reinterpret_cast<u8*>(0x7FFFFC000ULL);
+    auto* ret = reinterpret_cast<PthreadInternal*>(
+        mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE,
+             MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
+    hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB);
+#endif
     ret->is_free = false;
     ret->is_detached = false;
     ret->is_almost_done = false;

View File

@@ -142,6 +142,7 @@ public:
     VAddr cpu_addr = 0;
     bool is_picked{};
     bool is_coherent{};
+    bool is_deleted{};
     int stream_score = 0;
     size_t size_bytes = 0;
     std::span<u8> mapped_data;

View File

@@ -291,7 +291,7 @@ void BufferCache::InlineDataToGds(u32 gds_offset, u32 value) {
 }
 std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
-                                                  bool is_texel_buffer) {
+                                                  bool is_texel_buffer, BufferId buffer_id) {
     static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
     const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size);
     if (!is_written && size <= StreamThreshold && !is_gpu_dirty) {
@@ -301,16 +301,19 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
         return {&stream_buffer, offset};
     }
-    const BufferId buffer_id = FindBuffer(device_addr, size);
+    if (!buffer_id || slot_buffers[buffer_id].is_deleted) {
+        buffer_id = FindBuffer(device_addr, size);
+    }
     Buffer& buffer = slot_buffers[buffer_id];
     SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer);
     if (is_written) {
         memory_tracker.MarkRegionAsGpuModified(device_addr, size);
+        gpu_regions.Add(device_addr, size);
     }
     return {&buffer, buffer.Offset(device_addr)};
 }
-std::pair<Buffer*, u32> BufferCache::ObtainTempBuffer(VAddr gpu_addr, u32 size) {
+std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size) {
     const u64 page = gpu_addr >> CACHING_PAGEBITS;
     const BufferId buffer_id = page_table[page];
     if (buffer_id) {
@@ -539,6 +542,8 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
         largest_copy = std::max(largest_copy, range_size);
     };
     memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
+        bool has_gpu = false;
+        gpu_regions.ForEachInRange(device_addr_out, range_size, [&](VAddr, VAddr) { has_gpu = true; });
         add_copy(device_addr_out, range_size);
         // Prevent uploading to gpu modified regions.
         // gpu_modified_ranges.ForEachNotInRange(device_addr_out, range_size, add_copy);
@@ -656,12 +661,13 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
 void BufferCache::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
     // Mark the whole buffer as CPU written to stop tracking CPU writes
-    if (!do_not_mark) {
     Buffer& buffer = slot_buffers[buffer_id];
+    if (!do_not_mark) {
         memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
     }
     Unregister(buffer_id);
     scheduler.DeferOperation([this, buffer_id] { slot_buffers.erase(buffer_id); });
+    buffer.is_deleted = true;
 }
 } // namespace VideoCore

View File

@@ -12,6 +12,7 @@
 #include "common/types.h"
 #include "video_core/buffer_cache/buffer.h"
 #include "video_core/buffer_cache/memory_tracker_base.h"
+#include "video_core/buffer_cache/range_set.h"
 #include "video_core/multi_level_page_table.h"
 namespace AmdGpu {
@@ -85,10 +86,10 @@ public:
     /// Obtains a buffer for the specified region.
     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written,
-                                                       bool is_texel_buffer = false);
+                                                       bool is_texel_buffer = false, BufferId buffer_id = {});
-    /// Obtains a temporary buffer for usage in texture cache.
-    [[nodiscard]] std::pair<Buffer*, u32> ObtainTempBuffer(VAddr gpu_addr, u32 size);
+    /// Attempts to obtain a buffer without modifying the cache contents.
+    [[nodiscard]] std::pair<Buffer*, u32> ObtainViewBuffer(VAddr gpu_addr, u32 size);
     /// Return true when a region is registered on the cache
     [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
@@ -99,6 +100,8 @@ public:
     /// Return true when a CPU region is modified from the GPU
     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
+    [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size);
 private:
     template <typename Func>
     void ForEachBufferInRange(VAddr device_addr, u64 size, Func&& func) {
@@ -119,8 +122,6 @@ private:
     void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size);
-    [[nodiscard]] BufferId FindBuffer(VAddr device_addr, u32 size);
     [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size);
     void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
@@ -150,6 +151,7 @@ private:
     Buffer gds_buffer;
     std::mutex mutex;
     Common::SlotVector<Buffer> slot_buffers;
+    RangeSet gpu_regions;
     vk::BufferView null_buffer_view;
     MemoryTracker memory_tracker;
     PageTable page_table;

View File

@ -3,7 +3,6 @@
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include "common/alignment.h"
#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
@ -113,30 +112,22 @@ ComputePipeline::~ComputePipeline() = default;
bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
VideoCore::TextureCache& texture_cache) const { VideoCore::TextureCache& texture_cache) const {
// Bind resource buffers and textures. // Bind resource buffers and textures.
boost::container::static_vector<vk::BufferView, 8> buffer_views;
boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes; boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
boost::container::small_vector<vk::BufferMemoryBarrier2, 16> buffer_barriers; BufferBarriers buffer_barriers;
Shader::PushData push_data{}; Shader::PushData push_data{};
Shader::Backend::Bindings binding{}; Shader::Backend::Bindings binding{};
image_infos.clear();
info->PushUd(binding, push_data); info->PushUd(binding, push_data);
for (const auto& desc : info->buffers) {
bool is_storage = true;
if (desc.is_gds_buffer) {
auto* vk_buffer = buffer_cache.GetGdsBuffer();
buffer_infos.emplace_back(vk_buffer->Handle(), 0, vk_buffer->SizeBytes());
} else {
const auto vsharp = desc.GetSharp(*info);
is_storage = desc.IsStorage(vsharp);
const VAddr address = vsharp.base_address;
// Most of the time when a metadata is updated with a shader it gets cleared. It means // Most of the time when a metadata is updated with a shader it gets cleared. It means
// we can skip the whole dispatch and update the tracked state instead. Also, it is not // we can skip the whole dispatch and update the tracked state instead. Also, it is not
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
// will need its full emulation anyways. For cases of metadata read a warning will be // will need its full emulation anyways. For cases of metadata read a warning will be logged.
// logged. for (const auto& desc : info->buffers) {
if (desc.is_gds_buffer) {
continue;
}
const VAddr address = desc.GetSharp(*info).base_address;
if (desc.is_written) { if (desc.is_written) {
if (texture_cache.TouchMeta(address, true)) { if (texture_cache.TouchMeta(address, true)) {
LOG_TRACE(Render_Vulkan, "Metadata update skipped"); LOG_TRACE(Render_Vulkan, "Metadata update skipped");
@ -147,106 +138,17 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)"); LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
} }
} }
const u32 size = vsharp.GetSize();
const u32 alignment =
is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
const auto [vk_buffer, offset] =
buffer_cache.ObtainBuffer(address, size, desc.is_written);
const u32 offset_aligned = Common::AlignDown(offset, alignment);
const u32 adjust = offset - offset_aligned;
ASSERT(adjust % 4 == 0);
push_data.AddOffset(binding.buffer, adjust);
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
}
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding.unified++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer
: vk::DescriptorType::eUniformBuffer,
.pBufferInfo = &buffer_infos.back(),
});
++binding.buffer;
} }
const auto null_buffer_view = BindBuffers(buffer_cache, texture_cache, *info, binding, push_data,
instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView(); set_writes, buffer_barriers);
for (const auto& desc : info->texture_buffers) {
const auto vsharp = desc.GetSharp(*info);
vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view);
const u32 size = vsharp.GetSize();
if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid && size != 0) {
const VAddr address = vsharp.base_address;
if (desc.is_written) {
if (texture_cache.TouchMeta(address, true)) {
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
return false;
}
} else {
if (texture_cache.IsMeta(address)) {
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
}
}
const u32 alignment = instance.TexelBufferMinAlignment();
const auto [vk_buffer, offset] =
buffer_cache.ObtainBuffer(address, size, desc.is_written, true);
const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
ASSERT_MSG(fmt_stride == vsharp.GetStride(),
"Texel buffer stride must match format stride");
const u32 offset_aligned = Common::AlignDown(offset, alignment);
const u32 adjust = offset - offset_aligned;
ASSERT(adjust % fmt_stride == 0);
push_data.AddOffset(binding.buffer, adjust / fmt_stride);
buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
vsharp.GetDataFmt(), vsharp.GetNumberFmt());
if (auto barrier =
vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
: vk::AccessFlagBits2::eShaderRead,
vk::PipelineStageFlagBits2::eComputeShader)) {
buffer_barriers.emplace_back(*barrier);
}
if (desc.is_written) {
texture_cache.InvalidateMemoryFromGPU(address, size);
}
}
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding.unified++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
: vk::DescriptorType::eUniformTexelBuffer,
.pTexelBufferView = &buffer_view,
});
++binding.buffer;
}
BindTextures(texture_cache, *info, binding, set_writes); BindTextures(texture_cache, *info, binding, set_writes);
for (const auto& sampler : info->samplers) {
const auto ssharp = sampler.GetSharp(*info);
if (ssharp.force_degamma) {
LOG_WARNING(Render_Vulkan, "Texture requires gamma correction");
}
const auto vk_sampler = texture_cache.GetSampler(ssharp);
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding.unified++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampler,
.pImageInfo = &image_infos.back(),
});
}
if (set_writes.empty()) { if (set_writes.empty()) {
return false; return false;
} }
const auto cmdbuf = scheduler.CommandBuffer(); const auto cmdbuf = scheduler.CommandBuffer();
if (!buffer_barriers.empty()) { if (!buffer_barriers.empty()) {
const auto dependencies = vk::DependencyInfo{ const auto dependencies = vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion, .dependencyFlags = vk::DependencyFlagBits::eByRegion,
@ -256,11 +158,14 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
scheduler.EndRendering(); scheduler.EndRendering();
cmdbuf.pipelineBarrier2(dependencies); cmdbuf.pipelineBarrier2(dependencies);
} }
cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(push_data),
&push_data);
// Bind descriptor set.
if (uses_push_descriptors) { if (uses_push_descriptors) {
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0,
set_writes); set_writes);
} else { return true;
}
const auto desc_set = desc_heap.Commit(*desc_layout); const auto desc_set = desc_heap.Commit(*desc_layout);
for (auto& set_write : set_writes) { for (auto& set_write : set_writes) {
set_write.dstSet = desc_set; set_write.dstSet = desc_set;
@ -268,10 +173,7 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
instance.GetDevice().updateDescriptorSets(set_writes, {}); instance.GetDevice().updateDescriptorSets(set_writes, {});
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, desc_set, cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, *pipeline_layout, 0, desc_set,
{}); {});
}
cmdbuf.pushConstants(*pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0u, sizeof(push_data),
&push_data);
return true; return true;
} }

View File

@@ -5,7 +5,7 @@
 #include <boost/container/small_vector.hpp>
 #include <boost/container/static_vector.hpp>
-#include "common/alignment.h"
+#include "common/scope_exit.h"
 #include "common/assert.h"
 #include "video_core/amdgpu/resource.h"
 #include "video_core/buffer_cache/buffer_cache.h"
@@ -384,15 +384,11 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
                                      VideoCore::BufferCache& buffer_cache,
                                      VideoCore::TextureCache& texture_cache) const {
     // Bind resource buffers and textures.
-    boost::container::static_vector<vk::BufferView, 8> buffer_views;
-    boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
     boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
-    boost::container::small_vector<vk::BufferMemoryBarrier2, 16> buffer_barriers;
+    BufferBarriers buffer_barriers;
     Shader::PushData push_data{};
     Shader::Backend::Bindings binding{};
-    image_infos.clear();
     for (const auto* stage : stages) {
         if (!stage) {
             continue;
@@ -402,112 +398,22 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
             push_data.step1 = regs.vgt_instance_step_rate_1;
         }
         stage->PushUd(binding, push_data);
-        for (const auto& buffer : stage->buffers) {
-            const auto vsharp = buffer.GetSharp(*stage);
-            const bool is_storage = buffer.IsStorage(vsharp);
-            if (vsharp && vsharp.GetSize() > 0) {
-                const VAddr address = vsharp.base_address;
-                if (texture_cache.IsMeta(address)) {
-                    LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (buffer)");
-                }
-                const u32 size = vsharp.GetSize();
-                const u32 alignment =
-                    is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
-                const auto [vk_buffer, offset] =
-                    buffer_cache.ObtainBuffer(address, size, buffer.is_written);
-                const u32 offset_aligned = Common::AlignDown(offset, alignment);
-                const u32 adjust = offset - offset_aligned;
-                ASSERT(adjust % 4 == 0);
-                push_data.AddOffset(binding.buffer, adjust);
-                buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned, size + adjust);
-            } else if (instance.IsNullDescriptorSupported()) {
-                buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
-            } else {
-                auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID);
-                buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE);
-            }
-            set_writes.push_back({
-                .dstSet = VK_NULL_HANDLE,
-                .dstBinding = binding.unified++,
-                .dstArrayElement = 0,
-                .descriptorCount = 1,
-                .descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer
-                                             : vk::DescriptorType::eUniformBuffer,
-                .pBufferInfo = &buffer_infos.back(),
-            });
-            ++binding.buffer;
-        }
-        const auto null_buffer_view =
-            instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView();
+        BindBuffers(buffer_cache, texture_cache, *stage, binding, push_data,
+                    set_writes, buffer_barriers);
-        for (const auto& desc : stage->texture_buffers) {
-            const auto vsharp = desc.GetSharp(*stage);
-            vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view);
-            const u32 size = vsharp.GetSize();
-            if (vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid && size != 0) {
-                const VAddr address = vsharp.base_address;
-                const u32 alignment = instance.TexelBufferMinAlignment();
-                const auto [vk_buffer, offset] =
-                    buffer_cache.ObtainBuffer(address, size, desc.is_written, true);
-                const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
-                ASSERT_MSG(fmt_stride == vsharp.GetStride(),
-                           "Texel buffer stride must match format stride");
-                const u32 offset_aligned = Common::AlignDown(offset, alignment);
-                const u32 adjust = offset - offset_aligned;
-                ASSERT(adjust % fmt_stride == 0);
-                push_data.AddOffset(binding.buffer, adjust / fmt_stride);
-                buffer_view = vk_buffer->View(offset_aligned, size + adjust, desc.is_written,
-                                              vsharp.GetDataFmt(), vsharp.GetNumberFmt());
-                const auto dst_access = desc.is_written ? vk::AccessFlagBits2::eShaderWrite
-                                                        : vk::AccessFlagBits2::eShaderRead;
-                if (auto barrier = vk_buffer->GetBarrier(
-                        dst_access, vk::PipelineStageFlagBits2::eVertexShader)) {
-                    buffer_barriers.emplace_back(*barrier);
-                }
-                if (desc.is_written) {
-                    texture_cache.InvalidateMemoryFromGPU(address, size);
-                }
-            }
-            set_writes.push_back({
-                .dstSet = VK_NULL_HANDLE,
-                .dstBinding = binding.unified++,
-                .dstArrayElement = 0,
-                .descriptorCount = 1,
-                .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
-                                                  : vk::DescriptorType::eUniformTexelBuffer,
-                .pTexelBufferView = &buffer_view,
-            });
-            ++binding.buffer;
-        }
         BindTextures(texture_cache, *stage, binding, set_writes);
-        for (const auto& sampler : stage->samplers) {
-            auto ssharp = sampler.GetSharp(*stage);
-            if (ssharp.force_degamma) {
-                LOG_WARNING(Render_Vulkan, "Texture requires gamma correction");
-            }
-            if (sampler.disable_aniso) {
-                const auto& tsharp = stage->images[sampler.associated_image].GetSharp(*stage);
-                if (tsharp.base_level == 0 && tsharp.last_level == 0) {
-                    ssharp.max_aniso.Assign(AmdGpu::AnisoRatio::One);
-                }
-            }
-            const auto vk_sampler = texture_cache.GetSampler(ssharp);
-            image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
-            set_writes.push_back({
-                .dstSet = VK_NULL_HANDLE,
-                .dstBinding = binding.unified++,
-                .dstArrayElement = 0,
-                .descriptorCount = 1,
-                .descriptorType = vk::DescriptorType::eSampler,
-                .pImageInfo = &image_infos.back(),
-            });
-        }
     }
     const auto cmdbuf = scheduler.CommandBuffer();
+    SCOPE_EXIT {
+        cmdbuf.pushConstants(*pipeline_layout, gp_stage_flags, 0U, sizeof(push_data), &push_data);
+        cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, Handle());
+    };
+    if (set_writes.empty()) {
+        return;
+    }
     if (!buffer_barriers.empty()) {
         const auto dependencies = vk::DependencyInfo{
             .dependencyFlags = vk::DependencyFlagBits::eByRegion,
@@ -517,12 +423,12 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
         scheduler.EndRendering();
         cmdbuf.pipelineBarrier2(dependencies);
     }
+    // Bind descriptor set.
-    if (!set_writes.empty()) {
     if (uses_push_descriptors) {
         cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
                                     set_writes);
-    } else {
+        return;
+    }
     const auto desc_set = desc_heap.Commit(*desc_layout);
     for (auto& set_write : set_writes) {
         set_write.dstSet = desc_set;
@@ -530,10 +436,6 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
     instance.GetDevice().updateDescriptorSets(set_writes, {});
     cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *pipeline_layout, 0,
                               desc_set, {});
-    }
-    }
-    cmdbuf.pushConstants(*pipeline_layout, gp_stage_flags, 0U, sizeof(push_data), &push_data);
-    cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, Handle());
 }
 } // namespace Vulkan

View File

@@ -8,10 +8,13 @@
 #include "video_core/renderer_vulkan/vk_pipeline_common.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/texture_cache/texture_cache.h"
+#include "video_core/buffer_cache/buffer_cache.h"
 namespace Vulkan {
 boost::container::static_vector<vk::DescriptorImageInfo, 32> Pipeline::image_infos;
+boost::container::static_vector<vk::BufferView, 8> Pipeline::buffer_views;
+boost::container::static_vector<vk::DescriptorBufferInfo, 32> Pipeline::buffer_infos;
 Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_,
                    vk::PipelineCache pipeline_cache)
@@ -19,12 +22,136 @@ Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorH
 Pipeline::~Pipeline() = default;
+void Pipeline::BindBuffers(VideoCore::BufferCache& buffer_cache,
+                           VideoCore::TextureCache& texture_cache, const Shader::Info& stage,
+                           Shader::Backend::Bindings& binding, Shader::PushData& push_data,
+                           DescriptorWrites& set_writes, BufferBarriers& buffer_barriers) const {
+    using BufferBindingInfo = std::pair<VideoCore::BufferId, AmdGpu::Buffer>;
+    static boost::container::static_vector<BufferBindingInfo, 32> buffer_bindings;
+    buffer_bindings.clear();
+    buffer_infos.clear();
+    for (const auto& desc : stage.buffers) {
+        const auto vsharp = desc.GetSharp(stage);
+        if (!desc.is_gds_buffer && vsharp.base_address != 0 && vsharp.GetSize() > 0) {
+            const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize());
+            buffer_bindings.emplace_back(buffer_id, vsharp);
+        } else {
+            buffer_bindings.emplace_back(VideoCore::BufferId{}, vsharp);
+        }
+    }
+    using TexBufferBindingInfo = std::pair<VideoCore::BufferId, AmdGpu::Buffer>;
+    static boost::container::static_vector<TexBufferBindingInfo, 32> texbuffer_bindings;
+    texbuffer_bindings.clear();
+    buffer_views.clear();
+    for (const auto& desc : stage.texture_buffers) {
+        const auto vsharp = desc.GetSharp(stage);
+        if (vsharp.base_address != 0 && vsharp.GetSize() > 0 &&
+            vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid) {
+            const auto buffer_id = buffer_cache.FindBuffer(vsharp.base_address, vsharp.GetSize());
+            texbuffer_bindings.emplace_back(buffer_id, vsharp);
+        } else {
+            texbuffer_bindings.emplace_back(VideoCore::BufferId{}, vsharp);
+        }
+    }
+    // Second pass to re-bind buffers that were updated after binding
+    for (u32 i = 0; i < buffer_bindings.size(); i++) {
+        const auto& [buffer_id, vsharp] = buffer_bindings[i];
+        const auto& desc = stage.buffers[i];
+        const bool is_storage = desc.IsStorage(vsharp);
+        if (!buffer_id) {
+            if (desc.is_gds_buffer) {
+                const auto* gds_buf = buffer_cache.GetGdsBuffer();
+                buffer_infos.emplace_back(gds_buf->Handle(), 0, gds_buf->SizeBytes());
+            } else if (instance.IsNullDescriptorSupported()) {
+                buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
+            } else {
+                auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID);
+                buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE);
+            }
+        } else {
+            const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(
+                vsharp.base_address, vsharp.GetSize(), desc.is_written, false, buffer_id);
+            const u32 alignment =
+                is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment();
+            const u32 offset_aligned = Common::AlignDown(offset, alignment);
+            const u32 adjust = offset - offset_aligned;
+            ASSERT(adjust % 4 == 0);
+            push_data.AddOffset(binding.buffer, adjust);
+            buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned,
+                                      vsharp.GetSize() + adjust);
+        }
+        set_writes.push_back({
+            .dstSet = VK_NULL_HANDLE,
+            .dstBinding = binding.unified++,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer
+                                         : vk::DescriptorType::eUniformBuffer,
+            .pBufferInfo = &buffer_infos.back(),
+        });
+        ++binding.buffer;
+    }
+    const auto null_buffer_view =
+        instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView();
+    for (u32 i = 0; i < texbuffer_bindings.size(); i++) {
+        const auto& [buffer_id, vsharp] = texbuffer_bindings[i];
+        const auto& desc = stage.texture_buffers[i];
+        vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view);
+        if (buffer_id) {
+            const u32 alignment = instance.TexelBufferMinAlignment();
+            const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(
+                vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id);
+            const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
+            ASSERT_MSG(fmt_stride == vsharp.GetStride(),
+                       "Texel buffer stride must match format stride");
+            const u32 offset_aligned = Common::AlignDown(offset, alignment);
+            const u32 adjust = offset - offset_aligned;
+            ASSERT(adjust % fmt_stride == 0);
+            push_data.AddOffset(binding.buffer, adjust / fmt_stride);
+            buffer_view = vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust,
                                          desc.is_written, vsharp.GetDataFmt(),
                                          vsharp.GetNumberFmt());
+            if (auto barrier =
+                    vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
+                                                          : vk::AccessFlagBits2::eShaderRead,
+                                          vk::PipelineStageFlagBits2::eComputeShader)) {
+                buffer_barriers.emplace_back(*barrier);
+            }
+            if (desc.is_written) {
+                texture_cache.InvalidateMemoryFromGPU(vsharp.base_address, vsharp.GetSize());
+            }
+        }
+        set_writes.push_back({
+            .dstSet = VK_NULL_HANDLE,
+            .dstBinding = binding.unified++,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
+                                              : vk::DescriptorType::eUniformTexelBuffer,
+            .pTexelBufferView = &buffer_view,
+        });
+        ++binding.buffer;
+    }
+}
 void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage,
                             Shader::Backend::Bindings& binding,
                             DescriptorWrites& set_writes) const {
     using ImageBindingInfo = std::tuple<VideoCore::ImageId, AmdGpu::Image, Shader::ImageResource>;
-    boost::container::static_vector<ImageBindingInfo, 32> image_bindings;
+    static boost::container::static_vector<ImageBindingInfo, 32> image_bindings;
+    image_bindings.clear();
+    image_infos.clear();
     for (const auto& image_desc : stage.images) {
         const auto tsharp = image_desc.GetSharp(stage);
@@ -76,6 +203,26 @@ void Pipeline::BindTextures(VideoCore::TextureCache& texture_cache, const Shader
             .pImageInfo = &image_infos.back(),
         });
     }
+    for (const auto& sampler : stage.samplers) {
+        auto ssharp = sampler.GetSharp(stage);
+        if (sampler.disable_aniso) {
+            const auto& tsharp = stage.images[sampler.associated_image].GetSharp(stage);
+            if (tsharp.base_level == 0 && tsharp.last_level == 0) {
+                ssharp.max_aniso.Assign(AmdGpu::AnisoRatio::One);
+            }
+        }
+        const auto vk_sampler = texture_cache.GetSampler(ssharp);
+        image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
+        set_writes.push_back({
+            .dstSet = VK_NULL_HANDLE,
+            .dstBinding = binding.unified++,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = vk::DescriptorType::eSampler,
+            .pImageInfo = &image_infos.back(),
+        });
+    }
 }
 } // namespace Vulkan

View File

@@ -33,6 +33,13 @@ public:
     }
     using DescriptorWrites = boost::container::small_vector<vk::WriteDescriptorSet, 16>;
+    using BufferBarriers = boost::container::small_vector<vk::BufferMemoryBarrier2, 16>;
+    void BindBuffers(VideoCore::BufferCache& buffer_cache, VideoCore::TextureCache& texture_cache,
+                     const Shader::Info& stage, Shader::Backend::Bindings& binding,
+                     Shader::PushData& push_data, DescriptorWrites& set_writes,
+                     BufferBarriers& buffer_barriers) const;
     void BindTextures(VideoCore::TextureCache& texture_cache, const Shader::Info& stage,
                       Shader::Backend::Bindings& binding, DescriptorWrites& set_writes) const;
@@ -44,6 +51,8 @@ protected:
     vk::UniquePipelineLayout pipeline_layout;
     vk::UniqueDescriptorSetLayout desc_layout;
     static boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
+    static boost::container::static_vector<vk::BufferView, 8> buffer_views;
+    static boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
 };
 } // namespace Vulkan

View File

@@ -1,6 +1,6 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
+#pragma clang optimize off
 // Include the vulkan platform specific header
 #if defined(ANDROID)
 #define VK_USE_PLATFORM_ANDROID_KHR

View File

@@ -98,10 +98,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr address, u32 offset, u32 si
     const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
     buffer_cache.BindVertexBuffers(vs_info);
-    const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, 0);
+    buffer_cache.BindIndexBuffer(is_indexed, 0);
-    const auto [buffer, base] = buffer_cache.ObtainBuffer(address, size, true);
+    const auto [buffer, base] = buffer_cache.ObtainBuffer(address + offset, size, false);
-    const auto total_offset = base + offset;
     BeginRendering(*pipeline);
     UpdateDynamicState(*pipeline);
@@ -110,9 +109,9 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr address, u32 offset, u32 si
     // instance offsets will be automatically applied by Vulkan from indirect args buffer.
     if (is_indexed) {
-        cmdbuf.drawIndexedIndirect(buffer->Handle(), total_offset, 1, 0);
+        cmdbuf.drawIndexedIndirect(buffer->Handle(), base, 1, 0);
     } else {
-        cmdbuf.drawIndirect(buffer->Handle(), total_offset, 1, 0);
+        cmdbuf.drawIndirect(buffer->Handle(), base, 1, 0);
     }
 }
@@ -161,9 +160,8 @@ void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) {
     scheduler.EndRendering();
     cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());
-    const auto [buffer, base] = buffer_cache.ObtainBuffer(address, size, true);
+    const auto [buffer, base] = buffer_cache.ObtainBuffer(address + offset, size, false);
-    const auto total_offset = base + offset;
-    cmdbuf.dispatchIndirect(buffer->Handle(), total_offset);
+    cmdbuf.dispatchIndirect(buffer->Handle(), base);
 }
 u64 Rasterizer::Flush() {

View File

@@ -8,6 +8,9 @@
 namespace VideoCore {
 Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sampler) {
+    if (sampler.force_degamma) {
+        LOG_WARNING(Render_Vulkan, "Texture requires gamma correction");
+    }
     using namespace Vulkan;
     const vk::SamplerCreateInfo sampler_ci = {
         .magFilter = LiverpoolToVK::Filter(sampler.xy_mag_filter),

View File

@@ -417,7 +417,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
     const VAddr image_addr = image.info.guest_address;
     const size_t image_size = image.info.guest_size_bytes;
-    const auto [vk_buffer, buf_offset] = buffer_cache.ObtainTempBuffer(image_addr, image_size);
+    const auto [vk_buffer, buf_offset] = buffer_cache.ObtainViewBuffer(image_addr, image_size);
     // The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW
     // hazard
     if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,