Merge branch 'shadps4-emu:main' into main

Daniel Nylander, 2025-01-07 08:49:06 +01:00 (committed by GitHub)
commit a4f9c72901
GPG Key ID: B5690EEEBB952194 (no known key found for this signature in database)
32 changed files with 9385 additions and 18251 deletions


@@ -14,14 +14,14 @@ env:
 
 jobs:
   reuse:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     continue-on-error: true
     steps:
       - uses: actions/checkout@v4
       - uses: fsfe/reuse-action@v5
 
   clang-format:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     continue-on-error: true
     steps:
       - uses: actions/checkout@v4

@@ -39,7 +39,7 @@ jobs:
         run: ./.ci/clang-format.sh
 
   get-info:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
    outputs:
       date: ${{ steps.vars.outputs.date }}
       shorthash: ${{ steps.vars.outputs.shorthash }}

@@ -57,7 +57,7 @@ jobs:
           echo "fullhash=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
 
   windows-sdl:
-    runs-on: windows-latest
+    runs-on: windows-2025
     needs: get-info
     steps:
       - uses: actions/checkout@v4

@@ -101,7 +101,7 @@ jobs:
           path: ${{github.workspace}}/build/shadPS4.exe
 
   windows-qt:
-    runs-on: windows-latest
+    runs-on: windows-2025
     needs: get-info
     steps:
       - uses: actions/checkout@v4

[27 file diffs suppressed because they are too large]


@@ -119,19 +119,23 @@ public:
         return buffer;
     }
 
-    std::optional<vk::BufferMemoryBarrier2> GetBarrier(vk::AccessFlagBits2 dst_acess_mask,
-                                                       vk::PipelineStageFlagBits2 dst_stage) {
+    std::optional<vk::BufferMemoryBarrier2> GetBarrier(
+        vk::Flags<vk::AccessFlagBits2> dst_acess_mask, vk::PipelineStageFlagBits2 dst_stage,
+        u32 offset = 0) {
         if (dst_acess_mask == access_mask && stage == dst_stage) {
             return {};
         }
+        DEBUG_ASSERT(offset < size_bytes);
         auto barrier = vk::BufferMemoryBarrier2{
             .srcStageMask = stage,
             .srcAccessMask = access_mask,
             .dstStageMask = dst_stage,
             .dstAccessMask = dst_acess_mask,
             .buffer = buffer.buffer,
-            .size = size_bytes,
+            .offset = offset,
+            .size = size_bytes - offset,
         };
         access_mask = dst_acess_mask;
         stage = dst_stage;

@@ -150,8 +154,10 @@ public:
     Vulkan::Scheduler* scheduler;
     MemoryUsage usage;
     UniqueBuffer buffer;
-    vk::AccessFlagBits2 access_mask{vk::AccessFlagBits2::eNone};
-    vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eNone};
+    vk::Flags<vk::AccessFlagBits2> access_mask{
+        vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
+        vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite};
+    vk::PipelineStageFlagBits2 stage{vk::PipelineStageFlagBits2::eAllCommands};
 };
 
 class StreamBuffer : public Buffer {
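
The reworked GetBarrier turns each Buffer into a small state machine: the object remembers the access mask and pipeline stage of its last use and returns a vk::BufferMemoryBarrier2 only when the requested state actually differs, and the new offset parameter lets callers cover just the tail of the buffer being touched. A minimal caller-side sketch of the pattern; buffer and cmdbuf are illustrative names, not from the diff:

    // First request transitions the buffer to transfer-write: a barrier is returned.
    if (auto barrier = buffer.GetBarrier(vk::AccessFlagBits2::eTransferWrite,
                                         vk::PipelineStageFlagBits2::eTransfer)) {
        cmdbuf.pipelineBarrier2(vk::DependencyInfo{
            .bufferMemoryBarrierCount = 1,
            .pBufferMemoryBarriers = &*barrier,
        });
    }
    // A second, identical request returns std::nullopt: access_mask and stage
    // already match, so the redundant barrier is elided.

The new member defaults (eAllCommands plus the combined memory/transfer access flags) mean a freshly created buffer starts in the most conservative state, so its first real transition always produces a full barrier instead of a falsely relaxed eNone -> X one.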


@@ -34,21 +34,10 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
     // Ensure the first slot is used for the null buffer
     const auto null_id =
-        slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 1);
+        slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, 0, ReadFlags, 16);
     ASSERT(null_id.index == 0);
     const vk::Buffer& null_buffer = slot_buffers[null_id].buffer;
     Vulkan::SetObjectName(instance.GetDevice(), null_buffer, "Null Buffer");
-
-    const vk::BufferViewCreateInfo null_view_ci = {
-        .buffer = null_buffer,
-        .format = vk::Format::eR8Unorm,
-        .offset = 0,
-        .range = VK_WHOLE_SIZE,
-    };
-    const auto [null_view_result, null_view] = instance.GetDevice().createBufferView(null_view_ci);
-    ASSERT_MSG(null_view_result == vk::Result::eSuccess, "Failed to create null buffer view.");
-    null_buffer_view = null_view;
-    Vulkan::SetObjectName(instance.GetDevice(), null_buffer_view, "Null Buffer View");
 }
 
 BufferCache::~BufferCache() = default;

@@ -479,43 +468,36 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
     };
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
-    const std::array pre_barriers = {
-        vk::BufferMemoryBarrier2{
-            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-            .srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
-            .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
-            .dstAccessMask = vk::AccessFlagBits2::eTransferRead,
-            .buffer = overlap.Handle(),
-            .offset = 0,
-            .size = overlap.SizeBytes(),
-        },
-    };
-    const std::array post_barriers = {
-        vk::BufferMemoryBarrier2{
-            .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-            .srcAccessMask = vk::AccessFlagBits2::eTransferRead,
-            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-            .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite,
-            .buffer = overlap.Handle(),
-            .offset = 0,
-            .size = overlap.SizeBytes(),
-        },
-        vk::BufferMemoryBarrier2{
-            .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
-            .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
-            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-            .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
-            .buffer = new_buffer.Handle(),
-            .offset = dst_base_offset,
-            .size = overlap.SizeBytes(),
-        },
-    };
+
+    boost::container::static_vector<vk::BufferMemoryBarrier2, 2> pre_barriers{};
+    if (auto src_barrier = overlap.GetBarrier(vk::AccessFlagBits2::eTransferRead,
+                                              vk::PipelineStageFlagBits2::eTransfer)) {
+        pre_barriers.push_back(*src_barrier);
+    }
+    if (auto dst_barrier =
+            new_buffer.GetBarrier(vk::AccessFlagBits2::eTransferWrite,
+                                  vk::PipelineStageFlagBits2::eTransfer, dst_base_offset)) {
+        pre_barriers.push_back(*dst_barrier);
+    }
     cmdbuf.pipelineBarrier2(vk::DependencyInfo{
         .dependencyFlags = vk::DependencyFlagBits::eByRegion,
-        .bufferMemoryBarrierCount = 1,
+        .bufferMemoryBarrierCount = static_cast<u32>(pre_barriers.size()),
         .pBufferMemoryBarriers = pre_barriers.data(),
     });
     cmdbuf.copyBuffer(overlap.Handle(), new_buffer.Handle(), copy);
+
+    boost::container::static_vector<vk::BufferMemoryBarrier2, 2> post_barriers{};
+    if (auto src_barrier =
+            overlap.GetBarrier(vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
+                               vk::PipelineStageFlagBits2::eAllCommands)) {
+        post_barriers.push_back(*src_barrier);
+    }
+    if (auto dst_barrier = new_buffer.GetBarrier(
+            vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
+            vk::PipelineStageFlagBits2::eAllCommands, dst_base_offset)) {
+        post_barriers.push_back(*dst_barrier);
+    }
     cmdbuf.pipelineBarrier2(vk::DependencyInfo{
         .dependencyFlags = vk::DependencyFlagBits::eByRegion,
         .bufferMemoryBarrierCount = static_cast<u32>(post_barriers.size()),

@@ -626,7 +608,8 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
     const auto cmdbuf = scheduler.CommandBuffer();
     const vk::BufferMemoryBarrier2 pre_barrier = {
         .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
-        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
+        .srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite |
+                         vk::AccessFlagBits2::eTransferRead | vk::AccessFlagBits2::eTransferWrite,
         .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
         .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
         .buffer = buffer.Handle(),
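
With both ends of the copy tracked by GetBarrier, JoinOverlap now collects at most two barriers into a boost::container::static_vector (fixed capacity, no heap allocation) and submits only the transitions that are actually pending. Condensed to its core, the collect-then-submit idiom looks like this; src, dst, and dst_offset are illustrative names:

    boost::container::static_vector<vk::BufferMemoryBarrier2, 2> barriers;
    if (auto b = src.GetBarrier(vk::AccessFlagBits2::eTransferRead,
                                vk::PipelineStageFlagBits2::eTransfer)) {
        barriers.push_back(*b);
    }
    if (auto b = dst.GetBarrier(vk::AccessFlagBits2::eTransferWrite,
                                vk::PipelineStageFlagBits2::eTransfer, dst_offset)) {
        barriers.push_back(*b);
    }
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
        .bufferMemoryBarrierCount = static_cast<u32>(barriers.size()),
        .pBufferMemoryBarriers = barriers.data(), // pointer is ignored when count is 0
    });

The widened srcAccessMask in SynchronizeBuffer follows the same reasoning: buffers can now legitimately sit in a transfer or memory read/write state when an upload starts, so the pre-barrier has to wait on all of those accesses.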


@@ -71,10 +71,6 @@ public:
         return slot_buffers[id];
     }
 
-    [[nodiscard]] vk::BufferView& NullBufferView() {
-        return null_buffer_view;
-    }
-
     /// Invalidates any buffer in the logical page range.
     void InvalidateMemory(VAddr device_addr, u64 size);

@@ -160,7 +156,6 @@ private:
     std::shared_mutex mutex;
     Common::SlotVector<Buffer> slot_buffers;
     RangeSet gpu_modified_ranges;
-    vk::BufferView null_buffer_view;
     MemoryTracker memory_tracker;
     PageTable page_table;
 };
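
Dropping the cached null_buffer_view is what forces the rasterizer changes below: a texel-buffer descriptor's view format has to match what the shader expects, so a single fixed eR8Unorm view cannot stand in for every null binding, and the view is instead created on demand from the V# descriptor's format. A hedged sketch of what Buffer::View presumably reduces to; the SurfaceFormat helper and the error handling are assumptions, not taken from the diff:

    // Build a typed view over [offset, offset + size) of the underlying buffer.
    vk::BufferViewCreateInfo view_ci{
        .buffer = buffer,
        .format = Vulkan::LiverpoolToVK::SurfaceFormat(dfmt, nfmt), // assumed helper
        .offset = offset,
        .range = size,
    };
    auto [result, view] = device.createBufferView(view_ci);
    ASSERT_MSG(result == vk::Result::eSuccess, "Failed to create buffer view");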


@@ -537,6 +537,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
     }
 
     // Second pass to re-bind buffers that were updated after binding
+    auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID);
     for (u32 i = 0; i < buffer_bindings.size(); i++) {
         const auto& [buffer_id, vsharp] = buffer_bindings[i];
         const auto& desc = stage.buffers[i];

@@ -548,7 +549,6 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
         } else if (instance.IsNullDescriptorSupported()) {
             buffer_infos.emplace_back(VK_NULL_HANDLE, 0, VK_WHOLE_SIZE);
         } else {
-            auto& null_buffer = buffer_cache.GetBuffer(VideoCore::NULL_BUFFER_ID);
             buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE);
         }
     } else {

@@ -582,17 +582,19 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
         ++binding.buffer;
     }
 
-    const auto null_buffer_view =
-        instance.IsNullDescriptorSupported() ? VK_NULL_HANDLE : buffer_cache.NullBufferView();
     for (u32 i = 0; i < texbuffer_bindings.size(); i++) {
         const auto& [buffer_id, vsharp] = texbuffer_bindings[i];
         const auto& desc = stage.texture_buffers[i];
-        vk::BufferView& buffer_view = buffer_views.emplace_back(null_buffer_view);
+        // Fallback format for null buffer view; never used in valid buffer case.
+        const auto data_fmt = vsharp.GetDataFmt() != AmdGpu::DataFormat::FormatInvalid
+                                  ? vsharp.GetDataFmt()
+                                  : AmdGpu::DataFormat::Format8;
+        const u32 fmt_stride = AmdGpu::NumBits(data_fmt) >> 3;
+        vk::BufferView buffer_view;
         if (buffer_id) {
             const u32 alignment = instance.TexelBufferMinAlignment();
             const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer(
                 vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id);
-            const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3;
             const u32 buf_stride = vsharp.GetStride();
             ASSERT_MSG(buf_stride % fmt_stride == 0,
                        "Texel buffer stride must match format stride");

@@ -600,9 +602,8 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
             const u32 adjust = offset - offset_aligned;
             ASSERT(adjust % fmt_stride == 0);
             push_data.AddTexelOffset(binding.buffer, buf_stride / fmt_stride, adjust / fmt_stride);
-            buffer_view =
-                vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written,
-                                vsharp.GetDataFmt(), vsharp.GetNumberFmt());
+            buffer_view = vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust,
+                                          desc.is_written, data_fmt, vsharp.GetNumberFmt());
             if (auto barrier =
                     vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
                                                           : vk::AccessFlagBits2::eShaderRead,

@@ -612,6 +613,11 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
             if (desc.is_written) {
                 texture_cache.InvalidateMemoryFromGPU(vsharp.base_address, vsharp.GetSize());
             }
+        } else if (instance.IsNullDescriptorSupported()) {
+            buffer_view = VK_NULL_HANDLE;
+        } else {
+            buffer_view =
+                null_buffer.View(0, fmt_stride, desc.is_written, data_fmt, vsharp.GetNumberFmt());
         }
 
         set_writes.push_back({

@@ -621,7 +627,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
             .descriptorCount = 1,
             .descriptorType = desc.is_written ? vk::DescriptorType::eStorageTexelBuffer
                                               : vk::DescriptorType::eUniformTexelBuffer,
-            .pTexelBufferView = &buffer_view,
+            .pTexelBufferView = &buffer_views.emplace_back(buffer_view),
         });
         ++binding.buffer;
     }
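
The three-way fallback at the end of the texel-buffer loop (a real view, VK_NULL_HANDLE, or a one-texel view of the shared null buffer, which the constructor now sizes at 16 bytes, presumably so any single-texel format fits) hinges on instance.IsNullDescriptorSupported(). That check presumably reflects the nullDescriptor feature of VK_EXT_robustness2; queried standalone it looks roughly like this (raw Vulkan API, names not from the diff):

    VkPhysicalDeviceRobustness2FeaturesEXT robustness2{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT};
    VkPhysicalDeviceFeatures2 features2{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, .pNext = &robustness2};
    vkGetPhysicalDeviceFeatures2(physical_device, &features2);
    // With nullDescriptor, VK_NULL_HANDLE may be bound in a descriptor and reads
    // from it return zero; without it, a real dummy resource must be bound instead.
    const bool null_descriptor_supported = robustness2.nullDescriptor == VK_TRUE;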