GDS read-back for EVENT_WRITE_EOS and related shader/GDS fixes.

What the hunks below do:
  * DS_APPEND / DS_CONSUME build the GDS offset from ir.GetM0() instead of m0_value.
  * PatchDataRingInstruction takes M0 as `info.user_data[ud_reg] >> 16`.
  * V_MBCNT_U32_B32 only skips `exec_hi` when src1 is also ConstZero.
  * EVENT_WRITE_EOS with Command::GdsStore finishes the rasterizer, reads one dword
    from the GDS buffer and stores it at the event address.
  * The GDS buffer is created with MemoryUsage::Stream; ReadDataFromGds copies the
    dword out of its mapped_data.
  * Staging/UBO stream buffers are doubled, written texel buffers are marked written,
    a one-time warning is logged when a tessellation stage is skipped, and the
    ImageFlagBits::Deleted flag is removed.

Review notes for this copy of the patch:
  * Line structure was restored from a whitespace-collapsed copy. Hunk line counts
    were checked against the @@ headers; leading indentation and the position of
    blank context lines are reconstructed and should be confirmed against the tree.
  * In the liverpool.cpp hunk the text between '<' and '>' had been stripped. The
    `case PM4ItOpcode::EventWriteEos` label and the reinterpret_cast line are
    reconstructed from the surrounding code - TODO confirm against the tree.
  * InlineDataToGds: the added barrier is now recorded after updateBuffer (the
    collapsed copy had it before the write), so the hunk's new-side count is 21.
  * vk_rasterizer.h: parameter name `gsd_offset` corrected to `gds_offset`.
  * `index` lines are kept as recorded; post-image hashes for buffer_cache.cpp and
    vk_rasterizer.h no longer describe the edited content.

diff --git a/src/shader_recompiler/frontend/translate/data_share.cpp b/src/shader_recompiler/frontend/translate/data_share.cpp
index a20f5c89a..d01c1977a 100644
--- a/src/shader_recompiler/frontend/translate/data_share.cpp
+++ b/src/shader_recompiler/frontend/translate/data_share.cpp
@@ -198,14 +198,14 @@ void Translator::V_WRITELANE_B32(const GcnInst& inst) {
 
 void Translator::DS_APPEND(const GcnInst& inst) {
     const u32 inst_offset = inst.control.ds.offset0;
-    const IR::U32 gds_offset = ir.IAdd(m0_value, ir.Imm32(inst_offset));
+    const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));
     const IR::U32 prev = ir.DataAppend(gds_offset);
     SetDst(inst.dst[0], prev);
 }
 
 void Translator::DS_CONSUME(const GcnInst& inst) {
     const u32 inst_offset = inst.control.ds.offset0;
-    const IR::U32 gds_offset = ir.IAdd(m0_value, ir.Imm32(inst_offset));
+    const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));
     const IR::U32 prev = ir.DataConsume(gds_offset);
     SetDst(inst.dst[0], prev);
 }
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index e71d965c6..b4470ee39 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -975,8 +975,9 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
             inst.src[1].field == OperandField::ConstZero) {
             return;
         }
-        // v_mbcnt_hi_u32_b32 v20, exec_hi, 0
-        if (inst.src[0].field == OperandField::ExecHi) {
+        // v_mbcnt_hi_u32_b32 vX, exec_hi, 0
+        if (inst.src[0].field == OperandField::ExecHi &&
+            inst.src[1].field == OperandField::ConstZero) {
             return;
         }
     } else {
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index 719e89104..aa5d39ae8 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -641,7 +641,7 @@ void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info,
     // M0 must be set by some user data register.
     const IR::Inst* prod = gds_offset.InstRecursive();
     const u32 ud_reg = u32(result.value()->Arg(0).ScalarReg());
-    u32 m0_val = info.user_data[ud_reg];
+    u32 m0_val = info.user_data[ud_reg] >> 16;
     if (prod->GetOpcode() == IR::Opcode::IAdd32) {
         m0_val += prod->Arg(1).U32();
     }
diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp
index 56c325195..cbc18aa43 100644
--- a/src/video_core/amdgpu/liverpool.cpp
+++ b/src/video_core/amdgpu/liverpool.cpp
@@ -465,6 +465,14 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
             case PM4ItOpcode::EventWriteEos: {
                 const auto* event_eos = reinterpret_cast<const PM4CmdEventWriteEos*>(header);
                 event_eos->SignalFence();
+                if (event_eos->command == PM4CmdEventWriteEos::Command::GdsStore) {
+                    ASSERT(event_eos->size == 1);
+                    if (rasterizer) {
+                        rasterizer->Finish();
+                        const u32 value = rasterizer->ReadDataFromGds(event_eos->gds_index);
+                        *event_eos->Address() = value;
+                    }
+                }
                 break;
             }
             case PM4ItOpcode::EventWriteEop: {
diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h
index 6973efbce..fd7980c17 100644
--- a/src/video_core/amdgpu/pm4_cmds.h
+++ b/src/video_core/amdgpu/pm4_cmds.h
@@ -513,13 +513,17 @@ struct PM4CmdEventWriteEos {
     }
 
     void SignalFence() const {
-        switch (command.Value()) {
+        const auto cmd = command.Value();
+        switch (cmd) {
         case Command::SingalFence: {
             *Address() = DataDWord();
             break;
         }
+        case Command::GdsStore: {
+            break;
+        }
         default: {
-            UNREACHABLE();
+            UNREACHABLE_MSG("Unknown command {}", u32(cmd));
         }
         }
     }
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index 1d842bcf7..86af05bf1 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -16,8 +16,8 @@ namespace VideoCore {
 
 static constexpr size_t NumVertexBuffers = 32;
 static constexpr size_t GdsBufferSize = 64_KB;
-static constexpr size_t StagingBufferSize = 512_MB;
-static constexpr size_t UboStreamBufferSize = 64_MB;
+static constexpr size_t StagingBufferSize = 1_GB;
+static constexpr size_t UboStreamBufferSize = 128_MB;
 
 BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
                          const AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
@@ -26,7 +26,7 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
       texture_cache{texture_cache_}, tracker{tracker_},
       staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
       stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
-      gds_buffer{instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, GdsBufferSize},
+      gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, GdsBufferSize},
       memory_tracker{&tracker} {
     Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
 
@@ -240,6 +240,21 @@ void BufferCache::InlineDataToGds(u32 gds_offset, u32 value) {
     ASSERT_MSG(gds_offset % 4 == 0, "GDS offset must be dword aligned");
     scheduler.EndRendering();
     const auto cmdbuf = scheduler.CommandBuffer();
     cmdbuf.updateBuffer(gds_buffer.Handle(), gds_offset, sizeof(u32), &value);
+    // Recorded after the write: a barrier's source scope only covers earlier commands.
+    const vk::BufferMemoryBarrier2 buf_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
+        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
+        .buffer = gds_buffer.Handle(),
+        .offset = gds_offset,
+        .size = sizeof(u32),
+    };
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &buf_barrier,
+    });
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 18f262bc0..a548b70a4 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -431,6 +431,9 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
                         dst_access, vk::PipelineStageFlagBits2::eVertexShader)) {
                     buffer_barriers.emplace_back(*barrier);
                 }
+                if (desc.is_written) {
+                    texture_cache.MarkWritten(address, size);
+                }
             }
             set_writes.push_back({
                 .dstSet = VK_NULL_HANDLE,
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index d65635e87..b4b256bb0 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -297,8 +297,14 @@ bool PipelineCache::RefreshGraphicsKey() {
         if (stage != Shader::Stage::Vertex && stage != Shader::Stage::Fragment) {
             return false;
         }
+
+        static bool TessMissingLogged = false;
         if (auto* pgm = regs.ProgramForStage(3);
             regs.stage_enable.IsStageEnabled(3) && pgm->Address() != 0) {
+            if (!TessMissingLogged) {
+                LOG_WARNING(Render_Vulkan, "Tess pipeline compilation skipped");
+                TessMissingLogged = true;
+            }
             return false;
         }
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index fbe1eb548..6344315a5 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -175,6 +175,10 @@ u64 Rasterizer::Flush() {
     return current_tick;
 }
 
+void Rasterizer::Finish() {
+    scheduler.Finish();
+}
+
 void Rasterizer::BeginRendering() {
     const auto& regs = liverpool->regs;
     RenderState state;
@@ -255,6 +259,13 @@ void Rasterizer::InlineDataToGds(u32 gds_offset, u32 value) {
     buffer_cache.InlineDataToGds(gds_offset, value);
 }
 
+u32 Rasterizer::ReadDataFromGds(u32 gds_offset) {
+    auto* gds_buf = buffer_cache.GetGdsBuffer();
+    u32 value;
+    std::memcpy(&value, gds_buf->mapped_data.data() + gds_offset, sizeof(u32));
+    return value;
+}
+
 void Rasterizer::InvalidateMemory(VAddr addr, u64 size) {
     buffer_cache.InvalidateMemory(addr, size);
     texture_cache.InvalidateMemory(addr, size);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 6a20b5e86..5aa90c5cc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -42,12 +42,14 @@ public:
     void ScopedMarkerInsert(const std::string_view& str);
 
     void InlineDataToGds(u32 gds_offset, u32 value);
+    u32 ReadDataFromGds(u32 gds_offset);
     void InvalidateMemory(VAddr addr, u64 size);
     void MapMemory(VAddr addr, u64 size);
     void UnmapMemory(VAddr addr, u64 size);
 
     void CpSync();
     u64 Flush();
+    void Finish();
 
 private:
     void BeginRendering();
diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h
index f932b25a0..1bbb975ba 100644
--- a/src/video_core/texture_cache/image.h
+++ b/src/video_core/texture_cache/image.h
@@ -32,7 +32,6 @@ enum ImageFlagBits : u32 {
     Registered = 1 << 6,     ///< True when the image is registered
     Picked = 1 << 7,         ///< Temporary flag to mark the image as picked
     MetaRegistered = 1 << 8, ///< True when metadata for this surface is known and registered
-    Deleted = 1 << 9,        ///< Indicates that images was marked for deletion once frame is done
 };
 
 DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp
index 9355308f9..37bb5da14 100644
--- a/src/video_core/texture_cache/texture_cache.cpp
+++ b/src/video_core/texture_cache/texture_cache.cpp
@@ -491,8 +491,6 @@ void TextureCache::DeleteImage(ImageId image_id) {
     ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
     ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
 
-    image.flags |= ImageFlagBits::Deleted;
-
     // Remove any registered meta areas.
     const auto& meta_info = image.info.meta_info;
     if (meta_info.cmask_addr) {