diff --git a/src/common/config.cpp b/src/common/config.cpp index ebdd9c320..3cf9af150 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -25,7 +25,9 @@ static bool shouldDumpPM4 = false; static u32 vblankDivider = 1; static bool vkValidation = false; static bool vkValidationSync = false; +static bool vkValidationGpu = false; static bool rdocEnable = false; +static bool rdocMarkersEnable = false; // Gui std::string settings_install_dir = ""; u32 main_window_geometry_x = 400; @@ -102,6 +104,10 @@ bool isRdocEnabled() { return rdocEnable; } +bool isMarkersEnabled() { + return rdocMarkersEnable; +} + u32 vblankDiv() { return vblankDivider; } @@ -114,6 +120,10 @@ bool vkValidationSyncEnabled() { return vkValidationSync; } +bool vkValidationGpuEnabled() { + return vkValidationGpu; +} + void setScreenWidth(u32 width) { screenWidth = width; } @@ -319,7 +329,9 @@ void load(const std::filesystem::path& path) { gpuId = toml::find_or(vk, "gpuId", -1); vkValidation = toml::find_or(vk, "validation", false); vkValidationSync = toml::find_or(vk, "validation_sync", false); + vkValidationGpu = toml::find_or(vk, "validation_gpu", true); rdocEnable = toml::find_or(vk, "rdocEnable", false); + rdocMarkersEnable = toml::find_or(vk, "rdocMarkersEnable", false); } if (data.contains("Debug")) { @@ -394,7 +406,9 @@ void save(const std::filesystem::path& path) { data["Vulkan"]["gpuId"] = gpuId; data["Vulkan"]["validation"] = vkValidation; data["Vulkan"]["validation_sync"] = vkValidationSync; + data["Vulkan"]["validation_gpu"] = vkValidationGpu; data["Vulkan"]["rdocEnable"] = rdocEnable; + data["Vulkan"]["rdocMarkersEnable"] = rdocMarkersEnable; data["Debug"]["DebugDump"] = isDebugDump; data["LLE"]["libc"] = isLibc; data["GUI"]["theme"] = mw_themes; diff --git a/src/common/config.h b/src/common/config.h index ad0aad221..37ace79c3 100644 --- a/src/common/config.h +++ b/src/common/config.h @@ -27,6 +27,7 @@ bool nullGpu(); bool dumpShaders(); bool dumpPM4(); bool isRdocEnabled(); +bool isMarkersEnabled(); u32 vblankDiv(); void setDebugDump(bool enable); @@ -50,6 +51,7 @@ void setRdocEnabled(bool enable); bool vkValidationEnabled(); bool vkValidationSyncEnabled(); +bool vkValidationGpuEnabled(); // Gui void setMainWindowGeometry(u32 x, u32 y, u32 w, u32 h); diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index 650252f95..c2ee6d592 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -956,9 +956,9 @@ int PS4_SYSV_ABI sceGnmGetGpuBlockStatus() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() { - LOG_DEBUG(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency() { + LOG_TRACE(Lib_GnmDriver, "called"); + return Config::isNeoMode() ? 911'000'000 : 800'000'000; } int PS4_SYSV_ABI sceGnmGetGpuInfoStatus() { @@ -1706,8 +1706,18 @@ int PS4_SYSV_ABI sceGnmSetupMipStatsReport() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmSetVgtControl() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); +s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_minus_one, + u32 partial_vs_wave_mode, u32 wd_switch_only_on_eop_mode) { + LOG_TRACE(Lib_GnmDriver, "called"); + + if (!cmdbuf || size != 3 || (prim_group_sz_minus_one >= 0x100) || + ((wd_switch_only_on_eop_mode | partial_vs_wave_mode) >= 2)) { + return -1; + } + + const u32 reg_value = + ((partial_vs_wave_mode & 1) << 0x10) | (prim_group_sz_minus_one & 0xffffu); + PM4CmdSetData::SetContextReg(cmdbuf, 0x2aau, reg_value); // IA_MULTI_VGT_PARAM return ORBIS_OK; } diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 8100b1164..84872297e 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -85,7 +85,7 @@ int PS4_SYSV_ABI sceGnmGetDebugTimestamp(); int PS4_SYSV_ABI sceGnmGetEqEventType(); int PS4_SYSV_ABI sceGnmGetEqTimeStamp(); int PS4_SYSV_ABI sceGnmGetGpuBlockStatus(); -int PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency(); +u32 PS4_SYSV_ABI sceGnmGetGpuCoreClockFrequency(); int PS4_SYSV_ABI sceGnmGetGpuInfoStatus(); int PS4_SYSV_ABI sceGnmGetLastWaitedAddress(); int PS4_SYSV_ABI sceGnmGetNumTcaUnits(); @@ -161,7 +161,8 @@ int PS4_SYSV_ABI sceGnmSetResourceUserData(); int PS4_SYSV_ABI sceGnmSetSpiEnableSqCounters(); int PS4_SYSV_ABI sceGnmSetSpiEnableSqCountersForUnitInstance(); int PS4_SYSV_ABI sceGnmSetupMipStatsReport(); -int PS4_SYSV_ABI sceGnmSetVgtControl(); +s32 PS4_SYSV_ABI sceGnmSetVgtControl(u32* cmdbuf, u32 size, u32 prim_group_sz_minus_one, + u32 partial_vs_wave_mode, u32 wd_switch_only_on_eop_mode); s32 PS4_SYSV_ABI sceGnmSetVsShader(u32* cmdbuf, u32 size, const u32* vs_regs, u32 shader_modifier); int PS4_SYSV_ABI sceGnmSetWaveLimitMultiplier(); int PS4_SYSV_ABI sceGnmSetWaveLimitMultipliers(); diff --git a/src/core/libraries/kernel/threads/semaphore.cpp b/src/core/libraries/kernel/threads/semaphore.cpp index 370dba445..5441c641a 100644 --- a/src/core/libraries/kernel/threads/semaphore.cpp +++ b/src/core/libraries/kernel/threads/semaphore.cpp @@ -174,10 +174,16 @@ s32 PS4_SYSV_ABI sceKernelCreateSema(OrbisKernelSema* sem, const char* pName, u3 } s32 PS4_SYSV_ABI sceKernelWaitSema(OrbisKernelSema sem, s32 needCount, u32* pTimeout) { + if (!sem) { + return ORBIS_KERNEL_ERROR_ESRCH; + } return sem->Wait(true, needCount, pTimeout); } s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) { + if (!sem) { + return ORBIS_KERNEL_ERROR_ESRCH; + } if (!sem->Signal(signalCount)) { return ORBIS_KERNEL_ERROR_EINVAL; } @@ -185,10 +191,16 @@ s32 PS4_SYSV_ABI sceKernelSignalSema(OrbisKernelSema sem, s32 signalCount) { } s32 PS4_SYSV_ABI sceKernelPollSema(OrbisKernelSema sem, s32 needCount) { + if (!sem) { + return ORBIS_KERNEL_ERROR_ESRCH; + } return sem->Wait(false, needCount, nullptr); } int PS4_SYSV_ABI sceKernelCancelSema(OrbisKernelSema sem, s32 setCount, s32* pNumWaitThreads) { + if (!sem) { + return ORBIS_KERNEL_ERROR_ESRCH; + } return sem->Cancel(setCount, pNumWaitThreads); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index e85272e96..bd34ed3db 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -386,19 +386,12 @@ static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 com if (is_signed) { value = ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset, ctx.ConstU32(bit_width)); - value = ctx.OpConvertSToF(ctx.F32[1], value); } else { value = ctx.OpBitFieldUExtract(ctx.U32[1], value, comp_offset, ctx.ConstU32(bit_width)); - value = ctx.OpConvertUToF(ctx.F32[1], value); - } - } else { - if (is_signed) { - value = ctx.OpConvertSToF(ctx.F32[1], value); - } else { - value = ctx.OpConvertUToF(ctx.F32[1], value); } } + value = ctx.OpBitcast(ctx.F32[1], value); return ConvertValue(ctx, value, num_format, bit_width); } break; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 030d39485..e2d2c1ae0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -33,14 +33,14 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id c operands.operands); } -Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id offset) { const auto& texture = ctx.images[handle & 0xFFFF]; const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]); const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler); return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, - spv::ImageOperandsMask::Lod, ctx.ConstF32(0.f)); + spv::ImageOperandsMask::Lod, lod); } Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 51899eb4d..0b2020f18 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -359,7 +359,7 @@ Id EmitConvertU32U16(EmitContext& ctx, Id value); Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, Id offset); -Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias_lc, +Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id offset); Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref, Id bias_lc, const IR::Value& offset); diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 97438f80a..6c96faa3d 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -376,9 +376,11 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors, return -1; } // We have found this pattern. Build the sharp. - std::array buffer; + std::array buffer; buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32(); - buffer[1] = handle->Arg(2).U32() | handle->Arg(3).U64() << 32; + buffer[1] = 0; + buffer[2] = handle->Arg(2).U32(); + buffer[3] = handle->Arg(3).U32(); cbuf = std::bit_cast(buffer); // Assign a binding to this sharp. return descriptors.Add(BufferResource{ diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 4ab71c3b7..b936e06aa 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -116,7 +116,7 @@ struct PushData { std::array buf_offsets; void AddOffset(u32 binding, u32 offset) { - ASSERT(offset < 64 && binding < 32); + ASSERT(offset < 256 && binding < buf_offsets.size()); buf_offsets[binding] = offset; } }; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index af1963eec..517f9d53a 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -180,6 +180,17 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanSignal(Platform::InterruptId::GfxFlip); break; } + case PM4CmdNop::PayloadType::DebugMarkerPush: { + const auto marker_sz = nop->header.count.Value() * 2; + const std::string_view label{reinterpret_cast(&nop->data_block[1]), + marker_sz}; + rasterizer->ScopeMarkerBegin(label); + break; + } + case PM4CmdNop::PayloadType::DebugMarkerPop: { + rasterizer->ScopeMarkerEnd(); + break; + } default: break; } @@ -226,7 +237,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spantype3.count; - if (nop_offset == 0x0e || nop_offset == 0x0d) { + if (nop_offset == 0x0e || nop_offset == 0x0d || nop_offset == 0x0b) { ASSERT_MSG(payload[nop_offset] == 0xc0001000, "NOP hint is missing in CB setup sequence"); last_cb_extent[col_buf_id].raw = payload[nop_offset + 1]; @@ -295,8 +306,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index->draw_initiator; if (rasterizer) { - rasterizer->ScopeMarkerBegin( - fmt::format("dcb:{}:DrawIndex2", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndex2", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->Draw(true); rasterizer->ScopeMarkerEnd(); } @@ -308,8 +320,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index_off->draw_initiator; if (rasterizer) { - rasterizer->ScopeMarkerBegin(fmt::format( - "dcb:{}:DrawIndexOffset2", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexOffset2", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->Draw(true, draw_index_off->index_offset); rasterizer->ScopeMarkerEnd(); } @@ -320,8 +333,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanindex_count; regs.draw_initiator = draw_index->draw_initiator; if (rasterizer) { - rasterizer->ScopeMarkerBegin( - fmt::format("dcb:{}:DrawIndexAuto", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:DrawIndexAuto", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->Draw(false); rasterizer->ScopeMarkerEnd(); } @@ -334,8 +348,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spandim_z; regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { - rasterizer->ScopeMarkerBegin( - fmt::format("dcb:{}:Dispatch", reinterpret_cast(dcb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("dcb:{}:Dispatch", cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->DispatchDirect(); rasterizer->ScopeMarkerEnd(); } @@ -486,8 +501,9 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { regs.cs_program.dim_z = dispatch_direct->dim_z; regs.cs_program.dispatch_initiator = dispatch_direct->dispatch_initiator; if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) { - rasterizer->ScopeMarkerBegin(fmt::format( - "acb[{}]:{}:Dispatch", vqid, reinterpret_cast(acb.data()))); + const auto cmd_address = reinterpret_cast(header); + rasterizer->ScopeMarkerBegin(fmt::format("acb[{}]:{}:Dispatch", vqid, cmd_address)); + rasterizer->Breadcrumb(u64(cmd_address)); rasterizer->DispatchDirect(); rasterizer->ScopeMarkerEnd(); } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 3ebd9a971..779e55368 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -766,7 +766,8 @@ struct Liverpool { } TilingMode GetTilingMode() const { - return attrib.tile_mode_index; + return info.linear_general ? TilingMode::Display_Linear + : attrib.tile_mode_index.Value(); } bool IsTiled() const { diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h index e5f618cc2..5ab233fdc 100644 --- a/src/video_core/amdgpu/pm4_cmds.h +++ b/src/video_core/amdgpu/pm4_cmds.h @@ -282,6 +282,13 @@ enum class InterruptSelect : u32 { IrqUndocumented = 3, }; +static u64 GetGpuClock64() { + auto now = std::chrono::high_resolution_clock::now(); + auto duration = now.time_since_epoch(); + auto ticks = std::chrono::duration_cast(duration).count(); + return static_cast(ticks); +} + struct PM4CmdEventWriteEop { PM4Type3Header header; union { @@ -325,6 +332,10 @@ struct PM4CmdEventWriteEop { *Address() = DataQWord(); break; } + case DataSelect::GpuClock64: { + *Address() = GetGpuClock64(); + break; + } case DataSelect::PerfCounter: { *Address() = Common::FencedRDTSC(); break; @@ -652,13 +663,6 @@ struct PM4CmdReleaseMem { return data_lo | u64(data_hi) << 32; } - uint64_t GetGpuClock64() const { - auto now = std::chrono::high_resolution_clock::now(); - auto duration = now.time_since_epoch(); - auto ticks = std::chrono::duration_cast(duration).count(); - return static_cast(ticks); - } - void SignalFence(Platform::InterruptId irq_id) const { switch (data_sel.Value()) { case DataSelect::Data32Low: { diff --git a/src/video_core/amdgpu/pm4_opcodes.h b/src/video_core/amdgpu/pm4_opcodes.h index 8922c4ea3..fba0cbb9f 100644 --- a/src/video_core/amdgpu/pm4_opcodes.h +++ b/src/video_core/amdgpu/pm4_opcodes.h @@ -41,6 +41,7 @@ enum class PM4ItOpcode : u32 { CondIndirectBuffer = 0x3F, CopyData = 0x40, CommandProcessorDma = 0x41, + PfpSyncMe = 0x42, SurfaceSync = 0x43, CondWrite = 0x45, EventWrite = 0x46, diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp index e9498b352..d112864d5 100644 --- a/src/video_core/buffer_cache/buffer.cpp +++ b/src/video_core/buffer_cache/buffer.cpp @@ -106,10 +106,8 @@ Buffer::Buffer(const Vulkan::Instance& instance_, MemoryUsage usage_, VAddr cpu_ VmaAllocationInfo alloc_info{}; buffer.Create(buffer_ci, usage, &alloc_info); - if (instance->HasDebuggingToolAttached()) { - const auto device = instance->GetDevice(); - Vulkan::SetObjectName(device, Handle(), "Buffer {:#x} {} KiB", cpu_addr, size_bytes / 1024); - } + const auto device = instance->GetDevice(); + Vulkan::SetObjectName(device, Handle(), "Buffer {:#x}:{:#x}", cpu_addr, size_bytes); // Map it if it is host visible. VkMemoryPropertyFlags property_flags{}; @@ -152,10 +150,8 @@ StreamBuffer::StreamBuffer(const Vulkan::Instance& instance, Vulkan::Scheduler& ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); const auto device = instance.GetDevice(); - if (instance.HasDebuggingToolAttached()) { - Vulkan::SetObjectName(device, Handle(), "StreamBuffer({}): {} KiB", BufferTypeName(usage), - size_bytes / 1024); - } + Vulkan::SetObjectName(device, Handle(), "StreamBuffer({}):{:#x}", BufferTypeName(usage), + size_bytes); } std::pair StreamBuffer::Map(u64 size, u64 alignment) { diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h index e0d9da08f..d373fbffb 100644 --- a/src/video_core/buffer_cache/buffer.h +++ b/src/video_core/buffer_cache/buffer.h @@ -146,6 +146,10 @@ public: return offset; } + u64 GetFreeSize() const { + return size_bytes - offset - mapped_size; + } + private: struct Watch { u64 tick{}; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 2d396daf0..eedba4c82 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -8,6 +8,7 @@ #include #include "common/assert.h" +#include "common/config.h" #include "sdl_window.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -213,6 +214,13 @@ bool Instance::CreateDevice() { add_extension(VK_KHR_MAINTENANCE_4_EXTENSION_NAME); add_extension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME); add_extension(VK_EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION_EXTENSION_NAME); + const bool has_sync2 = add_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME); + + if (has_sync2) { + has_nv_checkpoints = Config::isMarkersEnabled() + ? add_extension(VK_NV_DEVICE_DIAGNOSTIC_CHECKPOINTS_EXTENSION_NAME) + : false; + } const auto family_properties = physical_device.getQueueFamilyProperties(); if (family_properties.empty()) { @@ -308,6 +316,9 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceRobustness2FeaturesEXT{ .nullDescriptor = true, }, + vk::PhysicalDeviceSynchronization2Features{ + .synchronization2 = has_sync2, + }, }; if (!color_write_en) { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index a8c0dcf45..2f2397d64 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -88,6 +88,10 @@ public: return profiler_context; } + bool HasNvCheckpoints() const { + return has_nv_checkpoints; + } + /// Returns true when a known debugging tool is attached. bool HasDebuggingToolAttached() const { return has_renderdoc || has_nsight_graphics; @@ -259,6 +263,7 @@ private: bool debug_utils_supported{}; bool has_nsight_graphics{}; bool has_renderdoc{}; + bool has_nv_checkpoints{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_platform.cpp b/src/video_core/renderer_vulkan/vk_platform.cpp index 0915514b8..33113c58b 100644 --- a/src/video_core/renderer_vulkan/vk_platform.cpp +++ b/src/video_core/renderer_vulkan/vk_platform.cpp @@ -221,12 +221,61 @@ vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT vk::Bool32 enable_sync = enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False; - vk::LayerSettingEXT layer_set = { - .pLayerName = VALIDATION_LAYER_NAME, - .pSettingName = "validate_sync", - .type = vk::LayerSettingTypeEXT::eBool32, - .valueCount = 1, - .pValues = &enable_sync, + vk::Bool32 enable_gpuav = + enable_validation && Config::vkValidationSyncEnabled() ? vk::True : vk::False; + const char* gpuav_mode = enable_validation && Config::vkValidationGpuEnabled() + ? "GPU_BASED_GPU_ASSISTED" + : "GPU_BASED_NONE"; + const std::array layer_setings = { + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "validate_sync", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_sync, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "sync_queue_submit", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_sync, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "validate_gpu_based", + .type = vk::LayerSettingTypeEXT::eString, + .valueCount = 1, + .pValues = &gpuav_mode, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_reserve_binding_slot", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_descriptor_checks", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_validate_indirect_buffer", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, + vk::LayerSettingEXT{ + .pLayerName = VALIDATION_LAYER_NAME, + .pSettingName = "gpuav_buffer_copies", + .type = vk::LayerSettingTypeEXT::eBool32, + .valueCount = 1, + .pValues = &enable_gpuav, + }, }; vk::StructureChain instance_ci_chain = { @@ -238,8 +287,8 @@ vk::UniqueInstance CreateInstance(vk::DynamicLoader& dl, Frontend::WindowSystemT .ppEnabledExtensionNames = extensions.data(), }, vk::LayerSettingsCreateInfoEXT{ - .settingCount = 1, - .pSettings = &layer_set, + .settingCount = layer_setings.size(), + .pSettings = layer_setings.data(), }, }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 51de09f7a..542624a0e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -152,7 +152,8 @@ void Rasterizer::BeginRendering() { .stencil = regs.stencil_clear}}, }; texture_cache.TouchMeta(htile_address, false); - state.num_depth_attachments++; + state.has_depth = true; + state.has_stencil = image.info.usage.stencil; } scheduler.BeginRendering(state); } @@ -230,16 +231,42 @@ void Rasterizer::UpdateDepthStencilState() { cmdbuf.setDepthBoundsTestEnable(depth.depth_bounds_enable); } -void Rasterizer::ScopeMarkerBegin(const std::string& str) { +void Rasterizer::ScopeMarkerBegin(const std::string_view& str) { + if (!Config::isMarkersEnabled()) { + return; + } + const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.beginDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{ - .pLabelName = str.c_str(), + .pLabelName = str.data(), }); } void Rasterizer::ScopeMarkerEnd() { + if (!Config::isMarkersEnabled()) { + return; + } + const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.endDebugUtilsLabelEXT(); } +void Rasterizer::ScopedMarkerInsert(const std::string_view& str) { + if (!Config::isMarkersEnabled()) { + return; + } + + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.insertDebugUtilsLabelEXT(vk::DebugUtilsLabelEXT{ + .pLabelName = str.data(), + }); +} + +void Rasterizer::Breadcrumb(u64 id) { + if (!instance.HasNvCheckpoints()) { + return; + } + scheduler.CommandBuffer().setCheckpointNV(id); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 685ba6e00..a151ebc27 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -35,8 +35,10 @@ public: void DispatchDirect(); - void ScopeMarkerBegin(const std::string& str); + void ScopeMarkerBegin(const std::string_view& str); void ScopeMarkerEnd(); + void ScopedMarkerInsert(const std::string_view& str); + void Breadcrumb(u64 id); void InvalidateMemory(VAddr addr, u64 size); void MapMemory(VAddr addr, u64 size); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index fb64285f1..a6c2536ba 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -38,8 +38,7 @@ void Scheduler::BeginRendering(const RenderState& new_state) { .layerCount = 1, .colorAttachmentCount = render_state.num_color_attachments, .pColorAttachments = render_state.color_attachments.data(), - .pDepthAttachment = - render_state.num_depth_attachments ? &render_state.depth_attachment : nullptr, + .pDepthAttachment = render_state.has_depth ? &render_state.depth_attachment : nullptr, }; current_cmdbuf.beginRendering(rendering_info); @@ -50,6 +49,8 @@ void Scheduler::EndRendering() { return; } is_rendering = false; + current_cmdbuf.endRendering(); + boost::container::static_vector barriers; for (size_t i = 0; i < render_state.num_color_attachments; ++i) { barriers.push_back(vk::ImageMemoryBarrier{ @@ -70,10 +71,35 @@ void Scheduler::EndRendering() { }, }); } - current_cmdbuf.endRendering(); + if (render_state.has_depth) { + barriers.push_back(vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite, + .oldLayout = render_state.depth_attachment.imageLayout, + .newLayout = render_state.depth_attachment.imageLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = render_state.depth_image, + .subresourceRange = + { + .aspectMask = vk::ImageAspectFlagBits::eDepth | + (render_state.has_stencil ? vk::ImageAspectFlagBits::eStencil + : vk::ImageAspectFlagBits::eNone), + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }); + } + if (!barriers.empty()) { - current_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, - vk::PipelineStageFlagBits::eFragmentShader, + const auto src_stages = + vk::PipelineStageFlagBits::eColorAttachmentOutput | + (render_state.has_depth ? vk::PipelineStageFlagBits::eLateFragmentTests | + vk::PipelineStageFlagBits::eEarlyFragmentTests + : vk::PipelineStageFlagBits::eNone); + current_cmdbuf.pipelineBarrier(src_stages, vk::PipelineStageFlagBits::eFragmentShader, vk::DependencyFlagBits::eByRegion, {}, {}, barriers); } } @@ -158,6 +184,13 @@ void Scheduler::SubmitExecution(SubmitInfo& info) { try { instance.GetGraphicsQueue().submit(submit_info, info.fence); } catch (vk::DeviceLostError& err) { + if (instance.HasNvCheckpoints()) { + const auto checkpoint_data = instance.GetGraphicsQueue().getCheckpointData2NV(); + for (const auto& cp : checkpoint_data) { + LOG_CRITICAL(Render_Vulkan, "{}: {:#x}", vk::to_string(cp.stage), + reinterpret_cast(cp.pCheckpointMarker)); + } + } UNREACHABLE_MSG("Device lost during submit: {}", err.what()); } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index b82d558ca..1140bfbc2 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -20,7 +20,8 @@ struct RenderState { vk::RenderingAttachmentInfo depth_attachment{}; vk::Image depth_image{}; u32 num_color_attachments{}; - u32 num_depth_attachments{}; + bool has_depth{}; + bool has_stencil{}; u32 width = std::numeric_limits::max(); u32 height = std::numeric_limits::max(); diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index f7aef8471..f1148760e 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" +#include "common/config.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -154,6 +155,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, }; image.Create(image_ci); + + Vulkan::SetObjectName(instance->GetDevice(), (vk::Image)image, "Image {:#x}:{:#x}", + info.guest_address, info.guest_size_bytes); } void Image::Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index 94917be0a..17b78a6d5 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp @@ -189,6 +189,8 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice resources.layers = num_slices; meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0; usage.depth_target = true; + usage.stencil = + buffer.stencil_info.format != AmdGpu::Liverpool::DepthBuffer::StencilFormat::Invalid; guest_address = buffer.Address(); const auto depth_slice_sz = buffer.GetDepthSliceSize(); @@ -260,7 +262,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { case AmdGpu::TilingMode::Display_MacroTiled: case AmdGpu::TilingMode::Texture_MacroTiled: case AmdGpu::TilingMode::Depth_MacroTiled: { - // ASSERT(!props.is_cube && !props.is_block); + ASSERT(!props.is_block); ASSERT(num_samples == 1); std::tie(mip_info.pitch, mip_info.size) = ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index); diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index ef6163c4e..cbf77f2d8 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -92,6 +92,8 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Liverpool::ColorBuffer& col_buffer, bool is_vo_surface) noexcept { const auto base_format = Vulkan::LiverpoolToVK::SurfaceFormat(col_buffer.info.format, col_buffer.NumFormat()); + range.base.layer = col_buffer.view.slice_start; + range.extent.layers = col_buffer.NumSlices(); format = Vulkan::LiverpoolToVK::AdjustColorBufferFormat( base_format, col_buffer.info.comp_swap.Value(), is_vo_surface); } diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index d3a7d7960..75fa378cd 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -194,6 +194,7 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eR32G32Sfloat: case vk::Format::eR32G32Uint: case vk::Format::eR16G16B16A16Unorm: + case vk::Format::eR16G16B16A16Sfloat: return vk::Format::eR32G32Uint; case vk::Format::eBc2SrgbBlock: case vk::Format::eBc2UnormBlock: @@ -397,7 +398,7 @@ std::optional TileManager::TryDetile(Image& image) { const u32 image_size = image.info.guest_size_bytes; const auto [in_buffer, in_offset] = [&] -> std::pair { // Use stream buffer for smaller textures. - if (image_size <= StreamBufferSize) { + if (image_size <= stream_buffer.GetFreeSize()) { u32 offset = stream_buffer.Copy(image.info.guest_address, image_size); return {stream_buffer.Handle(), offset}; }