From 77da8bac00ef22b8b79abd3cde284abdf3f18514 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Fri, 6 Dec 2024 00:46:34 +0200 Subject: [PATCH 1/8] core: Return proper address of eh frame/add more opcodes --- src/core/module.cpp | 4 ++-- src/emulator.cpp | 2 +- .../frontend/translate/scalar_alu.cpp | 21 +++++++++++++++++++ .../frontend/translate/translate.h | 2 ++ src/video_core/amdgpu/liverpool.cpp | 4 ++-- 5 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/core/module.cpp b/src/core/module.cpp index ef34f25c1..70afb932c 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -470,8 +470,8 @@ OrbisKernelModuleInfoEx Module::GetModuleInfoEx() const { .tls_align = tls.align, .init_proc_addr = base_virtual_addr + dynamic_info.init_virtual_addr, .fini_proc_addr = base_virtual_addr + dynamic_info.fini_virtual_addr, - .eh_frame_hdr_addr = eh_frame_hdr_addr, - .eh_frame_addr = eh_frame_addr, + .eh_frame_hdr_addr = base_virtual_addr + eh_frame_hdr_addr, + .eh_frame_addr = base_virtual_addr + eh_frame_addr, .eh_frame_hdr_size = eh_frame_hdr_size, .eh_frame_size = eh_frame_size, .segments = info.segments, diff --git a/src/emulator.cpp b/src/emulator.cpp index 60d6e18d7..8a7c04cf4 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -266,7 +266,7 @@ void Emulator::Run(const std::filesystem::path& file) { } void Emulator::LoadSystemModules(const std::filesystem::path& file, std::string game_serial) { - constexpr std::array ModulesToLoad{ + constexpr std::array ModulesToLoad{ {{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2}, {"libSceFiber.sprx", &Libraries::Fiber::RegisterlibSceFiber}, {"libSceUlt.sprx", nullptr}, diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index de8b9da87..75ad957b3 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -50,6 +50,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_OR_B64(NegateMode::None, false, inst); case Opcode::S_XOR_B32: return S_XOR_B32(inst); + case Opcode::S_NOT_B32: + return S_NOT_B32(inst); case Opcode::S_XOR_B64: return S_OR_B64(NegateMode::None, true, inst); case Opcode::S_ANDN2_B32: @@ -94,6 +96,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) { return S_BREV_B32(inst); case Opcode::S_BCNT1_I32_B64: return S_BCNT1_I32_B64(inst); + case Opcode::S_FF1_I32_B64: + return S_FF1_I32_B64(inst); case Opcode::S_AND_SAVEEXEC_B64: return S_SAVEEXEC_B64(NegateMode::None, false, inst); case Opcode::S_ORN2_SAVEEXEC_B64: @@ -301,6 +305,10 @@ void Translator::S_AND_B64(NegateMode negate, const GcnInst& inst) { ASSERT_MSG(-s32(operand.code) + SignedConstIntNegMin - 1 == -1, "SignedConstIntNeg must be -1"); return ir.Imm1(true); + case OperandField::LiteralConst: + ASSERT_MSG(operand.code == 0 || operand.code == std::numeric_limits::max(), + "Unsupported literal {:#x}", operand.code); + return ir.Imm1(operand.code & 1); default: UNREACHABLE(); } @@ -382,6 +390,13 @@ void Translator::S_XOR_B32(const GcnInst& inst) { ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } +void Translator::S_NOT_B32(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 result{ir.BitwiseNot(src0)}; + SetDst(inst.dst[0], result); + ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); +} + void Translator::S_LSHL_B32(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::U32 src1{GetSrc(inst.src[1])}; @@ -560,6 +575,12 @@ void Translator::S_BCNT1_I32_B64(const GcnInst& inst) { ir.SetScc(ir.INotEqual(result, ir.Imm32(0))); } +void Translator::S_FF1_I32_B64(const GcnInst& inst) { + const IR::U32 src0{GetSrc(inst.src[0])}; + const IR::U32 result{ir.Select(ir.IEqual(src0, ir.Imm32(0U)), ir.Imm32(-1), ir.FindILsb(src0))}; + SetDst(inst.dst[0], result); +} + void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst) { // This instruction normally operates on 64-bit data (EXEC, VCC, SGPRs) // However here we flatten it to 1-bit EXEC and 1-bit VCC. For the destination diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 3b89372bd..dd379d8ea 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -96,6 +96,7 @@ public: void S_MUL_I32(const GcnInst& inst); void S_BFE_U32(const GcnInst& inst); void S_ABSDIFF_I32(const GcnInst& inst); + void S_NOT_B32(const GcnInst& inst); // SOPK void S_MOVK(const GcnInst& inst); @@ -109,6 +110,7 @@ public: void S_NOT_B64(const GcnInst& inst); void S_BREV_B32(const GcnInst& inst); void S_BCNT1_I32_B64(const GcnInst& inst); + void S_FF1_I32_B64(const GcnInst& inst); void S_GETPC_B64(u32 pc, const GcnInst& inst); void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst); diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 1bbd77f82..c0c5f1b2f 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -565,7 +565,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - if (dma_data->dst_addr_lo == 0x3022C) { + if (dma_data->dst_addr_lo == 0x3022C || !rasterizer) { break; } if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) { @@ -700,7 +700,7 @@ Liverpool::Task Liverpool::ProcessCompute(std::span acb, int vqid) { } case PM4ItOpcode::DmaData: { const auto* dma_data = reinterpret_cast(header); - if (dma_data->dst_addr_lo == 0x3022C) { + if (dma_data->dst_addr_lo == 0x3022C || !rasterizer) { break; } if (dma_data->src_sel == DmaDataSrc::Data && dma_data->dst_sel == DmaDataDst::Gds) { From 17abbcd74d5c21badda079a8e71fa9fa4c20ea30 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:21:35 -0800 Subject: [PATCH 2/8] misc: Fix clang format (#1673) --- src/video_core/amdgpu/liverpool.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index c0c5f1b2f..a4eae8e7a 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -163,8 +163,8 @@ Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { } case PM4ItOpcode::IndirectBufferConst: { const auto* indirect_buffer = reinterpret_cast(header); - auto task = ProcessCeUpdate( - {indirect_buffer->Address(), indirect_buffer->ib_size}); + auto task = + ProcessCeUpdate({indirect_buffer->Address(), indirect_buffer->ib_size}); while (!task.handle.done()) { task.handle.resume(); From d05846a327e609ba514c981c0f28777c77914271 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Fri, 6 Dec 2024 02:59:55 -0800 Subject: [PATCH 3/8] specialization: Fix fetch shader field type (#1675) --- src/shader_recompiler/frontend/fetch_shader.h | 13 ------------- src/shader_recompiler/specialization.h | 19 +++++++++---------- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 ++ .../renderer_vulkan/vk_rasterizer.cpp | 18 +++++++++++++++++- 4 files changed, 28 insertions(+), 24 deletions(-) diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h index ee9f5c805..080b0eb22 100644 --- a/src/shader_recompiler/frontend/fetch_shader.h +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -58,19 +58,6 @@ struct FetchShaderData { }) != attributes.end(); } - [[nodiscard]] std::pair GetDrawOffsets(const AmdGpu::Liverpool::Regs& regs, - const Info& info) const { - u32 vertex_offset = regs.index_offset; - u32 instance_offset = 0; - if (vertex_offset == 0 && vertex_offset_sgpr != -1) { - vertex_offset = info.user_data[vertex_offset_sgpr]; - } - if (instance_offset_sgpr != -1) { - instance_offset = info.user_data[instance_offset_sgpr]; - } - return {vertex_offset, instance_offset}; - } - bool operator==(const FetchShaderData& other) const { return attributes == other.attributes && vertex_offset_sgpr == other.vertex_offset_sgpr && instance_offset_sgpr == other.instance_offset_sgpr; diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 740b89dda..82c064640 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -57,7 +57,7 @@ struct StageSpecialization { const Shader::Info* info; RuntimeInfo runtime_info; - Gcn::FetchShaderData fetch_shader_data{}; + std::optional fetch_shader_data{}; boost::container::small_vector vs_attribs; std::bitset bitset{}; boost::container::small_vector buffers; @@ -69,15 +69,14 @@ struct StageSpecialization { explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_, const Profile& profile_, Backend::Bindings start_) : info{&info_}, runtime_info{runtime_info_}, start{start_} { - if (const auto fetch_shader = Gcn::ParseFetchShader(info_)) { - fetch_shader_data = *fetch_shader; - if (info_.stage == Stage::Vertex && !profile_.support_legacy_vertex_attributes) { - // Specialize shader on VS input number types to follow spec. - ForEachSharp(vs_attribs, fetch_shader_data.attributes, - [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { - spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt()); - }); - } + fetch_shader_data = Gcn::ParseFetchShader(info_); + if (info_.stage == Stage::Vertex && fetch_shader_data && + !profile_.support_legacy_vertex_attributes) { + // Specialize shader on VS input number types to follow spec. + ForEachSharp(vs_attribs, fetch_shader_data->attributes, + [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { + spec.num_class = AmdGpu::GetNumberClass(sharp.GetNumberFmt()); + }); } u32 binding{}; if (info->has_readconst) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 47713f0ff..82a029b95 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -279,6 +279,8 @@ bool PipelineCache::RefreshGraphicsKey() { ++remapped_cb; } + fetch_shader = std::nullopt; + Shader::Backend::Bindings binding{}; const auto& TryBindStageRemap = [&](Shader::Stage stage_in, Shader::Stage stage_out) -> bool { const auto stage_in_idx = static_cast(stage_in); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 620e5f103..e2b6d9749 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -171,6 +171,22 @@ RenderState Rasterizer::PrepareRenderState(u32 mrt_mask) { return state; } +[[nodiscard]] std::pair GetDrawOffsets( + const AmdGpu::Liverpool::Regs& regs, const Shader::Info& info, + const std::optional& fetch_shader) { + u32 vertex_offset = regs.index_offset; + u32 instance_offset = 0; + if (fetch_shader) { + if (vertex_offset == 0 && fetch_shader->vertex_offset_sgpr != -1) { + vertex_offset = info.user_data[fetch_shader->vertex_offset_sgpr]; + } + if (fetch_shader->instance_offset_sgpr != -1) { + instance_offset = info.user_data[fetch_shader->instance_offset_sgpr]; + } + } + return {vertex_offset, instance_offset}; +} + void Rasterizer::Draw(bool is_indexed, u32 index_offset) { RENDERER_TRACE; @@ -198,7 +214,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { BeginRendering(*pipeline, state); UpdateDynamicState(*pipeline); - const auto [vertex_offset, instance_offset] = fetch_shader->GetDrawOffsets(regs, vs_info); + const auto [vertex_offset, instance_offset] = GetDrawOffsets(regs, vs_info, fetch_shader); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); From 9e618c0e0c14d9fd7daf040509d71392356054be Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Fri, 6 Dec 2024 19:54:59 +0200 Subject: [PATCH 4/8] video_core: Add multipler to handle special cases of texture buffer stride mismatch (#1640) * page_manager: Enable userfaultfd by default * Much faster than page faults and causes less problems * shader_recompiler: Add texel buffer multiplier * Fixes format mismatch assert when vsharp stride is multiple of format stride * shader_recompiler: Specialize UBOs on size * Some games can perform manual vertex pulling and thus bind read only buffers of varying size. We only recompile when the vsharp size is larger than size in shader, in opposite case its not needed * clang format --- CMakeLists.txt | 4 ++++ .../backend/spirv/emit_spirv_context_get_set.cpp | 8 ++++++-- .../backend/spirv/spirv_emit_context.cpp | 2 ++ .../backend/spirv/spirv_emit_context.h | 1 + src/shader_recompiler/info.h | 5 +++++ src/shader_recompiler/specialization.h | 10 ++++++++-- src/video_core/page_manager.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 5 +++-- 8 files changed, 30 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 378b8f78d..ae6d1d74e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -875,6 +875,10 @@ target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAlloca target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h") +if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + target_compile_definitions(shadps4 PRIVATE ENABLE_USERFAULTFD) +endif() + if (APPLE) option(USE_SYSTEM_VULKAN_LOADER "Enables using the system Vulkan loader instead of directly linking with MoltenVK. Useful for loading validation layers." OFF) if (USE_SYSTEM_VULKAN_LOADER) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index d8c0a17bd..b578f0c52 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -326,7 +326,9 @@ Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) { Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { const auto& buffer = ctx.texture_buffers[handle]; const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id); - const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset); + const Id coord = + ctx.OpIAdd(ctx.U32[1], ctx.OpShiftLeftLogical(ctx.U32[1], address, buffer.coord_shift), + buffer.coord_offset); Id texel = buffer.is_storage ? ctx.OpImageRead(buffer.result_type, tex_buffer, coord) : ctx.OpImageFetch(buffer.result_type, tex_buffer, coord); if (buffer.is_integer) { @@ -372,7 +374,9 @@ void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { const auto& buffer = ctx.texture_buffers[handle]; const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id); - const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset); + const Id coord = + ctx.OpIAdd(ctx.U32[1], ctx.OpShiftLeftLogical(ctx.U32[1], address, buffer.coord_shift), + buffer.coord_offset); if (buffer.is_integer) { value = ctx.OpBitcast(buffer.result_type, value); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 4ce9f4221..5c7278c6b 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -207,6 +207,8 @@ void EmitContext::DefineBufferOffsets() { push_data_block, ConstU32(half), ConstU32(comp))}; const Id value{OpLoad(U32[1], ptr)}; tex_buffer.coord_offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(6U)); + tex_buffer.coord_shift = + OpBitFieldUExtract(U32[1], value, ConstU32(offset + 6U), ConstU32(2U)); Name(tex_buffer.coord_offset, fmt::format("texbuf{}_off", binding)); } } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 1c5da946d..4e5e7dd3b 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -223,6 +223,7 @@ public: struct TextureBufferDefinition { Id id; Id coord_offset; + Id coord_shift; u32 binding; Id image_type; Id result_type; diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index d382d0e7c..494bbb4bb 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -105,6 +105,11 @@ struct PushData { ASSERT(offset < 256 && binding < buf_offsets.size()); buf_offsets[binding] = offset; } + + void AddTexelOffset(u32 binding, u32 multiplier, u32 texel_offset) { + ASSERT(texel_offset < 64 && multiplier < 16); + buf_offsets[binding] = texel_offset | ((std::bit_width(multiplier) - 1) << 6); + } }; static_assert(sizeof(PushData) <= 128, "PushData size is greater than minimum size guaranteed by Vulkan spec"); diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 82c064640..2a3bd62f4 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -9,7 +9,6 @@ #include "frontend/fetch_shader.h" #include "shader_recompiler/backend/bindings.h" #include "shader_recompiler/info.h" -#include "shader_recompiler/ir/passes/srt.h" namespace Shader { @@ -22,8 +21,12 @@ struct VsAttribSpecialization { struct BufferSpecialization { u16 stride : 14; u16 is_storage : 1; + u32 size = 0; - auto operator<=>(const BufferSpecialization&) const = default; + bool operator==(const BufferSpecialization& other) const { + return stride == other.stride && is_storage == other.is_storage && + (size >= other.is_storage || is_storage); + } }; struct TextureBufferSpecialization { @@ -86,6 +89,9 @@ struct StageSpecialization { [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { spec.stride = sharp.GetStride(); spec.is_storage = desc.IsStorage(sharp); + if (!spec.is_storage) { + spec.size = sharp.GetSize(); + } }); ForEachSharp(binding, tex_buffers, info->texture_buffers, [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index d26a7067a..80b91b825 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -29,7 +29,7 @@ namespace VideoCore { constexpr size_t PAGESIZE = 4_KB; constexpr size_t PAGEBITS = 12; -#if ENABLE_USERFAULTFD +#ifdef ENABLE_USERFAULTFD struct PageManager::Impl { Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} { uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e2b6d9749..4e858c0d3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -548,12 +548,13 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer( vsharp.base_address, vsharp.GetSize(), desc.is_written, true, buffer_id); const u32 fmt_stride = AmdGpu::NumBits(vsharp.GetDataFmt()) >> 3; - ASSERT_MSG(fmt_stride == vsharp.GetStride(), + const u32 buf_stride = vsharp.GetStride(); + ASSERT_MSG(buf_stride % fmt_stride == 0, "Texel buffer stride must match format stride"); const u32 offset_aligned = Common::AlignDown(offset, alignment); const u32 adjust = offset - offset_aligned; ASSERT(adjust % fmt_stride == 0); - push_data.AddOffset(binding.buffer, adjust / fmt_stride); + push_data.AddTexelOffset(binding.buffer, buf_stride / fmt_stride, adjust / fmt_stride); buffer_view = vk_buffer->View(offset_aligned, vsharp.GetSize() + adjust, desc.is_written, vsharp.GetDataFmt(), vsharp.GetNumberFmt()); From 6acfdd5e33cdb4b2484e162916242f40eaa9dcc6 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle Date: Fri, 6 Dec 2024 20:00:21 +0200 Subject: [PATCH 5/8] buffer_cache: Bump usable address space to 40bits * Fixes crashes in games that use the upper region of user area --- src/video_core/buffer_cache/buffer_cache.h | 2 +- src/video_core/buffer_cache/memory_tracker_base.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b1bf77f8a..3dab95db7 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -42,7 +42,7 @@ public: struct Traits { using Entry = BufferId; - static constexpr size_t AddressSpaceBits = 39; + static constexpr size_t AddressSpaceBits = 40; static constexpr size_t FirstLevelBits = 14; static constexpr size_t PageBits = CACHING_PAGEBITS; }; diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h index 375701c4c..a59bcfff5 100644 --- a/src/video_core/buffer_cache/memory_tracker_base.h +++ b/src/video_core/buffer_cache/memory_tracker_base.h @@ -14,7 +14,7 @@ namespace VideoCore { class MemoryTracker { public: - static constexpr size_t MAX_CPU_PAGE_BITS = 39; + static constexpr size_t MAX_CPU_PAGE_BITS = 40; static constexpr size_t HIGHER_PAGE_BITS = 22; static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; From 357b7829c3eee85d59b620fcfde8562195f50ce2 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle Date: Fri, 6 Dec 2024 21:50:25 +0200 Subject: [PATCH 6/8] hot-fix: Silence depth macrotiled warning --- src/video_core/texture_cache/tile_manager.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 7430168d0..9823cb4dc 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -392,7 +392,8 @@ std::pair TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o const auto* detiler = GetDetiler(image); if (!detiler) { if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled && - image.info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled) { + image.info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled && + image.info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) { LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})", vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode)); } From 7ffa581d4b1aea106485ab3e6957836b5ad22f02 Mon Sep 17 00:00:00 2001 From: "Daniel R." <47796739+polybiusproxy@users.noreply.github.com> Date: Fri, 6 Dec 2024 22:04:36 +0100 Subject: [PATCH 7/8] The way to Unity, pt.2 (#1671) --- CMakeLists.txt | 5 +- src/common/ntapi.cpp | 2 - src/common/ntapi.h | 9 +- src/common/thread.cpp | 14 ++- src/common/thread.h | 2 + src/core/devices/logger.cpp | 1 + src/core/libraries/kernel/sync/mutex.cpp | 52 ++++++++ src/core/libraries/kernel/sync/mutex.h | 80 ++++++++++++ src/core/libraries/kernel/sync/semaphore.h | 117 ++++++++++++++++++ src/core/libraries/kernel/threads/condvar.cpp | 4 +- .../libraries/kernel/threads/event_flag.cpp | 1 - src/core/libraries/kernel/threads/pthread.cpp | 1 + src/core/libraries/kernel/threads/pthread.h | 8 +- .../libraries/kernel/threads/semaphore.cpp | 26 ++-- src/core/libraries/kernel/time.cpp | 17 ++- 15 files changed, 311 insertions(+), 28 deletions(-) create mode 100644 src/core/libraries/kernel/sync/mutex.cpp create mode 100644 src/core/libraries/kernel/sync/mutex.h create mode 100644 src/core/libraries/kernel/sync/semaphore.h diff --git a/CMakeLists.txt b/CMakeLists.txt index ae6d1d74e..84146bb01 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -210,7 +210,10 @@ set(GNM_LIB src/core/libraries/gnmdriver/gnmdriver.cpp src/core/libraries/gnmdriver/gnm_error.h ) -set(KERNEL_LIB src/core/libraries/kernel/threads/condvar.cpp +set(KERNEL_LIB src/core/libraries/kernel/sync/mutex.cpp + src/core/libraries/kernel/sync/mutex.h + src/core/libraries/kernel/sync/semaphore.h + src/core/libraries/kernel/threads/condvar.cpp src/core/libraries/kernel/threads/event_flag.cpp src/core/libraries/kernel/threads/exception.cpp src/core/libraries/kernel/threads/exception.h diff --git a/src/common/ntapi.cpp b/src/common/ntapi.cpp index ffdedb17f..e0ff1cef0 100644 --- a/src/common/ntapi.cpp +++ b/src/common/ntapi.cpp @@ -6,7 +6,6 @@ #include "ntapi.h" NtClose_t NtClose = nullptr; -NtDelayExecution_t NtDelayExecution = nullptr; NtSetInformationFile_t NtSetInformationFile = nullptr; NtCreateThread_t NtCreateThread = nullptr; NtTerminateThread_t NtTerminateThread = nullptr; @@ -18,7 +17,6 @@ void Initialize() { // http://stackoverflow.com/a/31411628/4725495 NtClose = (NtClose_t)GetProcAddress(nt_handle, "NtClose"); - NtDelayExecution = (NtDelayExecution_t)GetProcAddress(nt_handle, "NtDelayExecution"); NtSetInformationFile = (NtSetInformationFile_t)GetProcAddress(nt_handle, "NtSetInformationFile"); NtCreateThread = (NtCreateThread_t)GetProcAddress(nt_handle, "NtCreateThread"); diff --git a/src/common/ntapi.h b/src/common/ntapi.h index 743174061..cb1ba7f1c 100644 --- a/src/common/ntapi.h +++ b/src/common/ntapi.h @@ -408,7 +408,7 @@ typedef struct _TEB { /* win32/win64 */ #ifdef _WIN64 PVOID SystemReserved1[30]; /* /0190 */ #else - PVOID SystemReserved1[26]; /* 10c/ used for krnl386 private data in Wine */ + PVOID SystemReserved1[26]; /* 10c/ */ #endif char PlaceholderCompatibilityMode; /* 174/0280 */ BOOLEAN PlaceholderHydrationAlwaysExplicit; /* 175/0281 */ @@ -430,13 +430,13 @@ typedef struct _TEB { /* win32/win64 */ BYTE SpareBytes1[23]; /* 1b9/ */ ULONG TxFsContext; /* 1d0/ */ #endif - GDI_TEB_BATCH GdiTebBatch; /* 1d4/02f0 used for ntdll private data in Wine */ + GDI_TEB_BATCH GdiTebBatch; /* 1d4/02f0 */ CLIENT_ID RealClientId; /* 6b4/07d8 */ HANDLE GdiCachedProcessHandle; /* 6bc/07e8 */ ULONG GdiClientPID; /* 6c0/07f0 */ ULONG GdiClientTID; /* 6c4/07f4 */ PVOID GdiThreadLocaleInfo; /* 6c8/07f8 */ - ULONG_PTR Win32ClientInfo[62]; /* 6cc/0800 used for user32 private data in Wine */ + ULONG_PTR Win32ClientInfo[62]; /* 6cc/0800 */ PVOID glDispatchTable[233]; /* 7c4/09f0 */ PVOID glReserved1[29]; /* b68/1138 */ PVOID glReserved2; /* bdc/1220 */ @@ -511,8 +511,6 @@ static_assert(offsetof(TEB, DeallocationStack) == typedef u64(__stdcall* NtClose_t)(HANDLE Handle); -typedef u64(__stdcall* NtDelayExecution_t)(BOOL Alertable, PLARGE_INTEGER DelayInterval); - typedef u64(__stdcall* NtSetInformationFile_t)(HANDLE FileHandle, PIO_STATUS_BLOCK IoStatusBlock, PVOID FileInformation, ULONG Length, FILE_INFORMATION_CLASS FileInformationClass); @@ -525,7 +523,6 @@ typedef u64(__stdcall* NtCreateThread_t)(PHANDLE ThreadHandle, ACCESS_MASK Desir typedef u64(__stdcall* NtTerminateThread_t)(HANDLE ThreadHandle, u64 ExitStatus); extern NtClose_t NtClose; -extern NtDelayExecution_t NtDelayExecution; extern NtSetInformationFile_t NtSetInformationFile; extern NtCreateThread_t NtCreateThread; extern NtTerminateThread_t NtTerminateThread; diff --git a/src/common/thread.cpp b/src/common/thread.cpp index 46df68c38..c87aea6ef 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -147,6 +147,10 @@ void SetCurrentThreadName(const char* name) { SetThreadDescription(GetCurrentThread(), UTF8ToUTF16W(name).data()); } +void SetThreadName(void* thread, const char* name) { + SetThreadDescription(thread, UTF8ToUTF16W(name).data()); +} + #else // !MSVC_VER, so must be POSIX threads // MinGW with the POSIX threading model does not support pthread_setname_np @@ -170,11 +174,19 @@ void SetCurrentThreadName(const char* name) { pthread_setname_np(pthread_self(), name); #endif } + +void SetThreadName(void* thread, const char* name) { + // TODO +} #endif #if defined(_WIN32) void SetCurrentThreadName(const char*) { - // Do Nothing on MingW + // Do Nothing on MinGW +} + +void SetThreadName(void* thread, const char* name) { + // Do Nothing on MinGW } #endif diff --git a/src/common/thread.h b/src/common/thread.h index fd962f8e5..175ba9445 100644 --- a/src/common/thread.h +++ b/src/common/thread.h @@ -23,6 +23,8 @@ void SetCurrentThreadPriority(ThreadPriority new_priority); void SetCurrentThreadName(const char* name); +void SetThreadName(void* thread, const char* name); + class AccurateTimer { std::chrono::nanoseconds target_interval{}; std::chrono::nanoseconds total_wait{}; diff --git a/src/core/devices/logger.cpp b/src/core/devices/logger.cpp index bf5a28382..6f104509c 100644 --- a/src/core/devices/logger.cpp +++ b/src/core/devices/logger.cpp @@ -15,6 +15,7 @@ s64 Logger::write(const void* buf, size_t nbytes) { log(static_cast(buf), nbytes); return nbytes; } + size_t Logger::writev(const Libraries::Kernel::SceKernelIovec* iov, int iovcnt) { for (int i = 0; i < iovcnt; i++) { log(static_cast(iov[i].iov_base), iov[i].iov_len); diff --git a/src/core/libraries/kernel/sync/mutex.cpp b/src/core/libraries/kernel/sync/mutex.cpp new file mode 100644 index 000000000..c5e3eba1d --- /dev/null +++ b/src/core/libraries/kernel/sync/mutex.cpp @@ -0,0 +1,52 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "mutex.h" + +#include "common/assert.h" + +namespace Libraries::Kernel { + +TimedMutex::TimedMutex() { +#ifdef _WIN64 + mtx = CreateMutex(nullptr, false, nullptr); + ASSERT(mtx); +#endif +} + +TimedMutex::~TimedMutex() { +#ifdef _WIN64 + CloseHandle(mtx); +#endif +} + +void TimedMutex::lock() { +#ifdef _WIN64 + for (;;) { + u64 res = WaitForSingleObjectEx(mtx, INFINITE, true); + if (res == WAIT_OBJECT_0) { + return; + } + } +#else + mtx.lock(); +#endif +} + +bool TimedMutex::try_lock() { +#ifdef _WIN64 + return WaitForSingleObjectEx(mtx, 0, true) == WAIT_OBJECT_0; +#else + return mtx.try_lock(); +#endif +} + +void TimedMutex::unlock() { +#ifdef _WIN64 + ReleaseMutex(mtx); +#else + mtx.unlock(); +#endif +} + +} // namespace Libraries::Kernel \ No newline at end of file diff --git a/src/core/libraries/kernel/sync/mutex.h b/src/core/libraries/kernel/sync/mutex.h new file mode 100644 index 000000000..f14a920b4 --- /dev/null +++ b/src/core/libraries/kernel/sync/mutex.h @@ -0,0 +1,80 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "common/types.h" + +#ifdef _WIN64 +#include +#else +#include +#endif + +namespace Libraries::Kernel { + +class TimedMutex { +public: + TimedMutex(); + ~TimedMutex(); + + void lock(); + bool try_lock(); + + void unlock(); + + template + bool try_lock_for(const std::chrono::duration& rel_time) { +#ifdef _WIN64 + constexpr auto zero = std::chrono::duration::zero(); + const auto now = std::chrono::steady_clock::now(); + + std::chrono::steady_clock::time_point abs_time = now; + if (rel_time > zero) { + constexpr auto max = (std::chrono::steady_clock::time_point::max)(); + if (abs_time < max - rel_time) { + abs_time += rel_time; + } else { + abs_time = max; + } + } + + return try_lock_until(abs_time); +#else + return mtx.try_lock_for(rel_time); +#endif + } + + template + bool try_lock_until(const std::chrono::time_point& abs_time) { +#ifdef _WIN64 + for (;;) { + const auto now = Clock::now(); + if (abs_time <= now) { + return false; + } + + const auto rel_ms = std::chrono::ceil(abs_time - now); + u64 res = WaitForSingleObjectEx(mtx, static_cast(rel_ms.count()), true); + if (res == WAIT_OBJECT_0) { + return true; + } else if (res == WAIT_TIMEOUT) { + return false; + } + } +#else + return mtx.try_lock_until(abs_time); +#endif + } + +private: +#ifdef _WIN64 + HANDLE mtx; +#else + std::timed_mutex mtx; +#endif +}; + +} // namespace Libraries::Kernel \ No newline at end of file diff --git a/src/core/libraries/kernel/sync/semaphore.h b/src/core/libraries/kernel/sync/semaphore.h new file mode 100644 index 000000000..a103472c8 --- /dev/null +++ b/src/core/libraries/kernel/sync/semaphore.h @@ -0,0 +1,117 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "common/assert.h" +#include "common/types.h" + +#ifdef _WIN64 +#include +#else +#include +#endif + +namespace Libraries::Kernel { + +template +class Semaphore { +public: + Semaphore(s32 initialCount) +#ifndef _WIN64 + : sem{initialCount} +#endif + { +#ifdef _WIN64 + sem = CreateSemaphore(nullptr, initialCount, max, nullptr); + ASSERT(sem); +#endif + } + + ~Semaphore() { +#ifdef _WIN64 + CloseHandle(sem); +#endif + } + + void release() { +#ifdef _WIN64 + ReleaseSemaphore(sem, 1, nullptr); +#else + sem.release(); +#endif + } + + void acquire() { +#ifdef _WIN64 + for (;;) { + u64 res = WaitForSingleObjectEx(sem, INFINITE, true); + if (res == WAIT_OBJECT_0) { + return; + } + } +#else + sem.acquire(); +#endif + } + + bool try_acquire() { +#ifdef _WIN64 + return WaitForSingleObjectEx(sem, 0, true) == WAIT_OBJECT_0; +#else + return sem.try_acquire(); +#endif + } + + template + bool try_acquire_for(const std::chrono::duration& rel_time) { +#ifdef _WIN64 + const auto rel_time_ms = std::chrono::ceil(rel_time); + const u64 timeout_ms = static_cast(rel_time_ms.count()); + + if (timeout_ms == 0) { + return false; + } + + return WaitForSingleObjectEx(sem, timeout_ms, true) == WAIT_OBJECT_0; +#else + return sem.try_acquire_for(rel_time); +#endif + } + + template + bool try_acquire_until(const std::chrono::time_point& abs_time) { +#ifdef _WIN64 + const auto now = Clock::now(); + if (now >= abs_time) { + return false; + } + + const auto rel_time = std::chrono::ceil(abs_time - now); + const u64 timeout_ms = static_cast(rel_time.count()); + if (timeout_ms == 0) { + return false; + } + + u64 res = WaitForSingleObjectEx(sem, static_cast(timeout_ms), true); + return res == WAIT_OBJECT_0; +#else + return sem.try_acquire_until(abs_time); +#endif + } + +private: +#ifdef _WIN64 + HANDLE sem; +#else + std::counting_semaphore sem; +#endif +}; + +using BinarySemaphore = Semaphore<1>; +using CountingSemaphore = Semaphore<0x7FFFFFFF /*ORBIS_KERNEL_SEM_VALUE_MAX*/>; + +} // namespace Libraries::Kernel \ No newline at end of file diff --git a/src/core/libraries/kernel/threads/condvar.cpp b/src/core/libraries/kernel/threads/condvar.cpp index cbe8f6ca7..2927899d9 100644 --- a/src/core/libraries/kernel/threads/condvar.cpp +++ b/src/core/libraries/kernel/threads/condvar.cpp @@ -191,7 +191,7 @@ int PthreadCond::Signal() { PthreadMutex* mp = td->mutex_obj; has_user_waiters = SleepqRemove(sq, td); - std::binary_semaphore* waddr = nullptr; + BinarySemaphore* waddr = nullptr; if (mp->m_owner == curthread) { if (curthread->nwaiter_defer >= Pthread::MaxDeferWaiters) { curthread->WakeAll(); @@ -211,7 +211,7 @@ int PthreadCond::Signal() { struct BroadcastArg { Pthread* curthread; - std::binary_semaphore* waddrs[Pthread::MaxDeferWaiters]; + BinarySemaphore* waddrs[Pthread::MaxDeferWaiters]; int count; }; diff --git a/src/core/libraries/kernel/threads/event_flag.cpp b/src/core/libraries/kernel/threads/event_flag.cpp index 39925153c..ce75bed9e 100644 --- a/src/core/libraries/kernel/threads/event_flag.cpp +++ b/src/core/libraries/kernel/threads/event_flag.cpp @@ -118,7 +118,6 @@ public: } m_bits |= bits; - m_cond_var.notify_all(); } diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index a562c51b2..b2fe09934 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -380,6 +380,7 @@ int PS4_SYSV_ABI posix_sched_get_priority_min() { int PS4_SYSV_ABI posix_pthread_rename_np(PthreadT thread, const char* name) { LOG_INFO(Kernel_Pthread, "name = {}", name); + Common::SetThreadName(reinterpret_cast(thread->native_thr.GetHandle()), name); thread->name = name; return ORBIS_OK; } diff --git a/src/core/libraries/kernel/threads/pthread.h b/src/core/libraries/kernel/threads/pthread.h index 9d71c75e8..456c2ef37 100644 --- a/src/core/libraries/kernel/threads/pthread.h +++ b/src/core/libraries/kernel/threads/pthread.h @@ -11,6 +11,8 @@ #include #include "common/enum.h" +#include "core/libraries/kernel/sync/mutex.h" +#include "core/libraries/kernel/sync/semaphore.h" #include "core/libraries/kernel/time.h" #include "core/thread.h" #include "core/tls.h" @@ -44,7 +46,7 @@ enum class PthreadMutexProt : u32 { }; struct PthreadMutex { - std::timed_mutex m_lock; + TimedMutex m_lock; PthreadMutexFlags m_flags; Pthread* m_owner; int m_count; @@ -288,14 +290,14 @@ struct Pthread { int report_events; int event_mask; std::string name; - std::binary_semaphore wake_sema{0}; + BinarySemaphore wake_sema{0}; SleepQueue* sleepqueue; void* wchan; PthreadMutex* mutex_obj; bool will_sleep; bool has_user_waiters; int nwaiter_defer; - std::binary_semaphore* defer_waiters[MaxDeferWaiters]; + BinarySemaphore* defer_waiters[MaxDeferWaiters]; bool InCritical() const noexcept { return locklevel > 0 || critical_count > 0; diff --git a/src/core/libraries/kernel/threads/semaphore.cpp b/src/core/libraries/kernel/threads/semaphore.cpp index e3c7e9092..5aa04f251 100644 --- a/src/core/libraries/kernel/threads/semaphore.cpp +++ b/src/core/libraries/kernel/threads/semaphore.cpp @@ -6,6 +6,8 @@ #include #include +#include "core/libraries/kernel/sync/semaphore.h" + #include "common/logging/log.h" #include "core/libraries/kernel/kernel.h" #include "core/libraries/kernel/orbis_error.h" @@ -21,7 +23,7 @@ constexpr int ORBIS_KERNEL_SEM_VALUE_MAX = 0x7FFFFFFF; struct PthreadSem { explicit PthreadSem(s32 value_) : semaphore{value_}, value{value_} {} - std::counting_semaphore semaphore; + CountingSemaphore semaphore; std::atomic value; }; @@ -75,7 +77,7 @@ public: it = wait_list.erase(it); token_count -= waiter->need_count; waiter->was_signaled = true; - waiter->cv.notify_one(); + waiter->sem.release(); } return true; @@ -88,7 +90,7 @@ public: } for (auto* waiter : wait_list) { waiter->was_cancled = true; - waiter->cv.notify_one(); + waiter->sem.release(); } wait_list.clear(); token_count = set_count < 0 ? init_count : set_count; @@ -99,21 +101,21 @@ public: std::scoped_lock lk{mutex}; for (auto* waiter : wait_list) { waiter->was_deleted = true; - waiter->cv.notify_one(); + waiter->sem.release(); } wait_list.clear(); } public: struct WaitingThread { - std::condition_variable cv; + BinarySemaphore sem; u32 priority; s32 need_count; bool was_signaled{}; bool was_deleted{}; bool was_cancled{}; - explicit WaitingThread(s32 need_count, bool is_fifo) : need_count{need_count} { + explicit WaitingThread(s32 need_count, bool is_fifo) : sem{0}, need_count{need_count} { // Retrieve calling thread priority for sorting into waiting threads list. if (!is_fifo) { priority = g_curthread->attr.prio; @@ -134,24 +136,26 @@ public: } int Wait(std::unique_lock& lk, u32* timeout) { + lk.unlock(); if (!timeout) { // Wait indefinitely until we are woken up. - cv.wait(lk); + sem.acquire(); + lk.lock(); return GetResult(false); } // Wait until timeout runs out, recording how much remaining time there was. const auto start = std::chrono::high_resolution_clock::now(); - const auto signaled = cv.wait_for(lk, std::chrono::microseconds(*timeout), - [this] { return was_signaled; }); + sem.try_acquire_for(std::chrono::microseconds(*timeout)); const auto end = std::chrono::high_resolution_clock::now(); const auto time = std::chrono::duration_cast(end - start).count(); - if (signaled) { + lk.lock(); + if (was_signaled) { *timeout -= time; } else { *timeout = 0; } - return GetResult(!signaled); + return GetResult(!was_signaled); } }; diff --git a/src/core/libraries/kernel/time.cpp b/src/core/libraries/kernel/time.cpp index b586431ab..2565b8078 100644 --- a/src/core/libraries/kernel/time.cpp +++ b/src/core/libraries/kernel/time.cpp @@ -52,7 +52,22 @@ u64 PS4_SYSV_ABI sceKernelReadTsc() { int PS4_SYSV_ABI sceKernelUsleep(u32 microseconds) { #ifdef _WIN64 - std::this_thread::sleep_for(std::chrono::microseconds(microseconds)); + const auto start_time = std::chrono::high_resolution_clock::now(); + auto total_wait_time = std::chrono::microseconds(microseconds); + + while (total_wait_time.count() > 0) { + auto wait_time = std::chrono::ceil(total_wait_time).count(); + u64 res = SleepEx(static_cast(wait_time), true); + if (res == WAIT_IO_COMPLETION) { + auto elapsedTime = std::chrono::high_resolution_clock::now() - start_time; + auto elapsedMicroseconds = + std::chrono::duration_cast(elapsedTime).count(); + total_wait_time = std::chrono::microseconds(microseconds - elapsedMicroseconds); + } else { + break; + } + } + return 0; #else timespec start; From e1ecfb8dd1062d3081821aa25ba619f17c887497 Mon Sep 17 00:00:00 2001 From: squidbus <175574877+squidbus@users.noreply.github.com> Date: Fri, 6 Dec 2024 13:46:44 -0800 Subject: [PATCH 8/8] semaphore: Add GCD semaphore implementation. (#1677) --- src/core/libraries/kernel/sync/semaphore.h | 36 +++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/core/libraries/kernel/sync/semaphore.h b/src/core/libraries/kernel/sync/semaphore.h index a103472c8..884b08968 100644 --- a/src/core/libraries/kernel/sync/semaphore.h +++ b/src/core/libraries/kernel/sync/semaphore.h @@ -11,6 +11,8 @@ #ifdef _WIN64 #include +#elif defined(__APPLE__) +#include #else #include #endif @@ -21,25 +23,32 @@ template class Semaphore { public: Semaphore(s32 initialCount) -#ifndef _WIN64 +#if !defined(_WIN64) && !defined(__APPLE__) : sem{initialCount} #endif { #ifdef _WIN64 sem = CreateSemaphore(nullptr, initialCount, max, nullptr); ASSERT(sem); +#elif defined(__APPLE__) + sem = dispatch_semaphore_create(initialCount); + ASSERT(sem); #endif } ~Semaphore() { #ifdef _WIN64 CloseHandle(sem); +#elif defined(__APPLE__) + dispatch_release(sem); #endif } void release() { #ifdef _WIN64 ReleaseSemaphore(sem, 1, nullptr); +#elif defined(__APPLE__) + dispatch_semaphore_signal(sem); #else sem.release(); #endif @@ -53,6 +62,13 @@ public: return; } } +#elif defined(__APPLE__) + for (;;) { + const auto res = dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER); + if (res == 0) { + return; + } + } #else sem.acquire(); #endif @@ -61,6 +77,8 @@ public: bool try_acquire() { #ifdef _WIN64 return WaitForSingleObjectEx(sem, 0, true) == WAIT_OBJECT_0; +#elif defined(__APPLE__) + return dispatch_semaphore_wait(sem, DISPATCH_TIME_NOW) == 0; #else return sem.try_acquire(); #endif @@ -77,6 +95,10 @@ public: } return WaitForSingleObjectEx(sem, timeout_ms, true) == WAIT_OBJECT_0; +#elif defined(__APPLE__) + const auto rel_time_ns = std::chrono::ceil(rel_time).count(); + const auto timeout = dispatch_time(DISPATCH_TIME_NOW, rel_time_ns); + return dispatch_semaphore_wait(sem, timeout) == 0; #else return sem.try_acquire_for(rel_time); #endif @@ -98,6 +120,16 @@ public: u64 res = WaitForSingleObjectEx(sem, static_cast(timeout_ms), true); return res == WAIT_OBJECT_0; +#elif defined(__APPLE__) + auto abs_s = std::chrono::time_point_cast(abs_time); + auto abs_ns = std::chrono::time_point_cast(abs_time) - + std::chrono::time_point_cast(abs_s); + const timespec abs_timespec = { + .tv_sec = abs_s.time_since_epoch().count(), + .tv_nsec = abs_ns.count(), + }; + const auto timeout = dispatch_walltime(&abs_timespec, 0); + return dispatch_semaphore_wait(sem, timeout) == 0; #else return sem.try_acquire_until(abs_time); #endif @@ -106,6 +138,8 @@ public: private: #ifdef _WIN64 HANDLE sem; +#elif defined(__APPLE__) + dispatch_semaphore_t sem; #else std::counting_semaphore sem; #endif