diff --git a/src/core/libraries/kernel/kernel.cpp b/src/core/libraries/kernel/kernel.cpp index 180850217..930640d0e 100644 --- a/src/core/libraries/kernel/kernel.cpp +++ b/src/core/libraries/kernel/kernel.cpp @@ -273,6 +273,10 @@ void RegisterKernel(Core::Loader::SymbolsResolver* sym) { Libraries::Net::sceNetInetNtop); // TODO fix it to sys_ ... LIB_FUNCTION("4n51s0zEf0c", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sceNetInetPton); // TODO fix it to sys_ ... + LIB_FUNCTION("XVL8So3QJUk", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_connect); + LIB_FUNCTION("3e+4Iv7IJ8U", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_accept); + LIB_FUNCTION("aNeavPDNKzA", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_sendmsg); + LIB_FUNCTION("pxnCmagrtao", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_listen); } } // namespace Libraries::Kernel diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp index 18676cbdf..5e94199e1 100644 --- a/src/core/libraries/kernel/memory.cpp +++ b/src/core/libraries/kernel/memory.cpp @@ -222,9 +222,10 @@ s32 PS4_SYSV_ABI sceKernelMapDirectMemory2(void** addr, u64 len, s32 type, s32 p return ret; } -s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t len, int prot, - int flags, const char* name) { - +s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags, + const char* name) { + LOG_INFO(Kernel_Vmm, "in_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}, name = '{}'", + fmt::ptr(*addr_in_out), len, prot, flags, name); if (len == 0 || !Common::Is16KBAligned(len)) { LOG_ERROR(Kernel_Vmm, "len is 0 or not 16kb multiple"); return ORBIS_KERNEL_ERROR_EINVAL; @@ -243,18 +244,14 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t const VAddr in_addr = reinterpret_cast(*addr_in_out); const auto mem_prot = static_cast(prot); const auto map_flags = static_cast(flags); - SCOPE_EXIT { - LOG_INFO(Kernel_Vmm, - "in_addr = {:#x}, out_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}", - in_addr, fmt::ptr(*addr_in_out), len, prot, flags); - }; auto* memory = Core::Memory::Instance(); - return memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags, - Core::VMAType::Flexible, name); + const auto ret = memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags, + Core::VMAType::Flexible, name); + LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr_in_out)); + return ret; } -s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot, - int flags) { +s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags) { return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "anon"); } diff --git a/src/core/libraries/kernel/memory.h b/src/core/libraries/kernel/memory.h index 6cefe0d07..ea42e7546 100644 --- a/src/core/libraries/kernel/memory.h +++ b/src/core/libraries/kernel/memory.h @@ -141,10 +141,9 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info, size_t infoSize); s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u64 alignment); -s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addrInOut, std::size_t len, int prot, - int flags, const char* name); -s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot, - int flags); +s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags, + const char* name); +s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags); int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot); s32 PS4_SYSV_ABI sceKernelMprotect(const void* addr, u64 size, s32 prot); diff --git a/src/core/libraries/kernel/threads/mutex.cpp b/src/core/libraries/kernel/threads/mutex.cpp index 956e5ef65..3dbade96a 100644 --- a/src/core/libraries/kernel/threads/mutex.cpp +++ b/src/core/libraries/kernel/threads/mutex.cpp @@ -426,6 +426,7 @@ void RegisterMutex(Core::Loader::SymbolsResolver* sym) { // Posix LIB_FUNCTION("ttHNfU+qDBU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_init); LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock); + LIB_FUNCTION("Io9+nTKXZtA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_timedlock); LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock); LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy); LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init); diff --git a/src/core/linker.cpp b/src/core/linker.cpp index c50b03a8f..1f45caf12 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -332,21 +332,22 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul sr.type = sym_type; const auto* record = m_hle_symbols.FindSymbol(sr); - if (!record) { - // Check if it an export function - const auto* p = FindExportedModule(*module, *library); - if (p && p->export_sym.GetSize() > 0) { - record = p->export_sym.FindSymbol(sr); - } - } if (record) { *return_info = *record; - Core::Devtools::Widget::ModuleList::AddModule(sr.library); - return true; } + // Check if it an export function + const auto* p = FindExportedModule(*module, *library); + if (p && p->export_sym.GetSize() > 0) { + record = p->export_sym.FindSymbol(sr); + if (record) { + *return_info = *record; + return true; + } + } + const auto aeronid = AeroLib::FindByNid(sr.name.c_str()); if (aeronid) { return_info->name = aeronid->name; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index ba3640877..54cae910b 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -182,7 +182,6 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, auto& area = CarveDmemArea(mapping_start, size)->second; area.memory_type = memory_type; area.is_free = false; - MergeAdjacent(dmem_map, dmem_area); return mapping_start; } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index d7c73ca8f..a342b47b6 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -68,6 +68,22 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id }); } +Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value, + Id cmp_value, + Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) { + const auto& buffer = ctx.buffers[handle]; + if (Sirit::ValidId(buffer.offset)) { + address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); + } + const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); + const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32]; + const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index); + const auto [scope, semantics]{AtomicArgs(ctx)}; + return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] { + return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value); + }); +} + Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value, Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { const auto& texture = ctx.images[handle & 0xFFFF]; @@ -175,6 +191,12 @@ Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange); } +Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value, + Id cmp_value) { + return BufferAtomicU32CmpSwap(ctx, inst, handle, address, value, cmp_value, + &Sirit::Module::OpAtomicCompareExchange); +} + Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) { return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 172358866..b9707224c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -96,6 +96,8 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value); +Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value, + Id cmp_value); Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index); Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp); void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 68bfcc0d0..bd10fd3df 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -634,7 +634,8 @@ void EmitContext::DefineOutputs() { } break; } - case LogicalStage::Fragment: + case LogicalStage::Fragment: { + u32 num_render_targets = 0; for (u32 i = 0; i < IR::NumRenderTargets; i++) { const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i}; if (!info.stores.GetAny(mrt)) { @@ -643,11 +644,21 @@ void EmitContext::DefineOutputs() { const u32 num_components = info.stores.NumComponents(mrt); const AmdGpu::NumberFormat num_format{runtime_info.fs_info.color_buffers[i].num_format}; const Id type{GetAttributeType(*this, num_format)[num_components]}; - const Id id{DefineOutput(type, i)}; + Id id; + if (runtime_info.fs_info.dual_source_blending) { + id = DefineOutput(type, 0); + Decorate(id, spv::Decoration::Index, i); + } else { + id = DefineOutput(type, i); + } Name(id, fmt::format("frag_color{}", i)); frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true); + ++num_render_targets; } + ASSERT_MSG(!runtime_info.fs_info.dual_source_blending || num_render_targets == 2, + "Dual source blending enabled, there must be exactly two MRT exports"); break; + } case LogicalStage::Geometry: { output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output); diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index 0abef2e81..8a99f38a9 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -26,8 +26,11 @@ void Translator::ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32 } void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) { - const u32 color_buffer_idx = + u32 color_buffer_idx = static_cast(attribute) - static_cast(IR::Attribute::RenderTarget0); + if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) { + color_buffer_idx = 0; + } const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx]; AmdGpu::NumberFormat num_format; @@ -68,8 +71,11 @@ void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR: } void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) { - const u32 color_buffer_idx = + u32 color_buffer_idx = static_cast(attribute) - static_cast(IR::Attribute::RenderTarget0); + if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) { + color_buffer_idx = 0; + } const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx]; const auto swizzled_comp = SwizzleMrtComponent(color_buffer, comp); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 5c972c607..5eb2079a4 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -70,6 +70,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) { return BUFFER_ATOMIC(AtomicOp::Add, inst); case Opcode::BUFFER_ATOMIC_SWAP: return BUFFER_ATOMIC(AtomicOp::Swap, inst); + case Opcode::BUFFER_ATOMIC_CMPSWAP: + return BUFFER_ATOMIC(AtomicOp::CmpSwap, inst); case Opcode::BUFFER_ATOMIC_SMIN: return BUFFER_ATOMIC(AtomicOp::Smin, inst); case Opcode::BUFFER_ATOMIC_UMIN: @@ -331,6 +333,10 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) { switch (op) { case AtomicOp::Swap: return ir.BufferAtomicSwap(handle, address, vdata_val, buffer_info); + case AtomicOp::CmpSwap: { + const IR::Value cmp_val = ir.GetVectorReg(vdata + 1); + return ir.BufferAtomicCmpSwap(handle, address, vdata_val, cmp_val, buffer_info); + } case AtomicOp::Add: return ir.BufferAtomicIAdd(handle, address, vdata_val, buffer_info); case AtomicOp::Smin: diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index dcb734d01..07249edfe 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -513,6 +513,11 @@ Value IREmitter::BufferAtomicSwap(const Value& handle, const Value& address, con return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value); } +Value IREmitter::BufferAtomicCmpSwap(const Value& handle, const Value& address, const Value& vdata, + const Value& cmp_value, BufferInstInfo info) { + return Inst(Opcode::BufferAtomicCmpSwap32, Flags{info}, handle, address, vdata, cmp_value); +} + U32 IREmitter::DataAppend(const U32& counter) { return Inst(Opcode::DataAppend, counter, Imm32(0)); } diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index da7adf42b..7b9b81093 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -150,6 +150,9 @@ public: const Value& value, BufferInstInfo info); [[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address, const Value& value, BufferInstInfo info); + [[nodiscard]] Value BufferAtomicCmpSwap(const Value& handle, const Value& address, + const Value& value, const Value& cmp_value, + BufferInstInfo info); [[nodiscard]] U32 DataAppend(const U32& counter); [[nodiscard]] U32 DataConsume(const U32& counter); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 647432bcf..5b3216be6 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -126,6 +126,7 @@ OPCODE(BufferAtomicAnd32, U32, Opaq OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, ) OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, ) +OPCODE(BufferAtomicCmpSwap32, U32, Opaque, Opaque, U32, U32, ) // Vector utility OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index b8ed42f5b..53d2d5303 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -196,11 +196,13 @@ struct FragmentRuntimeInfo { u32 num_inputs; std::array inputs; std::array color_buffers; + bool dual_source_blending; bool operator==(const FragmentRuntimeInfo& other) const noexcept { return std::ranges::equal(color_buffers, other.color_buffers) && en_flags.raw == other.en_flags.raw && addr_flags.raw == other.addr_flags.raw && num_inputs == other.num_inputs && + dual_source_blending == other.dual_source_blending && std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(), other.inputs.begin() + num_inputs); } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index a6ae0c304..5972296c0 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -214,6 +214,19 @@ vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor) { } } +bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor) { + using BlendFactor = Liverpool::BlendControl::BlendFactor; + switch (factor) { + case BlendFactor::Src1Color: + case BlendFactor::Src1Alpha: + case BlendFactor::InvSrc1Color: + case BlendFactor::InvSrc1Alpha: + return true; + default: + return false; + } +} + vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) { using BlendFunc = Liverpool::BlendControl::BlendFunc; switch (func) { diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index fca0a8378..61fd4a8c1 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -30,6 +30,8 @@ vk::FrontFace FrontFace(Liverpool::FrontFace mode); vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor); +bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor); + vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func); vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index d7ad47a3c..b72f77e55 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -158,6 +158,15 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS info.fs_info.addr_flags = regs.ps_input_addr; const auto& ps_inputs = regs.ps_inputs; info.fs_info.num_inputs = regs.num_interp; + const auto& cb0_blend = regs.blend_control[0]; + info.fs_info.dual_source_blending = + LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.color_dst_factor) || + LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.color_src_factor); + if (cb0_blend.separate_alpha_blend) { + info.fs_info.dual_source_blending |= + LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.alpha_dst_factor) || + LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.alpha_src_factor); + } for (u32 i = 0; i < regs.num_interp; i++) { info.fs_info.inputs[i] = { .param_index = u8(ps_inputs[i].input_offset.Value()), diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 63cfc4431..4b173c313 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -299,6 +299,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { auto& new_image = slot_images[new_image_id]; src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); + RefreshImage(new_image); new_image.CopyImage(src_image); if (src_image.binding.is_bound || src_image.binding.is_target) {