Merge branch 'main' into sse4a_again

This commit is contained in:
Paris Oplopoios 2025-06-08 21:56:56 +03:00 committed by GitHub
commit dbe799a4e0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 114 additions and 30 deletions

View File

@ -273,6 +273,10 @@ void RegisterKernel(Core::Loader::SymbolsResolver* sym) {
Libraries::Net::sceNetInetNtop); // TODO fix it to sys_ ...
LIB_FUNCTION("4n51s0zEf0c", "libScePosix", 1, "libkernel", 1, 1,
Libraries::Net::sceNetInetPton); // TODO fix it to sys_ ...
LIB_FUNCTION("XVL8So3QJUk", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_connect);
LIB_FUNCTION("3e+4Iv7IJ8U", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_accept);
LIB_FUNCTION("aNeavPDNKzA", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_sendmsg);
LIB_FUNCTION("pxnCmagrtao", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_listen);
}
} // namespace Libraries::Kernel

View File

@ -222,9 +222,10 @@ s32 PS4_SYSV_ABI sceKernelMapDirectMemory2(void** addr, u64 len, s32 type, s32 p
return ret;
}
s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
int flags, const char* name) {
s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags,
const char* name) {
LOG_INFO(Kernel_Vmm, "in_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}, name = '{}'",
fmt::ptr(*addr_in_out), len, prot, flags, name);
if (len == 0 || !Common::Is16KBAligned(len)) {
LOG_ERROR(Kernel_Vmm, "len is 0 or not 16kb multiple");
return ORBIS_KERNEL_ERROR_EINVAL;
@ -243,18 +244,14 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t
const VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out);
const auto mem_prot = static_cast<Core::MemoryProt>(prot);
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
SCOPE_EXIT {
LOG_INFO(Kernel_Vmm,
"in_addr = {:#x}, out_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}",
in_addr, fmt::ptr(*addr_in_out), len, prot, flags);
};
auto* memory = Core::Memory::Instance();
return memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
Core::VMAType::Flexible, name);
const auto ret = memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
Core::VMAType::Flexible, name);
LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr_in_out));
return ret;
}
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
int flags) {
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags) {
return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "anon");
}

View File

@ -141,10 +141,9 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE
s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info,
size_t infoSize);
s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u64 alignment);
s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addrInOut, std::size_t len, int prot,
int flags, const char* name);
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
int flags);
s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags,
const char* name);
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags);
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot);
s32 PS4_SYSV_ABI sceKernelMprotect(const void* addr, u64 size, s32 prot);

View File

@ -426,6 +426,7 @@ void RegisterMutex(Core::Loader::SymbolsResolver* sym) {
// Posix
LIB_FUNCTION("ttHNfU+qDBU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_init);
LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
LIB_FUNCTION("Io9+nTKXZtA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_timedlock);
LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init);

View File

@ -332,21 +332,22 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul
sr.type = sym_type;
const auto* record = m_hle_symbols.FindSymbol(sr);
if (!record) {
// Check if it an export function
const auto* p = FindExportedModule(*module, *library);
if (p && p->export_sym.GetSize() > 0) {
record = p->export_sym.FindSymbol(sr);
}
}
if (record) {
*return_info = *record;
Core::Devtools::Widget::ModuleList::AddModule(sr.library);
return true;
}
// Check if it an export function
const auto* p = FindExportedModule(*module, *library);
if (p && p->export_sym.GetSize() > 0) {
record = p->export_sym.FindSymbol(sr);
if (record) {
*return_info = *record;
return true;
}
}
const auto aeronid = AeroLib::FindByNid(sr.name.c_str());
if (aeronid) {
return_info->name = aeronid->name;

View File

@ -182,7 +182,6 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
auto& area = CarveDmemArea(mapping_start, size)->second;
area.memory_type = memory_type;
area.is_free = false;
MergeAdjacent(dmem_map, dmem_area);
return mapping_start;
}

View File

@ -68,6 +68,22 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
});
}
Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id cmp_value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) {
const auto& buffer = ctx.buffers[handle];
if (Sirit::ValidId(buffer.offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
}
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)};
return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] {
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value);
});
}
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& texture = ctx.images[handle & 0xFFFF];
@ -175,6 +191,12 @@ Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange);
}
Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id cmp_value) {
return BufferAtomicU32CmpSwap(ctx, inst, handle, address, value, cmp_value,
&Sirit::Module::OpAtomicCompareExchange);
}
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd);
}

View File

@ -96,6 +96,8 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id cmp_value);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);

View File

@ -634,7 +634,8 @@ void EmitContext::DefineOutputs() {
}
break;
}
case LogicalStage::Fragment:
case LogicalStage::Fragment: {
u32 num_render_targets = 0;
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
if (!info.stores.GetAny(mrt)) {
@ -643,11 +644,21 @@ void EmitContext::DefineOutputs() {
const u32 num_components = info.stores.NumComponents(mrt);
const AmdGpu::NumberFormat num_format{runtime_info.fs_info.color_buffers[i].num_format};
const Id type{GetAttributeType(*this, num_format)[num_components]};
const Id id{DefineOutput(type, i)};
Id id;
if (runtime_info.fs_info.dual_source_blending) {
id = DefineOutput(type, 0);
Decorate(id, spv::Decoration::Index, i);
} else {
id = DefineOutput(type, i);
}
Name(id, fmt::format("frag_color{}", i));
frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
++num_render_targets;
}
ASSERT_MSG(!runtime_info.fs_info.dual_source_blending || num_render_targets == 2,
"Dual source blending enabled, there must be exactly two MRT exports");
break;
}
case LogicalStage::Geometry: {
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);

View File

@ -26,8 +26,11 @@ void Translator::ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32
}
void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
const u32 color_buffer_idx =
u32 color_buffer_idx =
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
color_buffer_idx = 0;
}
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
AmdGpu::NumberFormat num_format;
@ -68,8 +71,11 @@ void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR:
}
void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
const u32 color_buffer_idx =
u32 color_buffer_idx =
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
color_buffer_idx = 0;
}
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
const auto swizzled_comp = SwizzleMrtComponent(color_buffer, comp);

View File

@ -70,6 +70,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
return BUFFER_ATOMIC(AtomicOp::Add, inst);
case Opcode::BUFFER_ATOMIC_SWAP:
return BUFFER_ATOMIC(AtomicOp::Swap, inst);
case Opcode::BUFFER_ATOMIC_CMPSWAP:
return BUFFER_ATOMIC(AtomicOp::CmpSwap, inst);
case Opcode::BUFFER_ATOMIC_SMIN:
return BUFFER_ATOMIC(AtomicOp::Smin, inst);
case Opcode::BUFFER_ATOMIC_UMIN:
@ -331,6 +333,10 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
switch (op) {
case AtomicOp::Swap:
return ir.BufferAtomicSwap(handle, address, vdata_val, buffer_info);
case AtomicOp::CmpSwap: {
const IR::Value cmp_val = ir.GetVectorReg(vdata + 1);
return ir.BufferAtomicCmpSwap(handle, address, vdata_val, cmp_val, buffer_info);
}
case AtomicOp::Add:
return ir.BufferAtomicIAdd(handle, address, vdata_val, buffer_info);
case AtomicOp::Smin:

View File

@ -513,6 +513,11 @@ Value IREmitter::BufferAtomicSwap(const Value& handle, const Value& address, con
return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value);
}
Value IREmitter::BufferAtomicCmpSwap(const Value& handle, const Value& address, const Value& vdata,
const Value& cmp_value, BufferInstInfo info) {
return Inst(Opcode::BufferAtomicCmpSwap32, Flags{info}, handle, address, vdata, cmp_value);
}
U32 IREmitter::DataAppend(const U32& counter) {
return Inst<U32>(Opcode::DataAppend, counter, Imm32(0));
}

View File

@ -150,6 +150,9 @@ public:
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address,
const Value& value, BufferInstInfo info);
[[nodiscard]] Value BufferAtomicCmpSwap(const Value& handle, const Value& address,
const Value& value, const Value& cmp_value,
BufferInstInfo info);
[[nodiscard]] U32 DataAppend(const U32& counter);
[[nodiscard]] U32 DataConsume(const U32& counter);

View File

@ -126,6 +126,7 @@ OPCODE(BufferAtomicAnd32, U32, Opaq
OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, )
OPCODE(BufferAtomicCmpSwap32, U32, Opaque, Opaque, U32, U32, )
// Vector utility
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )

View File

@ -196,11 +196,13 @@ struct FragmentRuntimeInfo {
u32 num_inputs;
std::array<PsInput, 32> inputs;
std::array<PsColorBuffer, MaxColorBuffers> color_buffers;
bool dual_source_blending;
bool operator==(const FragmentRuntimeInfo& other) const noexcept {
return std::ranges::equal(color_buffers, other.color_buffers) &&
en_flags.raw == other.en_flags.raw && addr_flags.raw == other.addr_flags.raw &&
num_inputs == other.num_inputs &&
dual_source_blending == other.dual_source_blending &&
std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(),
other.inputs.begin() + num_inputs);
}

View File

@ -214,6 +214,19 @@ vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor) {
}
}
bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor) {
using BlendFactor = Liverpool::BlendControl::BlendFactor;
switch (factor) {
case BlendFactor::Src1Color:
case BlendFactor::Src1Alpha:
case BlendFactor::InvSrc1Color:
case BlendFactor::InvSrc1Alpha:
return true;
default:
return false;
}
}
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) {
using BlendFunc = Liverpool::BlendControl::BlendFunc;
switch (func) {

View File

@ -30,6 +30,8 @@ vk::FrontFace FrontFace(Liverpool::FrontFace mode);
vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor);
bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor);
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func);
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode);

View File

@ -158,6 +158,15 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
info.fs_info.addr_flags = regs.ps_input_addr;
const auto& ps_inputs = regs.ps_inputs;
info.fs_info.num_inputs = regs.num_interp;
const auto& cb0_blend = regs.blend_control[0];
info.fs_info.dual_source_blending =
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.color_dst_factor) ||
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.color_src_factor);
if (cb0_blend.separate_alpha_blend) {
info.fs_info.dual_source_blending |=
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.alpha_dst_factor) ||
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.alpha_src_factor);
}
for (u32 i = 0; i < regs.num_interp; i++) {
info.fs_info.inputs[i] = {
.param_index = u8(ps_inputs[i].input_offset.Value()),

View File

@ -299,6 +299,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
auto& new_image = slot_images[new_image_id];
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
RefreshImage(new_image);
new_image.CopyImage(src_image);
if (src_image.binding.is_bound || src_image.binding.is_target) {