mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-25 11:34:55 +00:00
Merge branch 'main' into sse4a_again
This commit is contained in:
commit
dbe799a4e0
@ -273,6 +273,10 @@ void RegisterKernel(Core::Loader::SymbolsResolver* sym) {
|
||||
Libraries::Net::sceNetInetNtop); // TODO fix it to sys_ ...
|
||||
LIB_FUNCTION("4n51s0zEf0c", "libScePosix", 1, "libkernel", 1, 1,
|
||||
Libraries::Net::sceNetInetPton); // TODO fix it to sys_ ...
|
||||
LIB_FUNCTION("XVL8So3QJUk", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_connect);
|
||||
LIB_FUNCTION("3e+4Iv7IJ8U", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_accept);
|
||||
LIB_FUNCTION("aNeavPDNKzA", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_sendmsg);
|
||||
LIB_FUNCTION("pxnCmagrtao", "libScePosix", 1, "libkernel", 1, 1, Libraries::Net::sys_listen);
|
||||
}
|
||||
|
||||
} // namespace Libraries::Kernel
|
||||
|
@ -222,9 +222,10 @@ s32 PS4_SYSV_ABI sceKernelMapDirectMemory2(void** addr, u64 len, s32 type, s32 p
|
||||
return ret;
|
||||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
|
||||
int flags, const char* name) {
|
||||
|
||||
s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags,
|
||||
const char* name) {
|
||||
LOG_INFO(Kernel_Vmm, "in_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}, name = '{}'",
|
||||
fmt::ptr(*addr_in_out), len, prot, flags, name);
|
||||
if (len == 0 || !Common::Is16KBAligned(len)) {
|
||||
LOG_ERROR(Kernel_Vmm, "len is 0 or not 16kb multiple");
|
||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||
@ -243,18 +244,14 @@ s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, std::size_t
|
||||
const VAddr in_addr = reinterpret_cast<VAddr>(*addr_in_out);
|
||||
const auto mem_prot = static_cast<Core::MemoryProt>(prot);
|
||||
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
|
||||
SCOPE_EXIT {
|
||||
LOG_INFO(Kernel_Vmm,
|
||||
"in_addr = {:#x}, out_addr = {}, len = {:#x}, prot = {:#x}, flags = {:#x}",
|
||||
in_addr, fmt::ptr(*addr_in_out), len, prot, flags);
|
||||
};
|
||||
auto* memory = Core::Memory::Instance();
|
||||
return memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
|
||||
Core::VMAType::Flexible, name);
|
||||
const auto ret = memory->MapMemory(addr_in_out, in_addr, len, mem_prot, map_flags,
|
||||
Core::VMAType::Flexible, name);
|
||||
LOG_INFO(Kernel_Vmm, "out_addr = {}", fmt::ptr(*addr_in_out));
|
||||
return ret;
|
||||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
|
||||
int flags) {
|
||||
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags) {
|
||||
return sceKernelMapNamedFlexibleMemory(addr_in_out, len, prot, flags, "anon");
|
||||
}
|
||||
|
||||
|
@ -141,10 +141,9 @@ s32 PS4_SYSV_ABI sceKernelAvailableDirectMemorySize(u64 searchStart, u64 searchE
|
||||
s32 PS4_SYSV_ABI sceKernelVirtualQuery(const void* addr, int flags, OrbisVirtualQueryInfo* info,
|
||||
size_t infoSize);
|
||||
s32 PS4_SYSV_ABI sceKernelReserveVirtualRange(void** addr, u64 len, int flags, u64 alignment);
|
||||
s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addrInOut, std::size_t len, int prot,
|
||||
int flags, const char* name);
|
||||
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, std::size_t len, int prot,
|
||||
int flags);
|
||||
s32 PS4_SYSV_ABI sceKernelMapNamedFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags,
|
||||
const char* name);
|
||||
s32 PS4_SYSV_ABI sceKernelMapFlexibleMemory(void** addr_in_out, u64 len, s32 prot, s32 flags);
|
||||
int PS4_SYSV_ABI sceKernelQueryMemoryProtection(void* addr, void** start, void** end, u32* prot);
|
||||
|
||||
s32 PS4_SYSV_ABI sceKernelMprotect(const void* addr, u64 size, s32 prot);
|
||||
|
@ -426,6 +426,7 @@ void RegisterMutex(Core::Loader::SymbolsResolver* sym) {
|
||||
// Posix
|
||||
LIB_FUNCTION("ttHNfU+qDBU", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_init);
|
||||
LIB_FUNCTION("7H0iTOciTLo", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_lock);
|
||||
LIB_FUNCTION("Io9+nTKXZtA", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_timedlock);
|
||||
LIB_FUNCTION("2Z+PpY6CaJg", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_unlock);
|
||||
LIB_FUNCTION("ltCfaGr2JGE", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutex_destroy);
|
||||
LIB_FUNCTION("dQHWEsJtoE4", "libScePosix", 1, "libkernel", 1, 1, posix_pthread_mutexattr_init);
|
||||
|
@ -332,21 +332,22 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul
|
||||
sr.type = sym_type;
|
||||
|
||||
const auto* record = m_hle_symbols.FindSymbol(sr);
|
||||
if (!record) {
|
||||
// Check if it an export function
|
||||
const auto* p = FindExportedModule(*module, *library);
|
||||
if (p && p->export_sym.GetSize() > 0) {
|
||||
record = p->export_sym.FindSymbol(sr);
|
||||
}
|
||||
}
|
||||
if (record) {
|
||||
*return_info = *record;
|
||||
|
||||
Core::Devtools::Widget::ModuleList::AddModule(sr.library);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if it an export function
|
||||
const auto* p = FindExportedModule(*module, *library);
|
||||
if (p && p->export_sym.GetSize() > 0) {
|
||||
record = p->export_sym.FindSymbol(sr);
|
||||
if (record) {
|
||||
*return_info = *record;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
const auto aeronid = AeroLib::FindByNid(sr.name.c_str());
|
||||
if (aeronid) {
|
||||
return_info->name = aeronid->name;
|
||||
|
@ -182,7 +182,6 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size,
|
||||
auto& area = CarveDmemArea(mapping_start, size)->second;
|
||||
area.memory_type = memory_type;
|
||||
area.is_free = false;
|
||||
MergeAdjacent(dmem_map, dmem_area);
|
||||
return mapping_start;
|
||||
}
|
||||
|
||||
|
@ -68,6 +68,22 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
|
||||
});
|
||||
}
|
||||
|
||||
Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
|
||||
Id cmp_value,
|
||||
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) {
|
||||
const auto& buffer = ctx.buffers[handle];
|
||||
if (Sirit::ValidId(buffer.offset)) {
|
||||
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
|
||||
}
|
||||
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
|
||||
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
|
||||
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
|
||||
const auto [scope, semantics]{AtomicArgs(ctx)};
|
||||
return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] {
|
||||
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value);
|
||||
});
|
||||
}
|
||||
|
||||
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
|
||||
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
|
||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||
@ -175,6 +191,12 @@ Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
|
||||
return BufferAtomicU32(ctx, inst, handle, address, value, &Sirit::Module::OpAtomicExchange);
|
||||
}
|
||||
|
||||
Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
|
||||
Id cmp_value) {
|
||||
return BufferAtomicU32CmpSwap(ctx, inst, handle, address, value, cmp_value,
|
||||
&Sirit::Module::OpAtomicCompareExchange);
|
||||
}
|
||||
|
||||
Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value) {
|
||||
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicIAdd);
|
||||
}
|
||||
|
@ -96,6 +96,8 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
|
||||
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
|
||||
Id EmitBufferAtomicCmpSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
|
||||
Id cmp_value);
|
||||
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
|
||||
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
|
||||
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
|
||||
|
@ -634,7 +634,8 @@ void EmitContext::DefineOutputs() {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case LogicalStage::Fragment:
|
||||
case LogicalStage::Fragment: {
|
||||
u32 num_render_targets = 0;
|
||||
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
|
||||
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
|
||||
if (!info.stores.GetAny(mrt)) {
|
||||
@ -643,11 +644,21 @@ void EmitContext::DefineOutputs() {
|
||||
const u32 num_components = info.stores.NumComponents(mrt);
|
||||
const AmdGpu::NumberFormat num_format{runtime_info.fs_info.color_buffers[i].num_format};
|
||||
const Id type{GetAttributeType(*this, num_format)[num_components]};
|
||||
const Id id{DefineOutput(type, i)};
|
||||
Id id;
|
||||
if (runtime_info.fs_info.dual_source_blending) {
|
||||
id = DefineOutput(type, 0);
|
||||
Decorate(id, spv::Decoration::Index, i);
|
||||
} else {
|
||||
id = DefineOutput(type, i);
|
||||
}
|
||||
Name(id, fmt::format("frag_color{}", i));
|
||||
frag_outputs[i] = GetAttributeInfo(num_format, id, num_components, true);
|
||||
++num_render_targets;
|
||||
}
|
||||
ASSERT_MSG(!runtime_info.fs_info.dual_source_blending || num_render_targets == 2,
|
||||
"Dual source blending enabled, there must be exactly two MRT exports");
|
||||
break;
|
||||
}
|
||||
case LogicalStage::Geometry: {
|
||||
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
|
||||
|
||||
|
@ -26,8 +26,11 @@ void Translator::ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32
|
||||
}
|
||||
|
||||
void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
|
||||
const u32 color_buffer_idx =
|
||||
u32 color_buffer_idx =
|
||||
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
|
||||
if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
|
||||
color_buffer_idx = 0;
|
||||
}
|
||||
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
|
||||
|
||||
AmdGpu::NumberFormat num_format;
|
||||
@ -68,8 +71,11 @@ void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR:
|
||||
}
|
||||
|
||||
void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
|
||||
const u32 color_buffer_idx =
|
||||
u32 color_buffer_idx =
|
||||
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
|
||||
if (runtime_info.fs_info.dual_source_blending && attribute == IR::Attribute::RenderTarget1) {
|
||||
color_buffer_idx = 0;
|
||||
}
|
||||
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
|
||||
const auto swizzled_comp = SwizzleMrtComponent(color_buffer, comp);
|
||||
|
||||
|
@ -70,6 +70,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
||||
return BUFFER_ATOMIC(AtomicOp::Add, inst);
|
||||
case Opcode::BUFFER_ATOMIC_SWAP:
|
||||
return BUFFER_ATOMIC(AtomicOp::Swap, inst);
|
||||
case Opcode::BUFFER_ATOMIC_CMPSWAP:
|
||||
return BUFFER_ATOMIC(AtomicOp::CmpSwap, inst);
|
||||
case Opcode::BUFFER_ATOMIC_SMIN:
|
||||
return BUFFER_ATOMIC(AtomicOp::Smin, inst);
|
||||
case Opcode::BUFFER_ATOMIC_UMIN:
|
||||
@ -331,6 +333,10 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
|
||||
switch (op) {
|
||||
case AtomicOp::Swap:
|
||||
return ir.BufferAtomicSwap(handle, address, vdata_val, buffer_info);
|
||||
case AtomicOp::CmpSwap: {
|
||||
const IR::Value cmp_val = ir.GetVectorReg(vdata + 1);
|
||||
return ir.BufferAtomicCmpSwap(handle, address, vdata_val, cmp_val, buffer_info);
|
||||
}
|
||||
case AtomicOp::Add:
|
||||
return ir.BufferAtomicIAdd(handle, address, vdata_val, buffer_info);
|
||||
case AtomicOp::Smin:
|
||||
|
@ -513,6 +513,11 @@ Value IREmitter::BufferAtomicSwap(const Value& handle, const Value& address, con
|
||||
return Inst(Opcode::BufferAtomicSwap32, Flags{info}, handle, address, value);
|
||||
}
|
||||
|
||||
Value IREmitter::BufferAtomicCmpSwap(const Value& handle, const Value& address, const Value& vdata,
|
||||
const Value& cmp_value, BufferInstInfo info) {
|
||||
return Inst(Opcode::BufferAtomicCmpSwap32, Flags{info}, handle, address, vdata, cmp_value);
|
||||
}
|
||||
|
||||
U32 IREmitter::DataAppend(const U32& counter) {
|
||||
return Inst<U32>(Opcode::DataAppend, counter, Imm32(0));
|
||||
}
|
||||
|
@ -150,6 +150,9 @@ public:
|
||||
const Value& value, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicSwap(const Value& handle, const Value& address,
|
||||
const Value& value, BufferInstInfo info);
|
||||
[[nodiscard]] Value BufferAtomicCmpSwap(const Value& handle, const Value& address,
|
||||
const Value& value, const Value& cmp_value,
|
||||
BufferInstInfo info);
|
||||
|
||||
[[nodiscard]] U32 DataAppend(const U32& counter);
|
||||
[[nodiscard]] U32 DataConsume(const U32& counter);
|
||||
|
@ -126,6 +126,7 @@ OPCODE(BufferAtomicAnd32, U32, Opaq
|
||||
OPCODE(BufferAtomicOr32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicXor32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicSwap32, U32, Opaque, Opaque, U32, )
|
||||
OPCODE(BufferAtomicCmpSwap32, U32, Opaque, Opaque, U32, U32, )
|
||||
|
||||
// Vector utility
|
||||
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
|
||||
|
@ -196,11 +196,13 @@ struct FragmentRuntimeInfo {
|
||||
u32 num_inputs;
|
||||
std::array<PsInput, 32> inputs;
|
||||
std::array<PsColorBuffer, MaxColorBuffers> color_buffers;
|
||||
bool dual_source_blending;
|
||||
|
||||
bool operator==(const FragmentRuntimeInfo& other) const noexcept {
|
||||
return std::ranges::equal(color_buffers, other.color_buffers) &&
|
||||
en_flags.raw == other.en_flags.raw && addr_flags.raw == other.addr_flags.raw &&
|
||||
num_inputs == other.num_inputs &&
|
||||
dual_source_blending == other.dual_source_blending &&
|
||||
std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(),
|
||||
other.inputs.begin() + num_inputs);
|
||||
}
|
||||
|
@ -214,6 +214,19 @@ vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor) {
|
||||
}
|
||||
}
|
||||
|
||||
bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor) {
|
||||
using BlendFactor = Liverpool::BlendControl::BlendFactor;
|
||||
switch (factor) {
|
||||
case BlendFactor::Src1Color:
|
||||
case BlendFactor::Src1Alpha:
|
||||
case BlendFactor::InvSrc1Color:
|
||||
case BlendFactor::InvSrc1Alpha:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func) {
|
||||
using BlendFunc = Liverpool::BlendControl::BlendFunc;
|
||||
switch (func) {
|
||||
|
@ -30,6 +30,8 @@ vk::FrontFace FrontFace(Liverpool::FrontFace mode);
|
||||
|
||||
vk::BlendFactor BlendFactor(Liverpool::BlendControl::BlendFactor factor);
|
||||
|
||||
bool IsDualSourceBlendFactor(Liverpool::BlendControl::BlendFactor factor);
|
||||
|
||||
vk::BlendOp BlendOp(Liverpool::BlendControl::BlendFunc func);
|
||||
|
||||
vk::SamplerAddressMode ClampMode(AmdGpu::ClampMode mode);
|
||||
|
@ -158,6 +158,15 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
||||
info.fs_info.addr_flags = regs.ps_input_addr;
|
||||
const auto& ps_inputs = regs.ps_inputs;
|
||||
info.fs_info.num_inputs = regs.num_interp;
|
||||
const auto& cb0_blend = regs.blend_control[0];
|
||||
info.fs_info.dual_source_blending =
|
||||
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.color_dst_factor) ||
|
||||
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.color_src_factor);
|
||||
if (cb0_blend.separate_alpha_blend) {
|
||||
info.fs_info.dual_source_blending |=
|
||||
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.alpha_dst_factor) ||
|
||||
LiverpoolToVK::IsDualSourceBlendFactor(cb0_blend.alpha_src_factor);
|
||||
}
|
||||
for (u32 i = 0; i < regs.num_interp; i++) {
|
||||
info.fs_info.inputs[i] = {
|
||||
.param_index = u8(ps_inputs[i].input_offset.Value()),
|
||||
|
@ -299,6 +299,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) {
|
||||
auto& new_image = slot_images[new_image_id];
|
||||
|
||||
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
|
||||
RefreshImage(new_image);
|
||||
new_image.CopyImage(src_image);
|
||||
|
||||
if (src_image.binding.is_bound || src_image.binding.is_target) {
|
||||
|
Loading…
Reference in New Issue
Block a user