diff --git a/CMakeLists.txt b/CMakeLists.txt index 6dfe9348a..7c2739d22 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1119,6 +1119,10 @@ if (APPLE) set(MVK_BUNDLE_PATH "Resources/vulkan/icd.d") set_property(TARGET shadps4 APPEND PROPERTY BUILD_RPATH "@executable_path/../${MVK_BUNDLE_PATH}") set(MVK_DST ${CMAKE_CURRENT_BINARY_DIR}/shadps4.app/Contents/${MVK_BUNDLE_PATH}) + + add_custom_command( + OUTPUT ${MVK_DST} + COMMAND ${CMAKE_COMMAND} -E make_directory ${MVK_DST}) else() set_property(TARGET shadps4 APPEND PROPERTY BUILD_RPATH "@executable_path") set(MVK_DST ${CMAKE_CURRENT_BINARY_DIR}) @@ -1129,9 +1133,6 @@ if (APPLE) set(MVK_ICD_SRC ${CMAKE_CURRENT_SOURCE_DIR}/externals/MoltenVK/MoltenVK/MoltenVK/icd/MoltenVK_icd.json) set(MVK_ICD_DST ${MVK_DST}/MoltenVK_icd.json) - add_custom_command( - OUTPUT ${MVK_DST} - COMMAND ${CMAKE_COMMAND} -E make_directory ${MVK_DST}) add_custom_command( OUTPUT ${MVK_ICD_DST} DEPENDS ${MVK_ICD_SRC} ${MVK_DST} @@ -1146,17 +1147,13 @@ if (APPLE) if (ARCHITECTURE STREQUAL "x86_64") # Reserve system-managed memory space. - target_link_options(shadps4 PRIVATE -Wl,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x4000,-segaddr,TCB_SPACE,0x4000,-segaddr,SYSTEM_MANAGED,0x400000,-segaddr,SYSTEM_RESERVED,0x7FFFFC000,-image_base,0x20000000000) + target_link_options(shadps4 PRIVATE -Wl,-ld_classic,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x4000,-segaddr,TCB_SPACE,0x4000,-segaddr,SYSTEM_MANAGED,0x400000,-segaddr,SYSTEM_RESERVED,0x7FFFFC000,-image_base,0x20000000000) endif() # Replacement for std::chrono::time_zone target_link_libraries(shadps4 PRIVATE date::date-tz) endif() -if (NOT ENABLE_QT_GUI) - target_link_libraries(shadps4 PRIVATE SDL3::SDL3) -endif() - if (ENABLE_QT_GUI) target_link_libraries(shadps4 PRIVATE Qt6::Widgets Qt6::Concurrent Qt6::Network Qt6::Multimedia) add_definitions(-DENABLE_QT_GUI) diff --git a/externals/MoltenVK/MoltenVK b/externals/MoltenVK/MoltenVK index 3a0b07a24..00abd384c 160000 --- a/externals/MoltenVK/MoltenVK +++ b/externals/MoltenVK/MoltenVK @@ -1 +1 @@ -Subproject commit 3a0b07a24a4a681ffe70b461b1f4333b2729e2ef +Subproject commit 00abd384ce01cbd439045905d2fa6cf799dfa2f6 diff --git a/externals/MoltenVK/SPIRV-Cross b/externals/MoltenVK/SPIRV-Cross index 969e75f7c..1a69a919f 160000 --- a/externals/MoltenVK/SPIRV-Cross +++ b/externals/MoltenVK/SPIRV-Cross @@ -1 +1 @@ -Subproject commit 969e75f7cc0718774231d029f9d52fa87d4ae1b2 +Subproject commit 1a69a919fa302e92b337594bd0a8aaea61037d91 diff --git a/src/core/signals.cpp b/src/core/signals.cpp index e47a78cd2..4099ac237 100644 --- a/src/core/signals.cpp +++ b/src/core/signals.cpp @@ -11,6 +11,7 @@ #include #else #include +#include #ifdef ARCH_X86_64 #include #endif diff --git a/src/core/tls.cpp b/src/core/tls.cpp index e13c683e1..0d1d514cf 100644 --- a/src/core/tls.cpp +++ b/src/core/tls.cpp @@ -51,7 +51,7 @@ Tcb* GetTcbBase() { // Apple x86_64 // Reserve space in the 32-bit address range for allocating TCB pages. -asm(".zerofill TCB_SPACE,TCB_SPACE,__guest_system,0x3FC000"); +asm(".zerofill TCB_SPACE,TCB_SPACE,__tcb_space,0x3FC000"); struct LdtPage { void* tcb; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 672856397..c47a75739 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -299,8 +299,7 @@ void EmitContext::DefineInterpolatedAttribs() { // Iterate all input attributes, load them and manually interpolate. for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) { const auto& input = runtime_info.fs_info.inputs[i]; - const u32 semantic = input.param_index; - auto& params = input_params[semantic]; + auto& params = input_params[i]; if (input.is_flat || params.is_loaded) { continue; } @@ -318,7 +317,7 @@ void EmitContext::DefineInterpolatedAttribs() { const Id p10_y{OpVectorTimesScalar(F32[4], p10, bary_coord_y)}; const Id p20_z{OpVectorTimesScalar(F32[4], p20, bary_coord_z)}; params.id = OpFAdd(F32[4], p0, OpFAdd(F32[4], p10_y, p20_z)); - Name(params.id, fmt::format("fs_in_attr{}", semantic)); + Name(params.id, fmt::format("fs_in_attr{}", i)); params.is_loaded = true; } } @@ -427,25 +426,28 @@ void EmitContext::DefineInputs() { } for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) { const auto& input = runtime_info.fs_info.inputs[i]; - const u32 semantic = input.param_index; - ASSERT(semantic < IR::NumParams); if (input.IsDefault()) { - input_params[semantic] = { - MakeDefaultValue(*this, input.default_value), input_f32, F32[1], 4, false, true, + input_params[i] = { + .id = MakeDefaultValue(*this, input.default_value), + .pointer_type = input_f32, + .component_type = F32[1], + .num_components = 4, + .is_integer = false, + .is_loaded = true, }; continue; } - const IR::Attribute param{IR::Attribute::Param0 + input.param_index}; + const IR::Attribute param{IR::Attribute::Param0 + i}; const u32 num_components = info.loads.NumComponents(param); const Id type{F32[num_components]}; Id attr_id{}; if (profile.needs_manual_interpolation && !input.is_flat) { - attr_id = DefineInput(TypeArray(type, ConstU32(3U)), semantic); + attr_id = DefineInput(TypeArray(type, ConstU32(3U)), input.param_index); Decorate(attr_id, spv::Decoration::PerVertexKHR); - Name(attr_id, fmt::format("fs_in_attr{}_p", semantic)); + Name(attr_id, fmt::format("fs_in_attr{}_p", i)); } else { - attr_id = DefineInput(type, semantic); - Name(attr_id, fmt::format("fs_in_attr{}", semantic)); + attr_id = DefineInput(type, input.param_index); + Name(attr_id, fmt::format("fs_in_attr{}", i)); if (input.is_flat) { Decorate(attr_id, spv::Decoration::Flat); @@ -453,7 +455,7 @@ void EmitContext::DefineInputs() { Decorate(attr_id, spv::Decoration::NoPerspective); } } - input_params[semantic] = + input_params[i] = GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components, false); } break; diff --git a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp index 2d7297c12..5a287dbe2 100644 --- a/src/shader_recompiler/frontend/translate/vector_interpolation.cpp +++ b/src/shader_recompiler/frontend/translate/vector_interpolation.cpp @@ -22,15 +22,17 @@ void Translator::EmitVectorInterpolation(const GcnInst& inst) { // VINTRP void Translator::V_INTERP_P2_F32(const GcnInst& inst) { - const auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr); - info.interp_qualifiers[attr.param_index] = vgpr_to_interp[inst.src[0].code]; - const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; + const u32 attr_index = inst.control.vintrp.attr; + const auto& attr = runtime_info.fs_info.inputs.at(attr_index); + info.interp_qualifiers[attr_index] = vgpr_to_interp[inst.src[0].code]; + const IR::Attribute attrib{IR::Attribute::Param0 + attr_index}; SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); } void Translator::V_INTERP_MOV_F32(const GcnInst& inst) { - const auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr); - const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index}; + const u32 attr_index = inst.control.vintrp.attr; + const auto& attr = runtime_info.fs_info.inputs.at(attr_index); + const IR::Attribute attrib{IR::Attribute::Param0 + attr_index}; SetDst(inst.dst[0], ir.GetAttribute(attrib, inst.control.vintrp.chan)); } diff --git a/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp b/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp index 409c05940..12d4d0659 100644 --- a/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp +++ b/src/shader_recompiler/ir/passes/shared_memory_to_storage_pass.cpp @@ -34,11 +34,11 @@ void SharedMemoryToStoragePass(IR::Program& program, const RuntimeInfo& runtime_ if (program.info.stage != Stage::Compute) { return; } - // Only perform the transform if the host shared memory is insufficient - // or the device does not support VK_KHR_workgroup_memory_explicit_layout + // Only perform the transform if there is shared memory and either host shared memory is + // insufficient or the device does not support VK_KHR_workgroup_memory_explicit_layout const u32 shared_memory_size = runtime_info.cs_info.shared_memory_size; - if (shared_memory_size <= profile.max_shared_memory_size && - profile.supports_workgroup_explicit_memory_layout) { + if (shared_memory_size == 0 || (shared_memory_size <= profile.max_shared_memory_size && + profile.supports_workgroup_explicit_memory_layout)) { return; } // Add buffer binding for shared memory storage buffer. diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 0591e06ce..63c0a38d6 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -445,7 +445,25 @@ bool Instance::CreateDevice() { workgroup_memory_explicit_layout_features.workgroupMemoryExplicitLayout16BitAccess, }, #ifdef __APPLE__ - portability_features, + vk::PhysicalDevicePortabilitySubsetFeaturesKHR{ + .constantAlphaColorBlendFactors = portability_features.constantAlphaColorBlendFactors, + .events = portability_features.events, + .imageViewFormatReinterpretation = portability_features.imageViewFormatReinterpretation, + .imageViewFormatSwizzle = portability_features.imageViewFormatSwizzle, + .imageView2DOn3DImage = portability_features.imageView2DOn3DImage, + .multisampleArrayImage = portability_features.multisampleArrayImage, + .mutableComparisonSamplers = portability_features.mutableComparisonSamplers, + .pointPolygons = portability_features.pointPolygons, + .samplerMipLodBias = portability_features.samplerMipLodBias, + .separateStencilMaskRef = portability_features.separateStencilMaskRef, + .shaderSampleRateInterpolationFunctions = + portability_features.shaderSampleRateInterpolationFunctions, + .tessellationIsolines = portability_features.tessellationIsolines, + .tessellationPointMode = portability_features.tessellationPointMode, + .triangleFans = portability_features.triangleFans, + .vertexAttributeAccessBeyondStride = + portability_features.vertexAttributeAccessBeyondStride, + }, #endif }; diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index d8070da61..6241100a0 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -319,7 +319,8 @@ void Image::CopyImage(const Image& image) { auto cmdbuf = scheduler->CommandBuffer(); boost::container::small_vector image_copy{}; - for (u32 m = 0; m < image.info.resources.levels; ++m) { + const u32 num_mips = std::min(image.info.resources.levels, info.resources.levels); + for (u32 m = 0; m < num_mips; ++m) { const auto mip_w = std::max(image.info.size.width >> m, 1u); const auto mip_h = std::max(image.info.size.height >> m, 1u); const auto mip_d = std::max(image.info.size.depth >> m, 1u); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index f070b9132..cc244eb6b 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -461,9 +461,9 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) { const ImageId image_id = FindImage(desc); Image& image = slot_images[image_id]; image.flags |= ImageFlagBits::GpuModified; - image.flags &= ~ImageFlagBits::Dirty; image.usage.depth_target = 1u; image.usage.stencil = image.info.HasStencil(); + UpdateImage(image_id); // Register meta data for this depth buffer if (!(image.flags & ImageFlagBits::MetaRegistered)) {