From 1666b9d19947d357fd078fd3607e6edbcbfbf931 Mon Sep 17 00:00:00 2001 From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com> Date: Tue, 11 Jun 2024 17:31:01 +0300 Subject: [PATCH] memory: Fix tessellation buffer mapping --- src/core/file_sys/fs.cpp | 1 + src/core/libraries/disc_map/disc_map.cpp | 1 - .../libraries/kernel/thread_management.cpp | 3 +-- .../libraries/libc_internal/libc_internal.cpp | 22 ++++++++++++++++++- src/core/memory.cpp | 16 +++++++++----- src/core/memory.h | 4 ++++ .../frontend/translate/translate.cpp | 4 ++++ src/video_core/amdgpu/resource.h | 2 +- .../renderer_vulkan/liverpool_to_vk.cpp | 7 ++++++ .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.h | 4 ++++ .../renderer_vulkan/vk_pipeline_cache.cpp | 7 +++--- .../renderer_vulkan/vk_rasterizer.cpp | 2 +- 13 files changed, 60 insertions(+), 15 deletions(-) diff --git a/src/core/file_sys/fs.cpp b/src/core/file_sys/fs.cpp index 6a99a8088..c42a0d3e2 100644 --- a/src/core/file_sys/fs.cpp +++ b/src/core/file_sys/fs.cpp @@ -97,4 +97,5 @@ File* HandleTable::getFile(const std::string& host_name) { } return nullptr; } + } // namespace Core::FileSys diff --git a/src/core/libraries/disc_map/disc_map.cpp b/src/core/libraries/disc_map/disc_map.cpp index 638adaf29..79f4acb34 100644 --- a/src/core/libraries/disc_map/disc_map.cpp +++ b/src/core/libraries/disc_map/disc_map.cpp @@ -36,7 +36,6 @@ int PS4_SYSV_ABI Func_E7EBCE96E92F91F8() { } void RegisterlibSceDiscMap(Core::Loader::SymbolsResolver* sym) { - return; LIB_FUNCTION("fl1eoDnwQ4s", "libSceDiscMap", 1, "libSceDiscMap", 1, 1, sceDiscMapGetPackageSize); LIB_FUNCTION("lbQKqsERhtE", "libSceDiscMap", 1, "libSceDiscMap", 1, 1, diff --git a/src/core/libraries/kernel/thread_management.cpp b/src/core/libraries/kernel/thread_management.cpp index 931ce75db..7623aea04 100644 --- a/src/core/libraries/kernel/thread_management.cpp +++ b/src/core/libraries/kernel/thread_management.cpp @@ -650,7 +650,6 @@ int 
PS4_SYSV_ABI scePthreadCondattrInit(ScePthreadCondattr* attr) { } int PS4_SYSV_ABI scePthreadCondBroadcast(ScePthreadCond* cond) { - LOG_INFO(Kernel_Pthread, "called"); cond = static_cast(createCond(cond)); if (cond == nullptr) { @@ -659,7 +658,7 @@ int PS4_SYSV_ABI scePthreadCondBroadcast(ScePthreadCond* cond) { int result = pthread_cond_broadcast(&(*cond)->cond); - LOG_INFO(Kernel_Pthread, "name={}, result={}", (*cond)->name, result); + LOG_TRACE(Kernel_Pthread, "called name={}, result={}", (*cond)->name, result); return (result == 0 ? SCE_OK : SCE_KERNEL_ERROR_EINVAL); } diff --git a/src/core/libraries/libc_internal/libc_internal.cpp b/src/core/libraries/libc_internal/libc_internal.cpp index 93c980759..b4bfb705f 100644 --- a/src/core/libraries/libc_internal/libc_internal.cpp +++ b/src/core/libraries/libc_internal/libc_internal.cpp @@ -39,10 +39,26 @@ int PS4_SYSV_ABI internal_memcmp(const void* s1, const void* s2, size_t n) { return std::memcmp(s1, s2, n); } +int PS4_SYSV_ABI internal_strncmp(const char* str1, const char* str2, size_t num) { + return std::strncmp(str1, str2, num); +} + +size_t PS4_SYSV_ABI internal_strlen(const char* str) { + return std::strlen(str); // returned as size_t like strlen itself; an int return would narrow the length +} + float PS4_SYSV_ABI internal_expf(float x) { return expf(x); } +void* PS4_SYSV_ABI internal_malloc(size_t size) { + return std::malloc(size); +} + +char* PS4_SYSV_ABI internal_strncpy(char* dest, const char* src, std::size_t count) { + return std::strncpy(dest, src, count); +} + void RegisterlibSceLibcInternal(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("NFLs+dRJGNg", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_memcpy_s); @@ -55,6 +71,10 @@ void RegisterlibSceLibcInternal(Core::Loader::SymbolsResolver* sym) { LIB_FUNCTION("DfivPArhucg", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_memcmp); LIB_FUNCTION("8zsu04XNsZ4", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_expf); + LIB_FUNCTION("aesyjrHVWy4", "libSceLibcInternal", 1,
"libSceLibcInternal", 1, 1, internal_strncmp); + LIB_FUNCTION("j4ViWNHEgww", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_strlen); + LIB_FUNCTION("6sJWiWSRuqk", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_strncpy); + LIB_FUNCTION("gQX+4GDQjpM", "libSceLibcInternal", 1, "libSceLibcInternal", 1, 1, internal_malloc); }; -} // namespace Libraries::LibcInternal \ No newline at end of file +} // namespace Libraries::LibcInternal diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 1b82f654c..c19171127 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -119,12 +119,18 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M // Find the first free area starting with provided virtual address. if (False(flags & MemoryMapFlags::Fixed)) { auto it = FindVMA(mapped_addr); - while (it->second.type != VMAType::Free || it->second.size < size) { - it++; + // If the VMA is free and contains the requested mapping we are done. + if (it->second.type == VMAType::Free && it->second.Contains(virtual_addr, size)) { + mapped_addr = alignment > 0 ? Common::AlignUp(base, alignment) : base; + } else { + // Search for the first free VMA that fits our mapping. + while (it->second.type != VMAType::Free || it->second.size < size) { + it++; + } + ASSERT(it != vma_map.end()); + const auto& vma = it->second; + mapped_addr = alignment > 0 ? Common::AlignUp(vma.base, alignment) : vma.base; } - ASSERT(it != vma_map.end()); - const VAddr base = it->second.base; - mapped_addr = alignment > 0 ? Common::AlignUp(base, alignment) : base; } // Perform the mapping. 
diff --git a/src/core/memory.h b/src/core/memory.h index a64d511e6..711fdc609 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -88,6 +88,10 @@ struct VirtualMemoryArea { std::string name = ""; void* fd = nullptr; + bool Contains(VAddr addr, size_t size) const { + return addr >= base && (addr + size) <= (base + this->size); // '<=': a range ending exactly at the end of this VMA is still contained + } + bool CanMergeWith(const VirtualMemoryArea& next) const { if (disallow_merge || next.disallow_merge) { return false; diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 2ad25f3a3..608bc9ae1 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -121,6 +121,9 @@ IR::U32F32 Translator::GetSrc(const InstOperand& operand, bool force_flt) { case OperandField::ConstFloatNeg_2_0: value = ir.Imm32(-2.0f); break; + case OperandField::ConstFloatNeg_4_0: + value = ir.Imm32(-4.0f); + break; case OperandField::VccLo: if (force_flt) { value = ir.BitCast(ir.GetVccLo()); @@ -301,6 +304,7 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_MADAK_F32: // Yes these can share the opcode translator.V_FMA_F32(inst); break; + case Opcode::IMAGE_SAMPLE_LZ_O: case Opcode::IMAGE_SAMPLE_C_LZ: case Opcode::IMAGE_SAMPLE_LZ: case Opcode::IMAGE_SAMPLE: diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index ba2231b0c..1358c8adc 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -281,7 +281,7 @@ struct Sampler { }; float LodBias() const noexcept { - return static_cast(lod_bias); + return static_cast(static_cast((lod_bias.Value() ^ 0x2000u) - 0x2000u)) / 256.0f; } float MinLod() const noexcept { diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 8ca82f821..0096d34b2 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++
b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -347,6 +347,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) { return vk::Format::eR8G8Unorm; } + if (data_format == AmdGpu::DataFormat::FormatBc2 && num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eBc2UnormBlock; + } UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); } @@ -367,6 +370,10 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format == DepthBuffer::StencilFormat::Stencil8) { return vk::Format::eD16UnormS8Uint; } + if (z_format == DepthBuffer::ZFormat::Invald && + stencil_format == DepthBuffer::StencilFormat::Invalid) { + return vk::Format::eUndefined; + } UNREACHABLE(); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d3f7c647a..d02e1d619 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -78,7 +78,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul .depthClampEnable = false, .rasterizerDiscardEnable = false, .polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode), - .cullMode = LiverpoolToVK::CullMode(key.cull_mode), + .cullMode = vk::CullModeFlagBits::eNone/*LiverpoolToVK::CullMode(key.cull_mode)*/, .frontFace = key.front_face == Liverpool::FrontFace::Clockwise ? 
vk::FrontFace::eClockwise : vk::FrontFace::eCounterClockwise, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 5db405241..4b38aa3d6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -79,6 +79,10 @@ public: return key.write_masks; } + [[nodiscard]] bool IsDepthEnabled() const { + return key.depth.depth_enable.Value(); + } + private: void BuildDescSetLayout(); void BindVertexBuffers(StreamBuffer& staging) const; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 00e992bb7..bf17cb50f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -114,9 +114,10 @@ void PipelineCache::RefreshGraphicsKey() { key.front_face = regs.polygon_control.front_face; const auto& db = regs.depth_buffer; - key.depth_format = key.depth.depth_enable - ? LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format) - : vk::Format::eUndefined; + if (key.depth.depth_enable) { + key.depth_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format); + key.depth.depth_enable.Assign(key.depth_format != vk::Format::eUndefined); + } // `RenderingInfo` is assumed to be initialized with a contiguous array of valid color // attachments. This might be not a case as HW color buffers can be bound in an arbitrary order. 
// We need to do some arrays compaction at this stage diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 291d38fd2..a1aac1b97 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -59,7 +59,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { .storeOp = vk::AttachmentStoreOp::eStore, }); } - if (regs.depth_control.depth_enable && regs.depth_buffer.Address() != 0) { + if (pipeline->IsDepthEnabled() && regs.depth_buffer.Address() != 0) { const bool is_clear = regs.depth_render_control.depth_clear_enable; const auto& image_view = texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent);