From c86a00638f4532461047ce9055ab3d9ccd7f29b6 Mon Sep 17 00:00:00 2001 From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com> Date: Tue, 11 Jun 2024 00:44:56 +0300 Subject: [PATCH] video_core: Add a few missed things --- src/core/libraries/system/userservice.cpp | 2 +- src/core/memory.cpp | 9 +++++++++ src/core/memory.h | 1 + .../frontend/translate/scalar_alu.cpp | 7 +++++-- .../frontend/translate/translate.cpp | 8 +++++++- .../frontend/translate/translate.h | 2 +- .../frontend/translate/vector_alu.cpp | 8 ++++---- src/video_core/amdgpu/liverpool.h | 6 +++--- .../renderer_vulkan/vk_compute_pipeline.cpp | 3 ++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 7 ++++--- .../renderer_vulkan/vk_pipeline_cache.cpp | 4 ++++ src/video_core/texture_cache/image_view.cpp | 14 ++++++++------ src/video_core/texture_cache/image_view.h | 3 ++- src/video_core/texture_cache/texture_cache.cpp | 4 ++-- 14 files changed, 53 insertions(+), 25 deletions(-) diff --git a/src/core/libraries/system/userservice.cpp b/src/core/libraries/system/userservice.cpp index 16e5295c7..8c48b3111 100644 --- a/src/core/libraries/system/userservice.cpp +++ b/src/core/libraries/system/userservice.cpp @@ -104,7 +104,7 @@ int PS4_SYSV_ABI sceUserServiceGetDiscPlayerFlag() { } s32 PS4_SYSV_ABI sceUserServiceGetEvent(OrbisUserServiceEvent* event) { - LOG_INFO(Lib_UserService, "(DUMMY) called"); + LOG_TRACE(Lib_UserService, "(DUMMY) called"); // fake a loggin event static bool logged_in = false; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index a60680539..946eb8637 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -83,6 +83,9 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M MemoryMapFlags flags, VMAType type, std::string_view name, bool is_exec, PAddr phys_addr, u64 alignment) { std::scoped_lock lk{mutex}; + if (total_flexible_usage + size > 448_MB) { + return SCE_KERNEL_ERROR_ENOMEM; + } // When virtual addr is zero, force it to virtual_base. The guest cannot pass Fixed // flag so we will take the branch that searches for free (or reserved) mappings. @@ -100,6 +103,9 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M new_vma.phys_base = phys_addr; MapVulkanMemory(mapped_addr, size); } + if (type == VMAType::Flexible) { + total_flexible_usage += size; + } }; // Fixed mapping means the virtual address must exactly match the provided one. @@ -139,6 +145,9 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { if (type == VMAType::Direct) { UnmapVulkanMemory(virtual_addr, size); } + if (type == VMAType::Flexible) { + total_flexible_usage -= size; + } // Mark region as free and attempt to coalesce it with neighbours. auto& vma = it->second; diff --git a/src/core/memory.h b/src/core/memory.h index c5d130c0f..27ee5194f 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -182,6 +182,7 @@ private: DMemMap dmem_map; VMAMap vma_map; std::recursive_mutex mutex; + size_t total_flexible_usage{}; struct MappedMemory { vk::UniqueBuffer buffer; diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp index d3db37662..69005f1a0 100644 --- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp +++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp @@ -160,7 +160,7 @@ void Translator::S_OR_B64(bool negate, const GcnInst& inst) { } } -void Translator::S_AND_B64(const GcnInst& inst) { +void Translator::S_AND_B64(bool negate, const GcnInst& inst) { const auto get_src = [&](const InstOperand& operand) { switch (operand.field) { case OperandField::VccLo: @@ -175,7 +175,10 @@ void Translator::S_AND_B64(const GcnInst& inst) { }; const IR::U1 src0{get_src(inst.src[0])}; const IR::U1 src1{get_src(inst.src[1])}; - const IR::U1 result = ir.LogicalAnd(src0, src1); + IR::U1 result = ir.LogicalAnd(src0, src1); + if (negate) { + result = ir.LogicalNot(result); + } ir.SetScc(result); switch (inst.dst[0].field) { case OperandField::VccLo: diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index c0ddf4ae9..2ad25f3a3 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -366,6 +366,9 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) case Opcode::V_CMP_NLE_F32: translator.V_CMP_F32(ConditionOp::GT, false, inst); break; + case Opcode::V_CMP_NLT_F32: + translator.V_CMP_F32(ConditionOp::GE, false, inst); + break; case Opcode::S_CMP_LG_U32: translator.S_CMP(ConditionOp::LG, false, inst); break; @@ -554,7 +557,10 @@ void Translate(IR::Block* block, std::span inst_list, Info& info) translator.S_OR_B64(true, inst); break; case Opcode::S_AND_B64: - translator.S_AND_B64(inst); + translator.S_AND_B64(false, inst); + break; + case Opcode::S_NAND_B64: + translator.S_AND_B64(true, inst); break; case Opcode::V_LSHRREV_B32: translator.V_LSHRREV_B32(inst); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index a8964fc9e..5f273bc85 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -41,7 +41,7 @@ public: void S_AND_SAVEEXEC_B64(const GcnInst& inst); void S_MOV_B64(const GcnInst& inst); void S_OR_B64(bool negate, const GcnInst& inst); - void S_AND_B64(const GcnInst& inst); + void S_AND_B64(bool negate, const GcnInst& inst); void S_ADD_I32(const GcnInst& inst); void S_AND_B32(const GcnInst& inst); void S_LSHR_B32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index dbd9471f1..6e66f564b 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -15,7 +15,7 @@ void Translator::V_SAD(const GcnInst& inst) { } void Translator::V_MAC_F32(const GcnInst& inst) { - SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0]), GetSrc(inst.src[1]), GetSrc(inst.dst[0]))); + SetDst(inst.dst[0], ir.FPFma(GetSrc(inst.src[0], true), GetSrc(inst.src[1], true), GetSrc(inst.dst[0], true))); } void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) { @@ -127,13 +127,13 @@ void Translator::V_FLOOR_F32(const GcnInst& inst) { } void Translator::V_SUB_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; - const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 src0{GetSrc(inst.src[0], true)}; + const IR::F32 src1{GetSrc(inst.src[1], true)}; SetDst(inst.dst[0], ir.FPSub(src0, src1)); } void Translator::V_RCP_F32(const GcnInst& inst) { - const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src0{GetSrc(inst.src[0], true)}; SetDst(inst.dst[0], ir.FPRecip(src0)); } diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 22b59f740..4a8b9885c 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -479,9 +479,9 @@ struct Liverpool { }; struct Scissor { - union { - BitField<0, 16, s32> top_left_x; - BitField<16, 16, s32> top_left_y; + struct { + s16 top_left_x; + s16 top_left_y; }; union { BitField<0, 15, u32> bottom_right_x; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index c851aa9d5..7d1a980cf 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -36,7 +36,8 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler for (const auto& image : info.images) { bindings.push_back({ .binding = binding++, - .descriptorType = vk::DescriptorType::eSampledImage, + .descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage + : vk::DescriptorType::eSampledImage, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eCompute, }); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 3f2195d7e..d3f7c647a 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -289,7 +289,8 @@ void GraphicsPipeline::BuildDescSetLayout() { for (const auto& image : stage.images) { bindings.push_back({ .binding = binding++, - .descriptorType = vk::DescriptorType::eSampledImage, + .descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage + : vk::DescriptorType::eSampledImage, .descriptorCount = 1, .stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment, }); @@ -316,8 +317,8 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer& BindVertexBuffers(staging); // Bind resource buffers and textures. - boost::container::static_vector buffer_infos; - boost::container::static_vector image_infos; + boost::container::static_vector buffer_infos; + boost::container::static_vector image_infos; boost::container::small_vector set_writes; u32 binding{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 141ac6350..00e992bb7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -8,6 +8,7 @@ #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" +#include "shader_recompiler/exception.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -88,6 +89,8 @@ void PipelineCache::RefreshGraphicsKey() { auto& key = graphics_key; key.depth = regs.depth_control; + key.depth.depth_write_enable.Assign(regs.depth_control.depth_write_enable.Value() && + !regs.depth_render_control.depth_clear_enable); key.depth_bounds_min = regs.depth_bounds_min; key.depth_bounds_max = regs.depth_bounds_max; key.depth_bias_enable = regs.polygon_control.enable_polygon_offset_back || @@ -180,6 +183,7 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline() { inst_pool.ReleaseContents(); // Recompile shader to IR. + LOG_INFO(Render_Vulkan, "Compiling {} shader {:#X}", stage, hash); const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs); programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info)); diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp index 1d3e5e218..7ffa23d15 100644 --- a/src/video_core/texture_cache/image_view.cpp +++ b/src/video_core/texture_cache/image_view.cpp @@ -46,17 +46,19 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) { } } -ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image) noexcept { +ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept : is_storage{is_storage} { type = ConvertImageViewType(image.type); format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt()); range.base.level = 0; range.base.layer = 0; range.extent.levels = image.NumLevels(); range.extent.layers = image.NumLayers(); - mapping.r = ConvertComponentSwizzle(image.dst_sel_x); - mapping.g = ConvertComponentSwizzle(image.dst_sel_y); - mapping.b = ConvertComponentSwizzle(image.dst_sel_z); - mapping.a = ConvertComponentSwizzle(image.dst_sel_w); + if (!is_storage) { + mapping.r = ConvertComponentSwizzle(image.dst_sel_x); + mapping.g = ConvertComponentSwizzle(image.dst_sel_y); + mapping.b = ConvertComponentSwizzle(image.dst_sel_z); + mapping.a = ConvertComponentSwizzle(image.dst_sel_w); + } } ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image, @@ -74,7 +76,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info } const vk::ImageViewCreateInfo image_view_ci = { - .pNext = usage_override.has_value() ? &usage_ci : nullptr, + .pNext = nullptr, .image = image.image, .viewType = info.type, .format = format, diff --git a/src/video_core/texture_cache/image_view.h b/src/video_core/texture_cache/image_view.h index 2e15e1a1a..83936acc0 100644 --- a/src/video_core/texture_cache/image_view.h +++ b/src/video_core/texture_cache/image_view.h @@ -18,12 +18,13 @@ namespace VideoCore { struct ImageViewInfo { explicit ImageViewInfo() = default; - explicit ImageViewInfo(const AmdGpu::Image& image) noexcept; + explicit ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept; vk::ImageViewType type = vk::ImageViewType::e2D; vk::Format format = vk::Format::eR8G8B8A8Unorm; SubresourceRange range; vk::ComponentMapping mapping{}; + bool is_storage; auto operator<=>(const ImageViewInfo&) const = default; }; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index fca79f496..8c910c03e 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -169,14 +169,14 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storag image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits::eShaderRead); } - const ImageViewInfo view_info{desc}; + const ImageViewInfo view_info{desc, is_storage}; return RegisterImageView(image, view_info); } ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, const AmdGpu::Liverpool::CbDbExtent& hint) { const ImageInfo info{buffer, hint}; - auto& image = FindImage(info, buffer.Address()); + auto& image = FindImage(info, buffer.Address(), false); image.flags &= ~ImageFlagBits::CpuModified; image.Transit(vk::ImageLayout::eColorAttachmentOptimal,