From f480d091ce7b3569e1b63e36de947a792f760603 Mon Sep 17 00:00:00 2001 From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com> Date: Fri, 24 May 2024 23:50:56 +0300 Subject: [PATCH] video_core: Add basic vertex, index buffer handling and pipeline caching --- .gitmodules | 2 +- externals/CMakeLists.txt | 6 +- src/core/memory.cpp | 93 ++++++++++++++ src/core/memory.h | 28 +++- .../backend/spirv/spirv_emit_context.cpp | 6 +- .../frontend/fetch_shader.cpp | 12 +- src/shader_recompiler/frontend/fetch_shader.h | 10 +- .../frontend/translate/translate.cpp | 9 +- src/shader_recompiler/ir/program.h | 2 - src/shader_recompiler/recompiler.cpp | 3 +- src/shader_recompiler/recompiler.h | 5 +- src/shader_recompiler/runtime_info.h | 18 ++- src/video_core/amdgpu/liverpool.cpp | 6 +- src/video_core/amdgpu/liverpool.h | 90 ++++++++----- src/video_core/amdgpu/pixel_format.h | 1 - .../renderer_vulkan/liverpool_to_vk.cpp | 32 ++++- .../renderer_vulkan/liverpool_to_vk.h | 5 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 77 +++++++++-- .../renderer_vulkan/vk_graphics_pipeline.h | 35 ++++- .../renderer_vulkan/vk_instance.cpp | 10 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 121 +++++++++++------- .../renderer_vulkan/vk_pipeline_cache.h | 13 +- .../renderer_vulkan/vk_rasterizer.cpp | 63 +++++++-- .../renderer_vulkan/vk_rasterizer.h | 22 ++-- .../renderer_vulkan/vk_stream_buffer.h | 2 +- src/video_core/texture_cache/image.cpp | 6 +- .../texture_cache/texture_cache.cpp | 3 +- 27 files changed, 506 insertions(+), 174 deletions(-) diff --git a/.gitmodules b/.gitmodules index 76ca5dcae..8c54fe861 100644 --- a/.gitmodules +++ b/.gitmodules @@ -50,7 +50,7 @@ [submodule "externals/toml11"] path = externals/toml11 url = https://github.com/ToruNiina/toml11.git -[submodule "externals/xxHash"] +[submodule "externals/xxhash"] path = externals/xxHash url = https://github.com/Cyan4973/xxHash.git [submodule "externals/zydis"] diff --git a/externals/CMakeLists.txt 
b/externals/CMakeLists.txt index 24bca2f1c..b2d348b7f 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -74,8 +74,8 @@ add_subdirectory(magic_enum EXCLUDE_FROM_ALL) add_subdirectory(toml11 EXCLUDE_FROM_ALL) # xxHash -add_library(xxhash INTERFACE) -target_include_directories(xxhash INTERFACE xxhash) +add_library(xxhash xxhash/xxhash.h xxhash/xxhash.c) +target_include_directories(xxhash PUBLIC xxhash) # Zydis option(ZYDIS_BUILD_TOOLS "" OFF) @@ -92,4 +92,4 @@ endif() add_subdirectory(sirit EXCLUDE_FROM_ALL) if (WIN32) target_compile_options(sirit PUBLIC "-Wno-error=unused-command-line-argument") -endif() \ No newline at end of file +endif() diff --git a/src/core/memory.cpp b/src/core/memory.cpp index aa5c66761..a16abbe01 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -7,6 +7,7 @@ #include "common/scope_exit.h" #include "core/libraries/error_codes.h" #include "core/memory.h" +#include "video_core/renderer_vulkan/vk_instance.h" namespace Core { @@ -61,6 +62,10 @@ int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, M new_vma.prot = prot; new_vma.name = name; new_vma.type = type; + + if (type == VMAType::Direct) { + MapVulkanMemory(mapped_addr, size); + } }; // When virtual addr is zero let the address space manager pick the address. @@ -103,6 +108,10 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { ASSERT_MSG(it != vma_map.end() && it->first == virtual_addr, "Attempting to unmap partially mapped range"); + if (it->second.type == VMAType::Direct) { + UnmapVulkanMemory(virtual_addr, size); + } + // Mark region as free and attempt to coalesce it with neighbours. 
auto& vma = it->second; vma.type = VMAType::Free; @@ -114,6 +123,13 @@ void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) { impl.Unmap(virtual_addr, size); } +std::pair MemoryManager::GetVulkanBuffer(VAddr addr) { + auto it = mapped_memories.upper_bound(addr); + it = std::prev(it); + ASSERT(it != mapped_memories.end() && it->first <= addr); + return std::make_pair(*it->second.buffer, addr - it->first); +} + VirtualMemoryArea& MemoryManager::AddMapping(VAddr virtual_addr, size_t size) { auto vma_handle = FindVMA(virtual_addr); ASSERT_MSG(vma_handle != vma_map.end(), "Virtual address not in vm_map"); @@ -171,4 +187,81 @@ MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAHandle iter) { return iter; } +void MemoryManager::MapVulkanMemory(VAddr addr, size_t size) { + const vk::Device device = instance->GetDevice(); + const auto memory_props = instance->GetPhysicalDevice().getMemoryProperties(); + void* host_pointer = reinterpret_cast(addr); + const auto host_mem_props = device.getMemoryHostPointerPropertiesEXT( + vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, host_pointer); + ASSERT(host_mem_props.memoryTypeBits != 0); + + int mapped_memory_type = -1; + auto find_mem_type_with_flag = [&](const vk::MemoryPropertyFlags flags) { + u32 host_mem_types = host_mem_props.memoryTypeBits; + while (host_mem_types != 0) { + // Try to find a cached memory type + mapped_memory_type = std::countr_zero(host_mem_types); + host_mem_types -= (1 << mapped_memory_type); + + if ((memory_props.memoryTypes[mapped_memory_type].propertyFlags & flags) == flags) { + return; + } + } + + mapped_memory_type = -1; + }; + + // First try to find a memory that is both coherent and cached + find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent | + vk::MemoryPropertyFlagBits::eHostCached); + if (mapped_memory_type == -1) + // Then only coherent (lower performance) + find_mem_type_with_flag(vk::MemoryPropertyFlagBits::eHostCoherent); + + if (mapped_memory_type == 
-1) { + LOG_CRITICAL(Render_Vulkan, "No coherent memory available for memory mapping"); + mapped_memory_type = std::countr_zero(host_mem_props.memoryTypeBits); + } + + const vk::StructureChain alloc_info = { + vk::MemoryAllocateInfo{ + .allocationSize = size, + .memoryTypeIndex = static_cast(mapped_memory_type), + }, + vk::ImportMemoryHostPointerInfoEXT{ + .handleType = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, + .pHostPointer = host_pointer, + }, + }; + + const auto [it, new_memory] = mapped_memories.try_emplace(addr); + ASSERT_MSG(new_memory, "Attempting to remap already mapped vulkan memory"); + + auto& memory = it->second; + memory.backing = device.allocateMemoryUnique(alloc_info.get()); + + constexpr vk::BufferUsageFlags MapFlags = + vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer | + vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eUniformBuffer; + + const vk::StructureChain buffer_info = { + vk::BufferCreateInfo{ + .size = size, + .usage = MapFlags, + .sharingMode = vk::SharingMode::eExclusive, + }, + vk::ExternalMemoryBufferCreateInfoKHR{ + .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eHostAllocationEXT, + }}; + memory.buffer = device.createBufferUnique(buffer_info.get()); + device.bindBufferMemory(*memory.buffer, *memory.backing, 0); +} + +void MemoryManager::UnmapVulkanMemory(VAddr addr, size_t size) { + const auto it = mapped_memories.find(addr); + ASSERT(it != mapped_memories.end() && it->second.buffer_size == size); + mapped_memories.erase(it); +} + } // namespace Core diff --git a/src/core/memory.h b/src/core/memory.h index a86930c81..4c0fadbfd 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -10,6 +11,11 @@ #include "common/singleton.h" #include "common/types.h" #include "core/address_space.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { 
+class Instance; +} namespace Core { @@ -86,6 +92,10 @@ public: explicit MemoryManager(); ~MemoryManager(); + void SetInstance(const Vulkan::Instance* instance_) { + instance = instance_; + } + PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment, int memory_type); @@ -97,11 +107,9 @@ public: void UnmapMemory(VAddr virtual_addr, size_t size); -private: - bool HasOverlap(VAddr addr, size_t size) const { - return vma_map.find(addr) != vma_map.end(); - } + std::pair GetVulkanBuffer(VAddr addr); +private: VMAHandle FindVMA(VAddr target) { // Return first the VMA with base >= target. const auto it = vma_map.lower_bound(target); @@ -117,10 +125,22 @@ private: VMAHandle MergeAdjacent(VMAHandle iter); + void MapVulkanMemory(VAddr addr, size_t size); + + void UnmapVulkanMemory(VAddr addr, size_t size); + private: AddressSpace impl; std::vector allocations; VMAMap vma_map; + + struct MappedMemory { + vk::UniqueBuffer buffer; + vk::UniqueDeviceMemory backing; + size_t buffer_size; + }; + std::map mapped_memories; + const Vulkan::Instance* instance{}; }; using Memory = Common::Singleton; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 376175dc9..771e46d40 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -175,12 +175,14 @@ void EmitContext::DefineInputs(const IR::Program& program) { const Id id{DefineInput(type, input.binding)}; Name(id, fmt::format("vs_in_attr{}", input.binding)); input_params[input.binding] = GetAttributeInfo(input.fmt, id); + interfaces.push_back(id); } break; case Stage::Fragment: for (const auto& input : info.ps_inputs) { if (input.is_default) { - input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value), input_f32, F32[1]}; + input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value), + input_f32, F32[1]}; 
continue; } const IR::Attribute param{IR::Attribute::Param0 + input.param_index}; @@ -192,6 +194,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { } Name(id, fmt::format("fs_in_attr{}", input.semantic)); input_params[input.semantic] = {id, input_f32, F32[1], num_components}; + interfaces.push_back(id); } default: break; @@ -212,6 +215,7 @@ void EmitContext::DefineOutputs(const IR::Program& program) { const Id id{DefineOutput(F32[num_components], i)}; Name(id, fmt::format("out_attr{}", i)); output_params[i] = {id, output_f32, F32[1], num_components}; + interfaces.push_back(id); } break; case Stage::Fragment: diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp index b17fbc522..7f4f50e94 100644 --- a/src/shader_recompiler/frontend/fetch_shader.cpp +++ b/src/shader_recompiler/frontend/fetch_shader.cpp @@ -40,7 +40,7 @@ std::vector ParseFetchShader(const u32* code) { struct VsharpLoad { u32 dword_offset{}; s32 base_sgpr{}; - s32 dst_sgpr{-1}; + s32 dst_reg{-1}; }; boost::container::static_vector loads; @@ -57,11 +57,13 @@ std::vector ParseFetchShader(const u32* code) { } if (inst.inst_class == InstClass::VectorMemBufFmt) { + // SRSRC is in units of 4 SGPRs while SBASE is in pairs of SGPRs + const u32 base_sgpr = inst.src[2].code * 4; + // Find the load instruction that loaded the V# to the SGPR. // This is so we can determine its index in the vertex table. - const auto it = std::ranges::find_if(loads, [&](VsharpLoad& load) { - return load.dst_sgpr == inst.src[2].code * 4; - }); + const auto it = std::ranges::find_if( loads, [&](VsharpLoad& load) { return load.dst_reg == base_sgpr; }); auto& attrib = attributes.emplace_back(); attrib.semantic = semantic_index++; @@ -71,7 +73,7 @@ std::vector ParseFetchShader(const u32* code) { attrib.dword_offset = it->dword_offset; // Mark load as used.
- it->dst_sgpr = -1; + it->dst_reg = -1; } } diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h index 627e19aa0..2f8eae12c 100644 --- a/src/shader_recompiler/frontend/fetch_shader.h +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -9,11 +9,11 @@ namespace Shader::Gcn { struct VertexAttribute { - u8 semantic; ///< Semantic index of the attribute - u8 dest_vgpr; ///< Destination VGPR to load first component - u8 num_elements; ///< Number of components to load - u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V# - u8 dword_offset; ///< The dword offset of the V# that describes this attribute. + u8 semantic; ///< Semantic index of the attribute + u8 dest_vgpr; ///< Destination VGPR to load first component. + u8 num_elements; ///< Number of components to load + u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V# + u8 dword_offset; ///< The dword offset of the V# that describes this attribute. }; std::vector ParseFetchShader(const u32* code); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 6dc85d168..06faf28d6 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/exception.h" -#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/frontend/fetch_shader.h" +#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" @@ -103,20 +103,21 @@ void Translator::EmitFetch(const GcnInst& inst) { // Parse the assembly to generate a list of attributes. 
const auto attribs = ParseFetchShader(code); for (const auto& attrib : attribs) { - IR::VectorReg dst_reg{attrib.dest_vgpr}; const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic}; + IR::VectorReg dst_reg{attrib.dest_vgpr}; for (u32 i = 0; i < attrib.num_elements; i++) { ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i)); } // Read the V# of the attribute to figure out component number and type. - const auto buffer = info.ReadUd(attrib.sgpr_base, - attrib.dword_offset); + const auto buffer = info.ReadUd(attrib.sgpr_base, attrib.dword_offset); const u32 num_components = AmdGpu::NumComponents(buffer.data_format); info.vs_inputs.push_back({ .fmt = buffer.num_format, .binding = attrib.semantic, .num_components = std::min(attrib.num_elements, num_components), + .sgpr_base = attrib.sgpr_base, + .dword_offset = attrib.dword_offset, }); } } diff --git a/src/shader_recompiler/ir/program.h b/src/shader_recompiler/ir/program.h index 27e33b119..eff933f28 100644 --- a/src/shader_recompiler/ir/program.h +++ b/src/shader_recompiler/ir/program.h @@ -12,8 +12,6 @@ namespace Shader::IR { struct Program { - explicit Program(const Info&& info_) : info{info_} {} - AbstractSyntaxList syntax_list; BlockList blocks; BlockList post_order_blocks; diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 86173b5b8..66d19620f 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -44,7 +44,7 @@ IR::Program TranslateProgram(ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool& inst_pool, ObjectPool& block_pool, - std::span code, - const Info&& info); + std::span code, const Info&& info); } // namespace Shader diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 959768af2..40c9c6b0f 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -40,12 +40,12 @@ enum class TextureType : u32 { constexpr u32 NUM_TEXTURE_TYPES = 7; 
struct Info { - explicit Info(std::span user_data_) : user_data{user_data_} {} - struct VsInput { AmdGpu::NumberFormat fmt; u16 binding; u16 num_components; + u8 sgpr_base; + u8 dword_offset; }; boost::container::static_vector vs_inputs{}; @@ -60,29 +60,33 @@ struct Info { struct AttributeFlags { bool Get(IR::Attribute attrib, u32 comp = 0) const { - return flags[static_cast(attrib)] & (1 << comp); + return flags[Index(attrib)] & (1 << comp); } bool GetAny(IR::Attribute attrib) const { - return flags[static_cast(attrib)]; + return flags[Index(attrib)]; } void Set(IR::Attribute attrib, u32 comp = 0) { - flags[static_cast(attrib)] |= (1 << comp); + flags[Index(attrib)] |= (1 << comp); } u32 NumComponents(IR::Attribute attrib) const { - const u8 mask = flags[static_cast(attrib)]; + const u8 mask = flags[Index(attrib)]; ASSERT(mask != 0b1011 || mask != 0b1101); return std::popcount(mask); } + static size_t Index(IR::Attribute attrib) { + return static_cast(attrib); + } + std::array flags; }; AttributeFlags loads{}; AttributeFlags stores{}; - std::span user_data; + std::span user_data; Stage stage; template diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index b1563a34a..d43f749b9 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -114,7 +114,7 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) { regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; if (rasterizer) { - rasterizer->DrawIndex(); + rasterizer->Draw(true); } break; } @@ -122,7 +122,9 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) { const auto* draw_index = reinterpret_cast(header); regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; - rasterizer->DrawIndex(); + if (rasterizer) { + rasterizer->Draw(false); + } break; } case PM4ItOpcode::DispatchDirect: { diff --git a/src/video_core/amdgpu/liverpool.h 
b/src/video_core/amdgpu/liverpool.h index a4e9df376..83fd2494a 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -180,25 +180,6 @@ struct Liverpool { BitField<31, 1, u32> disable_color_writes_on_depth_pass; }; - union DepthSize { - u32 raw; - BitField<0, 11, u32> pitch_tile_max; - BitField<11, 11, u32> height_tile_max; - - u32 Pitch() const { - return (pitch_tile_max + 1) << 3; - } - - u32 Height() const { - return (height_tile_max + 1) << 3; - } - }; - - union DepthSlice { - u32 raw; - BitField<0, 22, u32> slice_tile_max; - }; - enum class StencilFunc : u32 { Keep = 0, Zero = 1, @@ -236,9 +217,45 @@ struct Liverpool { BitField<24, 8, u32> stencil_op_val; }; - union StencilInfo { - u32 raw; - BitField<0, 1, u32> format; + struct DepthBuffer { + enum class ZFormat : u32 { + Invald = 0, + Z16 = 1, + Z32Float = 2, + }; + + enum class StencilFormat : u32 { + Invalid = 0, + Stencil8 = 1, + }; + + union { + BitField<0, 2, ZFormat> format; + BitField<2, 2, u32> num_samples; + BitField<13, 3, u32> tile_split; + } z_info; + union { + BitField<0, 1, StencilFormat> format; + } stencil_info; + u32 z_read_base; + u32 stencil_read_base; + u32 z_write_base; + u32 stencil_write_base; + union { + BitField<0, 11, u32> pitch_tile_max; + BitField<11, 11, u32> height_tile_max; + } depth_size; + union { + BitField<0, 22, u32> tile_max; + } depth_slice; + + u32 Pitch() const { + return (depth_size.pitch_tile_max + 1) << 3; + } + + u32 Height() const { + return (depth_size.height_tile_max + 1) << 3; + } }; enum class ClipSpace : u32 { @@ -505,6 +522,12 @@ struct Liverpool { u64 CmaskAddress() const { return u64(cmask_base_address) << 8; } + + NumberFormat NumFormat() const { + // There is a small difference between T# and CB number types, account for it. + return info.number_type == AmdGpu::NumberFormat::Uscaled ? 
AmdGpu::NumberFormat::Srgb + : info.number_type; + } }; enum class PrimitiveType : u32 { @@ -539,14 +562,8 @@ struct Liverpool { u32 stencil_clear; u32 depth_clear; Scissor screen_scissor; - INSERT_PADDING_WORDS(0xA011 - 0xA00C - 2); - StencilInfo stencil_info; - u32 z_read_base; - u32 stencil_read_base; - u32 z_write_base; - u32 stencil_write_base; - DepthSize depth_size; - DepthSlice depth_slice; + INSERT_PADDING_WORDS(0xA010 - 0xA00C - 2); + DepthBuffer depth_buffer; INSERT_PADDING_WORDS(0xA08E - 0xA018); ColorBufferMask color_target_mask; ColorBufferMask color_shader_mask; @@ -595,6 +612,17 @@ struct Liverpool { VgtNumInstances num_instances; }; std::array reg_array{}; + + const ShaderProgram* ProgramForStage(u32 index) const { + switch (index) { + case 0: + return &vs_program; + case 4: + return &ps_program; + default: + return nullptr; + } + } }; Regs regs{}; @@ -635,7 +663,7 @@ static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); static_assert(GFX6_3D_REG_INDEX(vs_program) == 0x2C48); static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C); static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C); -static_assert(GFX6_3D_REG_INDEX(depth_slice) == 0xA017); +static_assert(GFX6_3D_REG_INDEX(depth_buffer.depth_slice) == 0xA017); static_assert(GFX6_3D_REG_INDEX(color_target_mask) == 0xA08E); static_assert(GFX6_3D_REG_INDEX(color_shader_mask) == 0xA08F); static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094); diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index fb0c27517..7555cdb33 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -76,4 +76,3 @@ struct fmt::formatter { return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt)); } }; - diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 00b28de9e..906b937e1 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ 
b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#pragma clang optimize off + #include "common/assert.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" @@ -114,19 +114,41 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) { } vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { - if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && num_format == AmdGpu::NumberFormat::Float) { + if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && + num_format == AmdGpu::NumberFormat::Float) { return vk::Format::eR32G32B32A32Sfloat; } - if (data_format == AmdGpu::DataFormat::Format32_32_32 && num_format == AmdGpu::NumberFormat::Uint) { + if (data_format == AmdGpu::DataFormat::Format32_32_32 && + num_format == AmdGpu::NumberFormat::Uint) { return vk::Format::eR32G32B32Uint; } - if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Unorm) { + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && + num_format == AmdGpu::NumberFormat::Unorm) { return vk::Format::eR8G8B8A8Unorm; } - if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Srgb) { + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && + num_format == AmdGpu::NumberFormat::Srgb) { return vk::Format::eR8G8B8A8Srgb; } UNREACHABLE(); } +vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format, + Liverpool::DepthBuffer::StencilFormat stencil_format) { + UNREACHABLE(); +} + +void EmitQuadToTriangleListIndices(u8* out_ptr, u32 num_vertices) { + static constexpr u16 NumVerticesPerQuad = 4; + u16* out_data = reinterpret_cast(out_ptr); + for (u16 i = 0; i < num_vertices; i += NumVerticesPerQuad) { + *out_data++ = i; + *out_data++ = i + 1; + *out_data++ = i + 2; + *out_data++ = i + 2; + *out_data++ = i; + *out_data++ = i + 3; + } +} + } // namespace 
Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index c04b1cb95..38f021fdb 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -23,4 +23,9 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode); vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); +vk::Format DepthFormat(Liverpool::DepthBuffer::ZFormat z_format, + Liverpool::DepthBuffer::StencilFormat stencil_format); + +void EmitQuadToTriangleListIndices(u8* out_indices, u32 num_vertices); + } // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 4ba2b61bd..3db09efef 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -4,22 +4,58 @@ #include #include "common/assert.h" +#include "core/memory.h" +#include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" namespace Vulkan { -GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& key_, - vk::PipelineCache pipeline_cache_, vk::PipelineLayout layout_, +GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_, + const PipelineKey& key_, vk::PipelineCache pipeline_cache, + std::span infos, std::array modules) - : instance{instance_}, pipeline_layout{layout_}, pipeline_cache{pipeline_cache_}, key{key_} { + : instance{instance_}, scheduler{scheduler_}, key{key_} { const vk::Device device = instance.GetDevice(); + for (u32 i = 0; i < MaxShaderStages; i++) { + if (!infos[i]) { + continue; + } + stages[i] = *infos[i]; + } + + const vk::PipelineLayoutCreateInfo layout_info = { + .setLayoutCount = 
0U, + .pSetLayouts = nullptr, + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }; + pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); + + boost::container::static_vector bindings; + boost::container::static_vector attributes; + const auto& vs_info = stages[0]; + for (const auto& input : vs_info.vs_inputs) { + const auto buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); + attributes.push_back({ + .location = input.binding, + .binding = input.binding, + .format = LiverpoolToVK::SurfaceFormat(buffer.data_format, buffer.num_format), + .offset = 0, + }); + bindings.push_back({ + .binding = input.binding, + .stride = u32(buffer.stride), + .inputRate = vk::VertexInputRate::eVertex, + }); + } const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { - .vertexBindingDescriptionCount = 0U, - .pVertexBindingDescriptions = nullptr, - .vertexAttributeDescriptionCount = 0U, - .pVertexAttributeDescriptions = nullptr, + .vertexBindingDescriptionCount = static_cast(bindings.size()), + .pVertexBindingDescriptions = bindings.data(), + .vertexAttributeDescriptionCount = static_cast(attributes.size()), + .pVertexAttributeDescriptions = attributes.data(), }; const vk::PipelineInputAssemblyStateCreateInfo input_assembly = { @@ -126,11 +162,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& .pName = "main", }; - const vk::Format color_format = vk::Format::eR8G8B8A8Srgb; + const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined); + const u32 num_color_formats = std::distance(key.color_formats.begin(), it); const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = { - .colorAttachmentCount = 1, - .pColorAttachmentFormats = &color_format, - .depthAttachmentFormat = vk::Format::eUndefined, + .colorAttachmentCount = num_color_formats, + .pColorAttachmentFormats = key.color_formats.data(), + .depthAttachmentFormat = key.depth.depth_enable ? 
key.depth_format : vk::Format::eUndefined, .stencilAttachmentFormat = vk::Format::eUndefined, }; @@ -146,7 +183,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& .pDepthStencilState = &depth_info, .pColorBlendState = &color_blending, .pDynamicState = &dynamic_info, - .layout = pipeline_layout, + .layout = *pipeline_layout, }; auto result = device.createGraphicsPipelineUnique(pipeline_cache, pipeline_info); @@ -159,4 +196,20 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& GraphicsPipeline::~GraphicsPipeline() = default; +void GraphicsPipeline::BindResources(Core::MemoryManager* memory) const { + std::array buffers; + std::array offsets; + + const auto& vs_info = stages[0]; + const size_t num_buffers = vs_info.vs_inputs.size(); + for (u32 i = 0; i < num_buffers; ++i) { + const auto& input = vs_info.vs_inputs[i]; + const auto buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); + std::tie(buffers[i], offsets[i]) = memory->GetVulkanBuffer(buffer.base_address); + } + + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindVertexBuffers(0, num_buffers, buffers.data(), offsets.data()); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index d8b7887b5..47cc5c233 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -1,19 +1,31 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "common/types.h" +#include "shader_recompiler/runtime_info.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_common.h" +namespace Core { +class MemoryManager; +} + namespace Vulkan { +static constexpr u32 MaxVertexBufferCount = 32; static constexpr u32 MaxShaderStages = 5; class Instance; +class Scheduler; using 
Liverpool = AmdGpu::Liverpool; struct PipelineKey { + std::array stage_hashes; + std::array color_formats; + vk::Format depth_format; + Liverpool::DepthControl depth; Liverpool::StencilControl stencil; Liverpool::StencilRefMask stencil_ref_front; @@ -21,26 +33,41 @@ struct PipelineKey { Liverpool::PrimitiveType prim_type; Liverpool::PolygonMode polygon_mode; Liverpool::CullMode cull_mode; + + bool operator==(const PipelineKey& key) const noexcept { + return std::memcmp(this, &key, sizeof(PipelineKey)) == 0; + } }; static_assert(std::has_unique_object_representations_v); class GraphicsPipeline { public: - explicit GraphicsPipeline(const Instance& instance, const PipelineKey& key, - vk::PipelineCache pipeline_cache, vk::PipelineLayout layout, + explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler, + const PipelineKey& key, vk::PipelineCache pipeline_cache, + std::span infos, std::array modules); ~GraphicsPipeline(); + void BindResources(Core::MemoryManager* memory) const; + [[nodiscard]] vk::Pipeline Handle() const noexcept { return *pipeline; } private: const Instance& instance; + Scheduler& scheduler; vk::UniquePipeline pipeline; - vk::PipelineLayout pipeline_layout; - vk::PipelineCache pipeline_cache; + vk::UniquePipelineLayout pipeline_layout; + std::array stages; PipelineKey key; }; } // namespace Vulkan + +template <> +struct std::hash { + std::size_t operator()(const Vulkan::PipelineKey& key) const noexcept { + return XXH3_64bits(&key, sizeof(key)); + } +}; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 0cde3e6e4..32dca0c5a 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -271,11 +271,11 @@ void Instance::CollectDeviceParameters() { const std::string api_version = GetReadableVersion(properties.apiVersion); const std::string extensions = fmt::format("{}", fmt::join(available_extensions, ", ")); - 
LOG_INFO(Render_Vulkan, "GPU_Vendor", vendor_name); - LOG_INFO(Render_Vulkan, "GPU_Model", model_name); - LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver", driver_name); - LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version", api_version); - LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions", extensions); + LOG_INFO(Render_Vulkan, "GPU_Vendor: {}", vendor_name); + LOG_INFO(Render_Vulkan, "GPU_Model: {}", model_name); + LOG_INFO(Render_Vulkan, "GPU_Vulkan_Driver: {}", driver_name); + LOG_INFO(Render_Vulkan, "GPU_Vulkan_Version: {}", api_version); + LOG_INFO(Render_Vulkan, "GPU_Vulkan_Extensions: {}", extensions); } void Instance::CollectToolingInfo() { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index e0134442f..6de86c4c1 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -2,10 +2,10 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include "common/scope_exit.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" #include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" +#include "video_core/amdgpu/resource.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -14,8 +14,9 @@ namespace Vulkan { Shader::Info MakeShaderInfo(Shader::Stage stage, std::span user_data, - AmdGpu::Liverpool::Regs& regs) { - Shader::Info info{user_data}; + const AmdGpu::Liverpool::Regs& regs) { + Shader::Info info{}; + info.user_data = user_data; info.stage = stage; switch (stage) { case Shader::Stage::Fragment: { @@ -39,66 +40,96 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_) : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192}, block_pool{512} { - const vk::PipelineLayoutCreateInfo layout_info = { - 
.setLayoutCount = 0U, - .pSetLayouts = nullptr, - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); pipeline_cache = instance.GetDevice().createPipelineCacheUnique({}); } -void PipelineCache::BindPipeline() { - SCOPE_EXIT { - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); - }; +const GraphicsPipeline* PipelineCache::GetPipeline() { + RefreshKey(); + const auto [it, is_new] = graphics_pipelines.try_emplace(graphics_key); + if (is_new) { + it.value() = CreatePipeline(); + } + const GraphicsPipeline* pipeline = it->second.get(); + return pipeline; +} - if (pipeline) { - return; +void PipelineCache::RefreshKey() { + auto& regs = liverpool->regs; + auto& key = graphics_key; + + key.depth = regs.depth_control; + key.stencil = regs.stencil_control; + key.stencil_ref_front = regs.stencil_ref_front; + key.stencil_ref_back = regs.stencil_ref_back; + key.prim_type = regs.primitive_type; + key.polygon_mode = regs.polygon_control.PolyMode(); + + const auto& db = regs.depth_buffer; + key.depth_format = key.depth.depth_enable + ? LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format) + : vk::Format::eUndefined; + for (u32 i = 0; i < Liverpool::NumColorBuffers; i++) { + const auto& cb = regs.color_buffers[i]; + key.color_formats[i] = cb.base_address + ? LiverpoolToVK::SurfaceFormat(cb.info.format, cb.NumFormat()) + : vk::Format::eUndefined; } - const auto get_program = [&](const AmdGpu::Liverpool::ShaderProgram& pgm, Shader::Stage stage) { - const u32* token = pgm.Address(); + for (u32 i = 0; i < MaxShaderStages; i++) { + auto* pgm = regs.ProgramForStage(i); + if (!pgm || !pgm->Address()) { + key.stage_hashes[i] = 0; + continue; + } + const u32* code = pgm->Address(); - // Retrieve shader header. 
Shader::BinaryInfo bininfo; - std::memcpy(&bininfo, token + (token[1] + 1) * 2, sizeof(bininfo)); + std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo)); + key.stage_hashes[i] = bininfo.shader_hash; + } +} + +std::unique_ptr PipelineCache::CreatePipeline() { + const auto& regs = liverpool->regs; + + std::array programs; + std::array infos{}; + + for (u32 i = 0; i < MaxShaderStages; i++) { + if (!graphics_key.stage_hashes[i]) { + stages[i] = VK_NULL_HANDLE; + continue; + } + auto* pgm = regs.ProgramForStage(i); + const u32* code = pgm->Address(); + + Shader::BinaryInfo bininfo; + std::memcpy(&bininfo, code + (code[1] + 1) * 2, sizeof(bininfo)); + const u32 num_dwords = bininfo.length / sizeof(u32); - // Lookup if the shader already exists. const auto it = module_map.find(bininfo.shader_hash); if (it != module_map.end()) { - return *it->second; + stages[i] = *it->second; + continue; } - // Compile and cache shader. - const auto data = std::span{token, bininfo.length / sizeof(u32)}; block_pool.ReleaseContents(); inst_pool.ReleaseContents(); - const auto info = MakeShaderInfo(stage, pgm.user_data, liverpool->regs); - auto program = Shader::TranslateProgram(inst_pool, block_pool, data, std::move(info)); - const auto code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, program); - static int counter = 0; - std::ofstream file(fmt::format("shader{}.spv", counter++), std::ios::out | std::ios::binary); - file.write((const char*)code.data(), code.size() * sizeof(u32)); - file.close(); + // Recompile shader to IR. 
+ const auto stage = Shader::Stage{i}; + const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs); + programs[i] = Shader::TranslateProgram(inst_pool, block_pool, std::span{code, num_dwords}, + std::move(info)); - return CompileSPV(code, instance.GetDevice()); - }; + // Compile IR to SPIR-V + const auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, programs[i]); + stages[i] = CompileSPV(spv_code, instance.GetDevice()); + infos[i] = &programs[i].info; + } - // Retrieve shader stage modules. - // TODO: Only do this when program address is changed. - stages[0] = get_program(liverpool->regs.vs_program, Shader::Stage::Vertex); - stages[4] = get_program(liverpool->regs.ps_program, Shader::Stage::Fragment); - - // Bind pipeline. - // TODO: Read entire key based on reg state. - graphics_key.prim_type = liverpool->regs.primitive_type; - graphics_key.polygon_mode = liverpool->regs.polygon_control.PolyMode(); - pipeline = std::make_unique(instance, graphics_key, *pipeline_cache, - *pipeline_layout, stages); + return std::make_unique(instance, scheduler, graphics_key, *pipeline_cache, + infos, stages); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 7634f9cbc..32830eabf 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -8,6 +8,10 @@ #include "shader_recompiler/object_pool.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +namespace Shader { +struct Info; +} + namespace Vulkan { class Instance; @@ -21,7 +25,12 @@ public: AmdGpu::Liverpool* liverpool); ~PipelineCache() = default; - void BindPipeline(); + const GraphicsPipeline* GetPipeline(); + +private: + void RefreshKey(); + + std::unique_ptr CreatePipeline(); private: const Instance& instance; @@ -31,7 +40,7 @@ private: vk::UniquePipelineLayout pipeline_layout; tsl::robin_map module_map; std::array stages{}; - 
std::unique_ptr pipeline; + tsl::robin_map> graphics_pipelines; PipelineKey graphics_key{}; Shader::ObjectPool inst_pool; Shader::ObjectPool block_pool; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 595dcff1c..3d301f620 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/config.h" +#include "core/memory.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" @@ -18,24 +19,25 @@ static constexpr vk::BufferUsageFlags VertexIndexFlags = vk::BufferUsageFlagBits Rasterizer::Rasterizer(const Instance& instance_, Scheduler& scheduler_, VideoCore::TextureCache& texture_cache_, AmdGpu::Liverpool* liverpool_) : instance{instance_}, scheduler{scheduler_}, texture_cache{texture_cache_}, - liverpool{liverpool_}, pipeline_cache{instance, scheduler, liverpool}, + liverpool{liverpool_}, memory{Core::Memory::Instance()}, + pipeline_cache{instance, scheduler, liverpool}, vertex_index_buffer{instance, scheduler, VertexIndexFlags, 64_MB} { if (!Config::nullGpu()) { liverpool->BindRasterizer(this); } + + memory->SetInstance(&instance); } Rasterizer::~Rasterizer() = default; -void Rasterizer::DrawIndex() { +void Rasterizer::Draw(bool is_indexed) { const auto cmdbuf = scheduler.CommandBuffer(); - auto& regs = liverpool->regs; - - UpdateDynamicState(); - - pipeline_cache.BindPipeline(); - - auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]); + const auto& regs = liverpool->regs; + const u32 num_indices = SetupIndexBuffer(is_indexed); + const auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]); + const GraphicsPipeline* pipeline = pipeline_cache.GetPipeline(); + pipeline->BindResources(memory); const vk::RenderingAttachmentInfo color_info = { 
.imageView = *image_view.image_view, @@ -52,13 +54,50 @@ void Rasterizer::DrawIndex() { .pColorAttachments = &color_info, }; + UpdateDynamicState(); + cmdbuf.beginRendering(rendering_info); - cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), 0, vk::IndexType::eUint32); - cmdbuf.bindVertexBuffers(0, vertex_index_buffer.Handle(), vk::DeviceSize(0)); - cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); + if (is_indexed) { + cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); + } else { + cmdbuf.draw(regs.num_indices, regs.num_instances.NumInstances(), 0, 0); + } cmdbuf.endRendering(); } +u32 Rasterizer::SetupIndexBuffer(bool& is_indexed) { + // Emulate QuadList primitive type with CPU made index buffer. + const auto& regs = liverpool->regs; + if (liverpool->regs.primitive_type == Liverpool::PrimitiveType::QuadList) { + ASSERT_MSG(!is_indexed, "Using QuadList primitive with indexed draw"); + is_indexed = true; + + // Emit indices. + const u32 index_size = 3 * regs.num_indices; + const auto [data, offset, _] = vertex_index_buffer.Map(index_size); + LiverpoolToVK::EmitQuadToTriangleListIndices(data, regs.num_indices); + vertex_index_buffer.Commit(index_size); + + // Bind index buffer. + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindIndexBuffer(vertex_index_buffer.Handle(), offset, vk::IndexType::eUint16); + return index_size / sizeof(u16); + } + if (!is_indexed) { + return 0; + } + + const VAddr index_address = regs.index_base_address.Address(); + const auto [buffer, offset] = memory->GetVulkanBuffer(index_address); + const vk::IndexType index_type = + regs.index_buffer_type.index_type == Liverpool::IndexType::Index16 ? 
vk::IndexType::eUint16 + : vk::IndexType::eUint32; + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.bindIndexBuffer(buffer, offset, index_type); + return regs.num_indices; +} + void Rasterizer::UpdateDynamicState() { UpdateViewportScissorState(); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index a1e940bac..a8386c252 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -3,7 +3,6 @@ #pragma once -#include #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" @@ -11,6 +10,10 @@ namespace AmdGpu { struct Liverpool; } +namespace Core { +class MemoryManager; +} + namespace VideoCore { class TextureCache; } @@ -26,20 +29,14 @@ public: VideoCore::TextureCache& texture_cache, AmdGpu::Liverpool* liverpool); ~Rasterizer(); - /// Performs a draw call with an index buffer. - void DrawIndex(); - - /// Performs a draw call without an index buffer. - void DrawAuto(); - - /// Updates graphics state that is not part of the bound pipeline. - void UpdateDynamicState(); + void Draw(bool is_indexed); private: - /// Updates viewport and scissor from liverpool registers. - void UpdateViewportScissorState(); + u32 SetupIndexBuffer(bool& is_indexed); + void MapMemory(VAddr addr, size_t size); - /// Updates depth and stencil pipeline state from liverpool registers. 
+ void UpdateDynamicState(); + void UpdateViewportScissorState(); void UpdateDepthStencilState(); private: @@ -47,6 +44,7 @@ private: Scheduler& scheduler; VideoCore::TextureCache& texture_cache; AmdGpu::Liverpool* liverpool; + Core::MemoryManager* memory; PipelineCache pipeline_cache; StreamBuffer vertex_index_buffer; }; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index d31a1f5d3..637f03d05 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -35,7 +35,7 @@ public: * @param size Size to reserve. * @returns A pair of a raw memory pointer (with offset added), and the buffer offset */ - std::tuple Map(u64 size, u64 alignment); + std::tuple Map(u64 size, u64 alignment = 0); /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. void Commit(u64 size); diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index e9ac4ff02..6a7bba8e2 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -67,12 +67,8 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe } ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept { - // There is a small difference between T# and CB number types, account for it. - const auto number_fmt = - buffer.info.number_type == AmdGpu::NumberFormat::Uscaled ? 
AmdGpu::NumberFormat::Srgb - : buffer.info.number_type; is_tiled = true; - pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, number_fmt); + pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); type = vk::ImageType::e2D; size.width = buffer.Pitch(); size.height = buffer.Height(); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 17cc3ec26..e21bb6ed2 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -147,7 +147,8 @@ ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buff return slot_image_views[view_id]; } - const ImageViewId view_id = slot_image_views.insert(instance, scheduler, view_info, image.image); + const ImageViewId view_id = + slot_image_views.insert(instance, scheduler, view_info, image.image); image.image_view_infos.emplace_back(view_info); image.image_view_ids.emplace_back(view_id); return slot_image_views[view_id];