From 08e155946e8ce6a5c9caafe29f00b8d4baf69ec8 Mon Sep 17 00:00:00 2001 From: raphaelthegreat <47210458+raphaelthegreat@users.noreply.github.com> Date: Wed, 22 May 2024 23:05:19 +0300 Subject: [PATCH] video_core: Remove hack in rasterizer * The hack was to skip the first draw as the display buffer had not been created yet and the texture cache couldn't create one itself. With this patch it now can, using the color buffer parameters from registers --- .../frontend/fetch_shader.cpp | 81 +++++++++++++++++++ src/shader_recompiler/frontend/fetch_shader.h | 22 +++++ .../frontend/structured_control_flow.cpp | 1 - src/shader_recompiler/ir/attribute.cpp | 4 + src/shader_recompiler/ir/passes/passes.h | 3 +- .../ir/passes/resource_tracking_pass.cpp | 5 +- src/shader_recompiler/ir/program.h | 3 + src/shader_recompiler/recompiler.cpp | 13 ++- src/shader_recompiler/recompiler.h | 1 + src/video_core/amdgpu/liverpool.cpp | 2 +- src/video_core/amdgpu/liverpool.h | 38 +-------- src/video_core/amdgpu/pixel_format.cpp | 2 +- src/video_core/amdgpu/pixel_format.h | 2 +- .../renderer_vulkan/liverpool_to_vk.cpp | 21 ++++- .../renderer_vulkan/liverpool_to_vk.h | 3 + .../renderer_vulkan/renderer_vulkan.cpp | 2 - .../renderer_vulkan/vk_graphics_pipeline.cpp | 2 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 11 +-- .../renderer_vulkan/vk_rasterizer.h | 3 + src/video_core/texture_cache/image.cpp | 15 ++++ src/video_core/texture_cache/image.h | 2 + .../texture_cache/texture_cache.cpp | 33 +++----- src/video_core/texture_cache/texture_cache.h | 2 +- 24 files changed, 193 insertions(+), 80 deletions(-) create mode 100644 src/shader_recompiler/frontend/fetch_shader.cpp create mode 100644 src/shader_recompiler/frontend/fetch_shader.h diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp new file mode 100644 index 000000000..1ae8c8944 --- /dev/null +++ 
b/src/shader_recompiler/frontend/fetch_shader.cpp @@ -0,0 +1,81 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include "shader_recompiler/frontend/decode.h" +#include "shader_recompiler/frontend/fetch_shader.h" + +namespace Shader::Gcn { + +/** + * s_load_dwordx4 s[8:11], s[2:3], 0x00 + * s_load_dwordx4 s[12:15], s[2:3], 0x04 + * s_load_dwordx4 s[16:19], s[2:3], 0x08 + * s_waitcnt lgkmcnt(0) + * buffer_load_format_xyzw v[4:7], v0, s[8:11], 0 idxen + * buffer_load_format_xyz v[8:10], v0, s[12:15], 0 idxen + * buffer_load_format_xy v[12:13], v0, s[16:19], 0 idxen + * s_waitcnt 0 + * s_setpc_b64 s[0:1] + + * s_load_dwordx4 s[4:7], s[2:3], 0x0 + * s_waitcnt lgkmcnt(0) + * buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen + * s_load_dwordx4 s[4:7], s[2:3], 0x8 + * s_waitcnt lgkmcnt(0) + * buffer_load_format_xyzw v[8:11], v0, s[4:7], 0 idxen + * s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0) + * s_setpc_b64 s[0:1] + + * A normal fetch shader looks like the above; the instructions are generated + * using input semantics on the CPU side. Load instructions can either be separate or interleaved. + * We take the reverse way and extract the original input semantics from these instructions. 
+ **/ + +std::vector ParseFetchShader(std::span code) { + std::vector attributes; + GcnCodeSlice code_slice(code.data(), code.data() + std::numeric_limits::max()); + GcnDecodeContext decoder; + + struct VsharpLoad { + u32 dword_offset{}; + s32 base_sgpr{}; + s32 dst_sgpr{-1}; + }; + boost::container::static_vector loads; + + u32 semantic_index = 0; + while (!code_slice.atEnd()) { + const auto inst = decoder.decodeInstruction(code_slice); + if (inst.opcode == Opcode::S_SETPC_B64) { + break; + } + + if (inst.inst_class == InstClass::ScalarMemRd) { + loads.emplace_back(inst.control.smrd.offset, inst.src[0].code * 2, inst.dst[0].code); + continue; + } + + if (inst.inst_class == InstClass::VectorMemBufFmt) { + // Find the load instruction that loaded the V# to the SGPR. + // This is so we can determine its index in the vertex table. + const auto it = std::ranges::find_if(loads, [&](VsharpLoad& load) { + return load.dst_sgpr == inst.src[2].code * 4; + }); + + auto& attrib = attributes.emplace_back(); + attrib.semantic = semantic_index++; + attrib.dest_vgpr = inst.src[1].code; + attrib.num_elements = inst.control.mubuf.count; + attrib.sgpr_base = it->base_sgpr; + attrib.dword_offset = it->dword_offset; + + // Mark load as used. 
+ it->dst_sgpr = -1; + } + } + + return attributes; +} + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h new file mode 100644 index 000000000..636cd5e79 --- /dev/null +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -0,0 +1,22 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include "common/types.h" + +namespace Shader::Gcn { + +struct VertexAttribute { + u8 semantic; ///< Semantic index of the attribute + u8 dest_vgpr; ///< Destination VGPR to load first component + u8 num_elements; ///< Number of components to load + u8 sgpr_base; ///< SGPR that contains the pointer to the list of vertex V# + u8 dword_offset; ///< The dword offset of the V# that describes this attribute. +}; + +std::vector ParseFetchShader(std::span code); + +} // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/structured_control_flow.cpp b/src/shader_recompiler/frontend/structured_control_flow.cpp index f593529d0..3464a88e5 100644 --- a/src/shader_recompiler/frontend/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/structured_control_flow.cpp @@ -634,7 +634,6 @@ private: const u32 start = stmt.block->begin_index; const u32 size = stmt.block->end_index - start + 1; Translate(current_block, stage, inst_list.subspan(start, size)); - fmt::print("{}\n", IR::DumpBlock(*current_block)); break; } case StatementType::SetVariable: { diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp index 714053bc9..3b60bf650 100644 --- a/src/shader_recompiler/ir/attribute.cpp +++ b/src/shader_recompiler/ir/attribute.cpp @@ -106,6 +106,10 @@ std::string NameOf(Attribute attribute) { return "Param31"; case Attribute::VertexId: return "VertexId"; + case Attribute::InstanceId: + return "InstanceId"; + case Attribute::FragCoord: + return "FragCoord"; 
default: break; } diff --git a/src/shader_recompiler/ir/passes/passes.h b/src/shader_recompiler/ir/passes/passes.h index 49bb09b18..e4baae92b 100644 --- a/src/shader_recompiler/ir/passes/passes.h +++ b/src/shader_recompiler/ir/passes/passes.h @@ -4,6 +4,7 @@ #pragma once #include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/program.h" namespace Shader::Optimization { @@ -11,6 +12,6 @@ void SsaRewritePass(IR::BlockList& program); void IdentityRemovalPass(IR::BlockList& program); void DeadCodeEliminationPass(IR::BlockList& program); void ConstantPropagationPass(IR::BlockList& program); -void ResourceTrackingPass(IR::BlockList& program); +void ResourceTrackingPass(IR::Program& program); } // namespace Shader::Optimization diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index feb213dfa..39f0b808d 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -113,13 +113,12 @@ SharpLocation TrackSharp(const IR::Value& handle) { }; } -void ResourceTrackingPass(IR::BlockList& program) { - for (IR::Block* const block : program) { +void ResourceTrackingPass(IR::Program& program) { + for (IR::Block* const block : program.post_order_blocks) { for (IR::Inst& inst : block->Instructions()) { if (!IsResourceInstruction(inst)) { continue; } - printf("ff\n"); IR::Inst* producer = inst.Arg(0).InstRecursive(); const auto loc = TrackSharp(producer->Arg(0)); fmt::print("Found resource s[{}:{}] is_eud = {}\n", loc.index_dwords, diff --git a/src/shader_recompiler/ir/program.h b/src/shader_recompiler/ir/program.h index f4f5197f8..2efb6f507 100644 --- a/src/shader_recompiler/ir/program.h +++ b/src/shader_recompiler/ir/program.h @@ -15,11 +15,14 @@ enum class Stage : u32; namespace Shader::IR { +static constexpr size_t NumUserDataRegs = 16; + struct Program { AbstractSyntaxList syntax_list; BlockList 
blocks; BlockList post_order_blocks; std::vector ins_list; + std::array user_data; Stage stage; }; diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index 5bc521bd4..3215ed6dd 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -32,6 +32,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { std::vector TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Stage stage, + std::span ud_regs, std::span token) { // Ensure first instruction is expected. constexpr u32 token_mov_vcchi = 0xBEEB03FF; @@ -40,6 +41,11 @@ std::vector TranslateProgram(ObjectPool& inst_pool, Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size()); Gcn::GcnDecodeContext decoder; + static int counter = 0; + std::ofstream file(fmt::format("shader{}.bin", counter++), std::ios::out | std::ios::binary); + file.write((const char*)token.data(), token.size_bytes()); + file.close(); + // Decode and save instructions IR::Program program; program.ins_list.reserve(token.size()); @@ -56,14 +62,19 @@ std::vector TranslateProgram(ObjectPool& inst_pool, program.blocks = GenerateBlocks(program.syntax_list); program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); program.stage = stage; + std::ranges::copy(ud_regs, program.user_data.begin()); // Run optimization passes Shader::Optimization::SsaRewritePass(program.post_order_blocks); Shader::Optimization::ConstantPropagationPass(program.post_order_blocks); Shader::Optimization::IdentityRemovalPass(program.blocks); - // Shader::Optimization::ResourceTrackingPass(program.post_order_blocks); + Shader::Optimization::ResourceTrackingPass(program); Shader::Optimization::DeadCodeEliminationPass(program.blocks); + for (const auto& block : program.blocks) { + fmt::print("{}\n", IR::DumpBlock(*block)); + } + // TODO: Pass profile from vulkan backend const auto code = Backend::SPIRV::EmitSPIRV(Profile{}, program); return code; 
diff --git a/src/shader_recompiler/recompiler.h b/src/shader_recompiler/recompiler.h index 8cd9c7eac..c746c3d8f 100644 --- a/src/shader_recompiler/recompiler.h +++ b/src/shader_recompiler/recompiler.h @@ -28,6 +28,7 @@ struct BinaryInfo { [[nodiscard]] std::vector TranslateProgram(ObjectPool& inst_pool, ObjectPool& block_pool, Stage stage, + std::span ud_regs, std::span code); } // namespace Shader diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 09c1cb669..b1563a34a 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -122,7 +122,7 @@ void Liverpool::ProcessCmdList(const u32* cmdbuf, u32 size_in_bytes) { const auto* draw_index = reinterpret_cast(header); regs.num_indices = draw_index->index_count; regs.draw_initiator = draw_index->draw_initiator; - // rasterizer->DrawIndex(); + rasterizer->DrawIndex(); break; } case PM4ItOpcode::DispatchDirect: { diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index f0a27bb18..c93d019b2 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -6,6 +6,7 @@ #include "common/assert.h" #include "common/bit_field.h" #include "common/types.h" +#include "video_core/amdgpu/pixel_format.h" #include #include @@ -423,39 +424,6 @@ struct Liverpool { Swap8In64 = 3, }; - enum class Format : u32 { - Invalid = 0, - Color_8 = 1, - Color_16 = 2, - Color_8_8 = 3, - Color_32 = 4, - Color_16_16 = 5, - Color_10_11_11 = 6, - Color_11_11_10 = 7, - Color_10_10_10_2 = 8, - Color_2_10_10_10 = 9, - Color_8_8_8_8 = 10, - Color_32_32 = 11, - Color_16_16_16_16 = 12, - Color_32_32_32_32 = 14, - Color_5_6_5 = 16, - Color_1_5_5_5 = 17, - Color_5_5_5_1 = 18, - Color_4_4_4_4 = 19, - Color_8_24 = 20, - Color_24_8 = 21, - Color_X24_8_32_FL = 22, - }; - - enum class NumberType : u32 { - Unorm = 0, - Snorm = 1, - Uint = 4, - Sint = 5, - Srgb = 6, - Float = 7, - }; - enum class SwapMode : u32 { Standard = 0, Alternate = 
1, @@ -482,9 +450,9 @@ struct Liverpool { } view; union { BitField<0, 2, EndianSwap> endian; - BitField<2, 5, Format> format; + BitField<2, 5, DataFormat> format; BitField<7, 1, u32> linear_general; - BitField<8, 2, NumberType> number_type; + BitField<8, 2, NumberFormat> number_type; BitField<11, 2, SwapMode> comp_swap; BitField<13, 1, u32> fast_clear; BitField<14, 1, u32> compression; diff --git a/src/video_core/amdgpu/pixel_format.cpp b/src/video_core/amdgpu/pixel_format.cpp index 775fb1f1f..5bb8f0fbf 100644 --- a/src/video_core/amdgpu/pixel_format.cpp +++ b/src/video_core/amdgpu/pixel_format.cpp @@ -6,7 +6,7 @@ namespace AmdGpu { -u32 getNumComponents(DataFormat format) { +u32 NumComponents(DataFormat format) { constexpr std::array numComponentsPerElement = { 0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2, 2, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 3, 3, 4, 4, 4, 1, 2, 3, 4, diff --git a/src/video_core/amdgpu/pixel_format.h b/src/video_core/amdgpu/pixel_format.h index 488b00fc4..f28e42356 100644 --- a/src/video_core/amdgpu/pixel_format.h +++ b/src/video_core/amdgpu/pixel_format.h @@ -59,6 +59,6 @@ enum class NumberFormat : u32 { Ubscaled = 13, }; -u32 getNumComponents(DataFormat format); +u32 NumComponents(DataFormat format); } // namespace AmdGpu diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 8f9a76a27..00b28de9e 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -1,6 +1,6 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - +#pragma clang optimize off #include "common/assert.h" #include "video_core/renderer_vulkan/liverpool_to_vk.h" @@ -74,6 +74,9 @@ vk::PrimitiveTopology PrimitiveType(Liverpool::PrimitiveType type) { return vk::PrimitiveTopology::eTriangleListWithAdjacency; case Liverpool::PrimitiveType::AdjTriangleStrip: return 
vk::PrimitiveTopology::eTriangleStripWithAdjacency; + case Liverpool::PrimitiveType::QuadList: + // Needs to generate index buffer on the fly. + return vk::PrimitiveTopology::eTriangleList; default: UNREACHABLE(); return vk::PrimitiveTopology::eTriangleList; @@ -110,4 +113,20 @@ vk::CullModeFlags CullMode(Liverpool::CullMode mode) { } } +vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format) { + if (data_format == AmdGpu::DataFormat::Format32_32_32_32 && num_format == AmdGpu::NumberFormat::Float) { + return vk::Format::eR32G32B32A32Sfloat; + } + if (data_format == AmdGpu::DataFormat::Format32_32_32 && num_format == AmdGpu::NumberFormat::Uint) { + return vk::Format::eR32G32B32Uint; + } + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eR8G8B8A8Unorm; + } + if (data_format == AmdGpu::DataFormat::Format8_8_8_8 && num_format == AmdGpu::NumberFormat::Srgb) { + return vk::Format::eR8G8B8A8Srgb; + } + UNREACHABLE(); +} + } // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.h b/src/video_core/renderer_vulkan/liverpool_to_vk.h index 97994bf8f..c04b1cb95 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.h +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.h @@ -4,6 +4,7 @@ #pragma once #include "video_core/amdgpu/liverpool.h" +#include "video_core/amdgpu/pixel_format.h" #include "video_core/renderer_vulkan/vk_common.h" namespace Vulkan::LiverpoolToVK { @@ -20,4 +21,6 @@ vk::PolygonMode PolygonMode(Liverpool::PolygonMode mode); vk::CullModeFlags CullMode(Liverpool::CullMode mode); +vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat num_format); + } // namespace Vulkan::LiverpoolToVK diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index e952263e7..72ee6c9be 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp 
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -174,7 +174,6 @@ bool RendererVulkan::ShowSplash(Frame* frame /*= nullptr*/) { if (!frame) { if (!splash_img.has_value()) { - VideoCore::ImageInfo info{}; info.pixel_format = vk::Format::eR8G8B8A8Srgb; info.type = vk::ImageType::e2D; @@ -200,7 +199,6 @@ Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGr } Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) { - // Request a free presentation frame. Frame* frame = GetRenderFrame(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6cbd26b97..4ba2b61bd 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -126,7 +126,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, const PipelineKey& .pName = "main", }; - const vk::Format color_format = vk::Format::eB8G8R8A8Srgb; + const vk::Format color_format = vk::Format::eR8G8B8A8Srgb; const vk::PipelineRenderingCreateInfoKHR pipeline_rendering_ci = { .colorAttachmentCount = 1, .pColorAttachmentFormats = &color_format, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 28fb51d0f..23281c786 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -50,7 +50,7 @@ void PipelineCache::BindPipeline() { // Compile and cache shader. 
const auto data = std::span{token, bininfo.length / sizeof(u32)}; - const auto program = Shader::TranslateProgram(inst_pool, block_pool, stage, data); + const auto program = Shader::TranslateProgram(inst_pool, block_pool, stage, pgm.user_data, data); return CompileSPV(program, instance.GetDevice()); }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 5f5d3d4ea..595dcff1c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -31,20 +31,11 @@ void Rasterizer::DrawIndex() { const auto cmdbuf = scheduler.CommandBuffer(); auto& regs = liverpool->regs; - static bool first_time = true; - if (first_time) { - first_time = false; - return; - } - UpdateDynamicState(); pipeline_cache.BindPipeline(); - const u32 pitch = regs.color_buffers[0].Pitch(); - const u32 height = regs.color_buffers[0].Height(); - const u32 tile_max = regs.color_buffers[0].slice.tile_max; - auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0].Address(), pitch); + auto& image_view = texture_cache.RenderTarget(regs.color_buffers[0]); const vk::RenderingAttachmentInfo color_info = { .imageView = *image_view.image_view, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ba3c2d3ac..a1e940bac 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -29,6 +29,9 @@ public: /// Performs a draw call with an index buffer. void DrawIndex(); + /// Performs a draw call without an index buffer. + void DrawAuto(); + /// Updates graphics state that is not part of the bound pipeline. 
void UpdateDynamicState(); diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index b78d25631..e9ac4ff02 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -3,6 +3,7 @@ #include "common/assert.h" #include "common/config.h" +#include "video_core/renderer_vulkan/liverpool_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/texture_cache/image.h" @@ -65,6 +66,20 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe } } +ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept { + // There is a small difference between T# and CB number types, account for it. + const auto number_fmt = + buffer.info.number_type == AmdGpu::NumberFormat::Uscaled ? AmdGpu::NumberFormat::Srgb + : buffer.info.number_type; + is_tiled = true; + pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, number_fmt); + type = vk::ImageType::e2D; + size.width = buffer.Pitch(); + size.height = buffer.Height(); + pitch = size.width; + guest_size_bytes = buffer.slice.tile_max * (buffer.view.slice_max + 1); +} + UniqueImage::UniqueImage(vk::Device device_, VmaAllocator allocator_) : device{device_}, allocator{allocator_} {} diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index c1bddec7f..92391fde1 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -6,6 +6,7 @@ #include "common/enum.h" #include "common/types.h" #include "core/libraries/videoout/buffer.h" +#include "video_core/amdgpu/liverpool.h" #include "video_core/renderer_vulkan/vk_common.h" #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/types.h" @@ -32,6 +33,7 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) struct ImageInfo { ImageInfo() = default; explicit ImageInfo(const 
Libraries::VideoOut::BufferAttributeGroup& group) noexcept; + explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept; bool is_tiled = false; vk::Format pixel_format = vk::Format::eUndefined; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 15679ba91..17cc3ec26 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -101,8 +101,8 @@ TextureCache::~TextureCache() { } void TextureCache::OnCpuWrite(VAddr address) { - const VAddr address_aligned = address & ~((1 << PageBits) - 1); - ForEachImageInRegion(address_aligned, 1 << PageBits, [&](ImageId image_id, Image& image) { + const VAddr address_aligned = address & ~((1 << PageShift) - 1); + ForEachImageInRegion(address_aligned, 1 << PageShift, [&](ImageId image_id, Image& image) { // Ensure image is reuploaded when accessed again. image.flags |= ImageFlagBits::CpuModified; // Untrack image, so the range is unprotected and the guest can write freely. @@ -137,26 +137,19 @@ Image& TextureCache::FindImage(const ImageInfo& info, VAddr cpu_address) { return image; } -ImageView& TextureCache::RenderTarget(VAddr cpu_address, u32 pitch) { - boost::container::small_vector image_ids; - ForEachImageInRegion(cpu_address, pitch * 4, [&](ImageId image_id, Image& image) { - if (image.cpu_addr == cpu_address) { - image_ids.push_back(image_id); - } - }); +ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer) { + const ImageInfo info{buffer}; + auto& image = FindImage(info, buffer.Address()); - ASSERT_MSG(image_ids.size() <= 1, "Overlapping framebuffers not allowed!"); - auto* image = &slot_images[image_ids.empty() ? 
ImageId{0} : image_ids.back()]; - - ImageViewInfo info; - info.format = vk::Format::eB8G8R8A8Srgb; - if (const ImageViewId view_id = image->FindView(info); view_id) { + ImageViewInfo view_info; + view_info.format = info.pixel_format; + if (const ImageViewId view_id = image.FindView(view_info); view_id) { return slot_image_views[view_id]; } - const ImageViewId view_id = slot_image_views.insert(instance, scheduler, info, image->image); - image->image_view_infos.emplace_back(info); - image->image_view_ids.emplace_back(view_id); + const ImageViewId view_id = slot_image_views.insert(instance, scheduler, view_info, image.image); + image.image_view_infos.emplace_back(view_info); + image.image_view_ids.emplace_back(view_id); return slot_image_views[view_id]; } @@ -225,13 +218,13 @@ void TextureCache::UnregisterImage(ImageId image_id) { ForEachPage(image.cpu_addr, image.info.guest_size_bytes, [this, image_id](u64 page) { const auto page_it = page_table.find(page); if (page_it == page_table.end()) { - ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageBits); + ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << PageShift); return; } auto& image_ids = page_it.value(); const auto vector_it = std::ranges::find(image_ids, image_id); if (vector_it == image_ids.end()) { - ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageBits); + ASSERT_MSG(false, "Unregistering unregistered image in page=0x{:x}", page << PageShift); return; } image_ids.erase(vector_it); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a11201c4d..f59f16c4a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -37,7 +37,7 @@ public: Image& FindImage(const ImageInfo& info, VAddr cpu_address); /// Retrieves the render target with specified properties - ImageView& RenderTarget(VAddr cpu_address, u32 pitch); + ImageView& 
RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer); /// Reuploads image contents. void RefreshImage(Image& image);