From b44a6c10561b11dd2f61670be5410357f47c4d18 Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 1 Jun 2024 19:45:23 +0200 Subject: [PATCH] video_core: added a heuristic for determination of CB/DB surface extents --- src/video_core/amdgpu/liverpool.cpp | 57 ++++++++++++++++++- src/video_core/amdgpu/liverpool.h | 27 +++++++++ .../renderer_vulkan/vk_rasterizer.cpp | 7 ++- src/video_core/texture_cache/image.cpp | 7 ++- src/video_core/texture_cache/image.h | 3 +- .../texture_cache/texture_cache.cpp | 5 +- src/video_core/texture_cache/texture_cache.h | 3 +- 7 files changed, 98 insertions(+), 11 deletions(-) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index ddfff4b74..478bc726b 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -167,8 +167,61 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::span(header); - std::memcpy(&regs.reg_array[ContextRegWordOffset + set_data->reg_offset], header + 2, - (count - 1) * sizeof(u32)); + const auto reg_addr = ContextRegWordOffset + set_data->reg_offset; + const auto* payload = reinterpret_cast(header + 2); + + std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32)); + + // In the case of HW, render target memory has alignment as color block operates on + // tiles. There is no information of actual resource extents stored in CB context + // regs, so any deduction of it from slices/pitch will lead to a larger surface created. + // The same applies to the depth targets. Fortunately, the guest always sends + // a trailing NOP packet right after the context regs setup, so we can use the heuristic + // below and extract the hint to determine actual resource dims. 
+ + switch (reg_addr) { + case ContextRegs::CbColor0Base: + [[fallthrough]]; + case ContextRegs::CbColor1Base: + [[fallthrough]]; + case ContextRegs::CbColor2Base: + [[fallthrough]]; + case ContextRegs::CbColor3Base: + [[fallthrough]]; + case ContextRegs::CbColor4Base: + [[fallthrough]]; + case ContextRegs::CbColor5Base: + [[fallthrough]]; + case ContextRegs::CbColor6Base: + [[fallthrough]]; + case ContextRegs::CbColor7Base: { + const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Base) / + (ContextRegs::CbColor1Base - ContextRegs::CbColor0Base); + ASSERT(col_buf_id < NumColorBuffers); + + const auto nop_offset = header->type3.count; + if (nop_offset == 0x0e) { + ASSERT_MSG(payload[nop_offset] == 0xc0001000, + "NOP hint is missing in CB setup sequence"); + last_cb_extent[col_buf_id].raw = payload[nop_offset + 1]; + } else { + last_cb_extent[col_buf_id].raw = 0; + } + break; + } + case ContextRegs::DbZInfo: { + if (header->type3.count == 8) { + ASSERT_MSG(payload[20] == 0xc0001000, + "NOP hint is missing in DB setup sequence"); + last_db_extent.raw = payload[21]; + } else { + last_db_extent.raw = 0; + } + break; + } + default: + break; + } break; } case PM4ItOpcode::SetShReg: { diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 1380f1891..c52a0f97a 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -682,6 +682,18 @@ struct Liverpool { Polygon = 21, }; + enum ContextRegs : u32 { + DbZInfo = 0xA010, + CbColor0Base = 0xA318, + CbColor1Base = 0xA327, + CbColor2Base = 0xA336, + CbColor3Base = 0xA345, + CbColor4Base = 0xA354, + CbColor5Base = 0xA363, + CbColor6Base = 0xA372, + CbColor7Base = 0xA381, + }; + union Regs { struct { INSERT_PADDING_WORDS(0x2C08); @@ -765,6 +777,21 @@ struct Liverpool { Regs regs{}; + // See the comment in the context reg parsing code + union CbDbExtent { + struct { + u16 width; + u16 height; + }; + u32 raw{0u}; + + [[nodiscard]] bool Valid() const { + return raw != 
0; + } + }; + std::array last_cb_extent{}; + CbDbExtent last_db_extent{}; + public: Liverpool(); ~Liverpool(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d0b873fa2..37d6f72b5 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -41,11 +41,14 @@ void Rasterizer::Draw(bool is_indexed) { boost::container::static_vector color_attachments{}; - for (const auto& col_buf : regs.color_buffers) { + for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) { + const auto& col_buf = regs.color_buffers[col_buf_id]; if (!col_buf) { continue; } - const auto& image_view = texture_cache.RenderTarget(col_buf); + + const auto& hint = liverpool->last_cb_extent[col_buf_id]; + const auto& image_view = texture_cache.RenderTarget(col_buf, hint); color_attachments.push_back({ .imageView = *image_view.image_view, diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index efe3cf3dc..cc29f010e 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -82,12 +82,13 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe } } -ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept { +ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept { is_tiled = true; pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat()); type = vk::ImageType::e2D; - size.width = buffer.Pitch(); - size.height = buffer.Height(); + size.width = hint.Valid() ? hint.width : buffer.Pitch(); + size.height = hint.Valid() ? 
hint.height : buffer.Height(); size.depth = 1; pitch = size.width; guest_size_bytes = buffer.slice.tile_max * (buffer.view.slice_max + 1); diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 2128d0989..cc3adff4e 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -34,7 +34,8 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) struct ImageInfo { ImageInfo() = default; explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept; - explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept; + explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept; explicit ImageInfo(const AmdGpu::Image& image) noexcept; bool is_tiled = false; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 318f94577..be4bf907b 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -153,8 +153,9 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc) { return slot_image_views[view_id]; } -ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer) { - const ImageInfo info{buffer}; +ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint) { + const ImageInfo info{buffer, hint}; auto& image = FindImage(info, buffer.Address()); ImageViewInfo view_info; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 21d5ce5dd..94c499299 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -42,7 +42,8 @@ public: ImageView& FindImageView(const AmdGpu::Image& image); /// Retrieves the render target with specified properties - ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& 
buffer); + ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer, + const AmdGpu::Liverpool::CbDbExtent& hint); /// Reuploads image contents. void RefreshImage(Image& image);