From 9649d9f0df7ee7ddcf11fdc5d962d562888c9112 Mon Sep 17 00:00:00 2001 From: psucien Date: Thu, 18 Jul 2024 00:01:57 +0200 Subject: [PATCH] amdgpu: proper CB and DB sizes calculation; minor refactoring --- src/core/libraries/videoout/buffer.h | 1 - src/core/libraries/videoout/driver.cpp | 2 - src/video_core/amdgpu/liverpool.h | 45 +++++++++++++++---- src/video_core/amdgpu/resource.h | 14 +++++- .../renderer_vulkan/liverpool_to_vk.cpp | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/texture_cache/image.cpp | 6 +-- 7 files changed, 53 insertions(+), 19 deletions(-) diff --git a/src/core/libraries/videoout/buffer.h b/src/core/libraries/videoout/buffer.h index 88dad8522..8f49be591 100644 --- a/src/core/libraries/videoout/buffer.h +++ b/src/core/libraries/videoout/buffer.h @@ -62,7 +62,6 @@ struct BufferAttribute { struct BufferAttributeGroup { bool is_occupied; BufferAttribute attrib; - u32 size_in_bytes; }; struct VideoOutBuffer { diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index ece4ea010..e74fb10f2 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -122,8 +122,6 @@ int VideoOutDriver::RegisterBuffers(VideoOutPort* port, s32 startIndex, void* co auto& group = port->groups[group_index]; std::memcpy(&group.attrib, attribute, sizeof(BufferAttribute)); - group.size_in_bytes = - attribute->height * attribute->pitch_in_pixel * PixelFormatBpp(attribute->pixel_format); group.is_occupied = true; for (u32 i = 0; i < bufferNum; i++) { diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 84539c28c..bffec92bc 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -321,7 +321,7 @@ struct Liverpool { struct DepthBuffer { enum class ZFormat : u32 { - Invald = 0, + Invalid = 0, Z16 = 1, Z32Float = 3, }; @@ -367,8 +367,14 @@ struct Liverpool { return u64(z_read_base) << 8; } - size_t GetSizeAligned() const { - return depth_slice.tile_max * 8; + u32 NumSamples() const { + return 1u << z_info.num_samples; // spec doesn't say it is a log2 + } + + size_t GetDepthSliceSize() const { + ASSERT(z_info.format != ZFormat::Invalid); + const auto bpe = z_info.format == ZFormat::Z32Float ? 4 : 2; + return (depth_slice.tile_max + 1) * 64 * bpe * NumSamples(); } }; @@ -733,12 +739,19 @@ struct Liverpool { return VAddr(fmask_base_address) << 8; } - size_t GetSizeAligned() const { + u32 NumSamples() const { + return 1 << attrib.num_fragments_log2; + } + + u32 NumSlices() const { + return view.slice_max + 1; + } + + size_t GetColorSliceSize() const { const auto num_bytes_per_element = NumBits(info.format) / 8u; - const auto slice_size = (slice.tile_max + 1) * 64u; - const auto total_size = slice_size * (view.slice_max + 1) * num_bytes_per_element; - ASSERT(total_size > 0); - return total_size; + const auto slice_size = + num_bytes_per_element * (slice.tile_max + 1) * 64u * NumSamples(); + return slice_size; } TilingMode GetTilingMode() const { @@ -819,6 +832,17 @@ struct Liverpool { BitField<6, 1, u32> depth_compress_disable; }; + union DepthView { + BitField<0, 11, u32> slice_start; + BitField<13, 11, u32> slice_max; + BitField<24, 1, u32> z_read_only; + BitField<25, 1, u32> stencil_read_only; + + u32 NumSlices() const { + return slice_max + 1u; + } + }; + union AaConfig { BitField<0, 3, u32> msaa_num_samples; BitField<4, 1, u32> aa_mask_centroid_dtmn; @@ -849,7 +873,9 @@ struct Liverpool { ComputeProgram cs_program; INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5); DepthRenderControl depth_render_control; - INSERT_PADDING_WORDS(4); + INSERT_PADDING_WORDS(1); + DepthView depth_view; + INSERT_PADDING_WORDS(2); Address depth_htile_data_base; INSERT_PADDING_WORDS(2); float depth_bounds_min; @@ -1050,6 +1076,7 @@ static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03); static_assert(GFX6_3D_REG_INDEX(cs_program.address_lo) == 0x2E0C); static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40); static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000); +static_assert(GFX6_3D_REG_INDEX(depth_view) == 0xA002); static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005); static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C); static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010); diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h index 1247c0256..d85663df6 100644 --- a/src/video_core/amdgpu/resource.h +++ b/src/video_core/amdgpu/resource.h @@ -36,6 +36,12 @@ struct Buffer { u32 element_size : 2; u32 index_stride : 2; u32 add_tid_enable : 1; + u32 : 6; + u32 type : 2; // overlaps with T# type, so should be 0 for buffer + + bool Valid() const { + return type == 0u; + } operator bool() const noexcept { return base_address != 0; @@ -149,7 +155,7 @@ struct Image { u64 pow2pad : 1; u64 mtype2 : 1; u64 atc : 1; - u64 type : 4; + u64 type : 4; // overlaps with V# type, so shouldn't be 0 for buffer u64 depth : 13; u64 pitch : 14; @@ -162,6 +168,10 @@ struct Image { u64 lod_hw_cnt_en : 1; u64 : 43; + bool Valid() const { + return (type & 0x8u) != 0; + } + VAddr Address() const { return base_address << 8; } @@ -208,7 +218,7 @@ struct Image { return GetTilingMode() != TilingMode::Display_Linear; } - size_t GetSizeAligned() const { + size_t GetSize() const { // TODO: Derive this properly from tiling params return Pitch() * (height + 1) * NumComponents(GetDataFmt()); } diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index f361fcad6..795e3894c 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -501,7 +501,7 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat stencil_format == DepthBuffer::StencilFormat::Stencil8) { return vk::Format::eD16UnormS8Uint; } - if (z_format == DepthBuffer::ZFormat::Invald && + if (z_format == DepthBuffer::ZFormat::Invalid && stencil_format == DepthBuffer::StencilFormat::Invalid) { return vk::Format::eUndefined; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fff9bc331..f98e890f2 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -130,7 +130,7 @@ void Rasterizer::BeginRendering() { texture_cache.TouchMeta(col_buf.CmaskAddress(), false); } - if (regs.depth_buffer.z_info.format != Liverpool::DepthBuffer::ZFormat::Invald && + if (regs.depth_buffer.z_info.format != Liverpool::DepthBuffer::ZFormat::Invalid && regs.depth_buffer.Address() != 0) { const auto htile_address = regs.depth_htile_data_base.GetAddress(); const bool is_clear = regs.depth_render_control.depth_clear_enable || diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index f06492ef8..5192bd65e 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -152,7 +152,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer, size.height = hint.Valid() ? hint.height : buffer.Height(); size.depth = 1; pitch = size.width; - guest_size_bytes = buffer.GetSizeAligned(); + guest_size_bytes = buffer.GetColorSliceSize(); meta_info.cmask_addr = buffer.info.fast_clear ? buffer.CmaskAddress() : 0; meta_info.fmask_addr = buffer.info.compression ? buffer.FmaskAddress() : 0; usage.render_target = true; @@ -168,7 +168,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, VAddr htile_a size.height = hint.Valid() ? hint.height : buffer.Height(); size.depth = 1; pitch = size.width; - guest_size_bytes = buffer.GetSizeAligned(); + guest_size_bytes = buffer.GetDepthSliceSize(); meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0; usage.depth_target = true; } @@ -184,7 +184,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept { pitch = image.Pitch(); resources.levels = image.NumLevels(); resources.layers = image.NumLayers(); - guest_size_bytes = image.GetSizeAligned(); + guest_size_bytes = image.GetSize(); usage.texture = true; }