video_core: added a heuristic for determination of CB/DB surface extents

This commit is contained in:
psucien 2024-06-01 19:45:23 +02:00
parent d5568e635d
commit b44a6c1056
7 changed files with 98 additions and 11 deletions

View File

@ -167,8 +167,61 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
}
// Handles a SetContextReg packet: copies the packet payload into the context
// register array and, when the write starts at a color buffer base register or
// at DbZInfo, records (or clears) the surface extent hint for that target.
case PM4ItOpcode::SetContextReg: {
const auto* set_data = reinterpret_cast<const PM4CmdSetData*>(header);
// NOTE(review): this copy uses the same destination, source and size as the
// memcpy a few lines below, so one of the two looks redundant — confirm.
std::memcpy(&regs.reg_array[ContextRegWordOffset + set_data->reg_offset], header + 2,
(count - 1) * sizeof(u32));
// Index of the first written register inside regs.reg_array.
const auto reg_addr = ContextRegWordOffset + set_data->reg_offset;
// Register values start two elements past the packet header pointer.
const auto* payload = reinterpret_cast<const u32*>(header + 2);
std::memcpy(&regs.reg_array[reg_addr], payload, (count - 1) * sizeof(u32));
// On real HW, render target memory is aligned because the color block operates on
// tiles. There is no information about the actual resource extents stored in the CB
// context regs, so any deduction of them from slices/pitch will lead to a larger
// surface being created. The same applies to the depth targets. Fortunately, the guest
// always sends a trailing NOP packet right after the context regs setup, so we can use
// the heuristic below and extract the hint to determine the actual resource dims.
switch (reg_addr) {
case ContextRegs::CbColor0Base:
[[fallthrough]];
case ContextRegs::CbColor1Base:
[[fallthrough]];
case ContextRegs::CbColor2Base:
[[fallthrough]];
case ContextRegs::CbColor3Base:
[[fallthrough]];
case ContextRegs::CbColor4Base:
[[fallthrough]];
case ContextRegs::CbColor5Base:
[[fallthrough]];
case ContextRegs::CbColor6Base:
[[fallthrough]];
case ContextRegs::CbColor7Base: {
// The CbColorNBase registers are evenly spaced, so the distance from
// CbColor0Base divided by that spacing is the color buffer index.
const auto col_buf_id = (reg_addr - ContextRegs::CbColor0Base) /
(ContextRegs::CbColor1Base - ContextRegs::CbColor0Base);
ASSERT(col_buf_id < NumColorBuffers);
const auto nop_offset = header->type3.count;
// Only a packet whose count field is 0x0e is treated as carrying a hint.
// payload[nop_offset] is asserted to be 0xc0001000 (presumably the header of the
// trailing NOP packet described above) and the dword after it is stored as the
// raw extent.
// NOTE(review): these indices appear to reach past the dwords copied above and no
// bounds check against the command buffer is visible in this block — confirm.
if (nop_offset == 0x0e) {
ASSERT_MSG(payload[nop_offset] == 0xc0001000,
"NOP hint is missing in CB setup sequence");
last_cb_extent[col_buf_id].raw = payload[nop_offset + 1];
} else {
// Any other packet size clears the hint; raw == 0 is reported as not valid
// by CbDbExtent::Valid().
last_cb_extent[col_buf_id].raw = 0;
}
break;
}
case ContextRegs::DbZInfo: {
// Depth target: same idea as for the color buffers, but the marker and the extent
// are read at the fixed payload indices 20 and 21 when the count field is 8.
// NOTE(review): unlike the CB path, these indices are hard-coded rather than
// derived from the packet count — confirm they match the guest's DB setup sequence.
if (header->type3.count == 8) {
ASSERT_MSG(payload[20] == 0xc0001000,
"NOP hint is missing in DB setup sequence");
last_db_extent.raw = payload[21];
} else {
// No hint available: reset so that Valid() returns false.
last_db_extent.raw = 0;
}
break;
}
default:
// Writes starting at any other register carry no extent hint.
break;
}
break;
}
case PM4ItOpcode::SetShReg: {

View File

@ -682,6 +682,18 @@ struct Liverpool {
Polygon = 21,
};
/// Register indices that the SetContextReg handling compares against the
/// computed register address in order to pick up surface extent hints.
enum ContextRegs : u32 {
    /// Depth buffer Z-info register.
    DbZInfo = 0xA010,

    /// Base-address register of each color buffer. Consecutive color buffers
    /// are 0xF registers apart, so each entry is derived from the previous one.
    CbColor0Base = 0xA318,
    CbColor1Base = CbColor0Base + 0xF,
    CbColor2Base = CbColor1Base + 0xF,
    CbColor3Base = CbColor2Base + 0xF,
    CbColor4Base = CbColor3Base + 0xF,
    CbColor5Base = CbColor4Base + 0xF,
    CbColor6Base = CbColor5Base + 0xF,
    CbColor7Base = CbColor6Base + 0xF,
};
union Regs {
struct {
INSERT_PADDING_WORDS(0x2C08);
@ -765,6 +777,21 @@ struct Liverpool {
Regs regs{};
// See the comment in the context reg parsing code
/// Extent hint for a color or depth surface, stored as a single dword.
/// The same 32 bits can be viewed either as `raw` or as a pair of 16-bit
/// fields (`width` first, then `height`).
union CbDbExtent {
    struct {
        u16 width;
        u16 height;
    };
    /// Packed value; zero (the default) means that no hint is present.
    u32 raw = 0u;

    /// Returns true when a hint has been recorded, i.e. `raw` is non-zero.
    [[nodiscard]] bool Valid() const {
        return !(raw == 0);
    }
};
std::array<CbDbExtent, NumColorBuffers> last_cb_extent{};
CbDbExtent last_db_extent{};
public:
Liverpool();
~Liverpool();

View File

@ -41,11 +41,14 @@ void Rasterizer::Draw(bool is_indexed) {
boost::container::static_vector<vk::RenderingAttachmentInfo, Liverpool::NumColorBuffers>
color_attachments{};
for (const auto& col_buf : regs.color_buffers) {
for (auto col_buf_id = 0u; col_buf_id < Liverpool::NumColorBuffers; ++col_buf_id) {
const auto& col_buf = regs.color_buffers[col_buf_id];
if (!col_buf) {
continue;
}
const auto& image_view = texture_cache.RenderTarget(col_buf);
const auto& hint = liverpool->last_cb_extent[col_buf_id];
const auto& image_view = texture_cache.RenderTarget(col_buf, hint);
color_attachments.push_back({
.imageView = *image_view.image_view,

View File

@ -82,12 +82,13 @@ ImageInfo::ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noe
}
}
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept {
ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint /*= {}*/) noexcept {
is_tiled = true;
pixel_format = LiverpoolToVK::SurfaceFormat(buffer.info.format, buffer.NumFormat());
type = vk::ImageType::e2D;
size.width = buffer.Pitch();
size.height = buffer.Height();
size.width = hint.Valid() ? hint.width : buffer.Pitch();
size.height = hint.Valid() ? hint.height : buffer.Height();
size.depth = 1;
pitch = size.width;
guest_size_bytes = buffer.slice.tile_max * (buffer.view.slice_max + 1);

View File

@ -34,7 +34,8 @@ DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
struct ImageInfo {
ImageInfo() = default;
explicit ImageInfo(const Libraries::VideoOut::BufferAttributeGroup& group) noexcept;
explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer) noexcept;
explicit ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
explicit ImageInfo(const AmdGpu::Image& image) noexcept;
bool is_tiled = false;

View File

@ -153,8 +153,9 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc) {
return slot_image_views[view_id];
}
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer) {
const ImageInfo info{buffer};
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint) {
const ImageInfo info{buffer, hint};
auto& image = FindImage(info, buffer.Address());
ImageViewInfo view_info;

View File

@ -42,7 +42,8 @@ public:
ImageView& FindImageView(const AmdGpu::Image& image);
/// Retrieves the render target with specified properties
ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer);
ImageView& RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint);
/// Reuploads image contents.
void RefreshImage(Image& image);