mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-07-27 04:25:12 +00:00
amdgpu: proper CB and DB sizes calculation; minor refactoring
This commit is contained in:
parent
384ea359ed
commit
9649d9f0df
@ -62,7 +62,6 @@ struct BufferAttribute {
|
|||||||
struct BufferAttributeGroup {
|
struct BufferAttributeGroup {
|
||||||
bool is_occupied;
|
bool is_occupied;
|
||||||
BufferAttribute attrib;
|
BufferAttribute attrib;
|
||||||
u32 size_in_bytes;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct VideoOutBuffer {
|
struct VideoOutBuffer {
|
||||||
|
@ -122,8 +122,6 @@ int VideoOutDriver::RegisterBuffers(VideoOutPort* port, s32 startIndex, void* co
|
|||||||
|
|
||||||
auto& group = port->groups[group_index];
|
auto& group = port->groups[group_index];
|
||||||
std::memcpy(&group.attrib, attribute, sizeof(BufferAttribute));
|
std::memcpy(&group.attrib, attribute, sizeof(BufferAttribute));
|
||||||
group.size_in_bytes =
|
|
||||||
attribute->height * attribute->pitch_in_pixel * PixelFormatBpp(attribute->pixel_format);
|
|
||||||
group.is_occupied = true;
|
group.is_occupied = true;
|
||||||
|
|
||||||
for (u32 i = 0; i < bufferNum; i++) {
|
for (u32 i = 0; i < bufferNum; i++) {
|
||||||
|
@ -321,7 +321,7 @@ struct Liverpool {
|
|||||||
|
|
||||||
struct DepthBuffer {
|
struct DepthBuffer {
|
||||||
enum class ZFormat : u32 {
|
enum class ZFormat : u32 {
|
||||||
Invald = 0,
|
Invalid = 0,
|
||||||
Z16 = 1,
|
Z16 = 1,
|
||||||
Z32Float = 3,
|
Z32Float = 3,
|
||||||
};
|
};
|
||||||
@ -367,8 +367,14 @@ struct Liverpool {
|
|||||||
return u64(z_read_base) << 8;
|
return u64(z_read_base) << 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t GetSizeAligned() const {
|
u32 NumSamples() const {
|
||||||
return depth_slice.tile_max * 8;
|
return 1u << z_info.num_samples; // spec doesn't say it is a log2
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t GetDepthSliceSize() const {
|
||||||
|
ASSERT(z_info.format != ZFormat::Invalid);
|
||||||
|
const auto bpe = z_info.format == ZFormat::Z32Float ? 4 : 2;
|
||||||
|
return (depth_slice.tile_max + 1) * 64 * bpe * NumSamples();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -733,12 +739,19 @@ struct Liverpool {
|
|||||||
return VAddr(fmask_base_address) << 8;
|
return VAddr(fmask_base_address) << 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t GetSizeAligned() const {
|
u32 NumSamples() const {
|
||||||
|
return 1 << attrib.num_fragments_log2;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 NumSlices() const {
|
||||||
|
return view.slice_max + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t GetColorSliceSize() const {
|
||||||
const auto num_bytes_per_element = NumBits(info.format) / 8u;
|
const auto num_bytes_per_element = NumBits(info.format) / 8u;
|
||||||
const auto slice_size = (slice.tile_max + 1) * 64u;
|
const auto slice_size =
|
||||||
const auto total_size = slice_size * (view.slice_max + 1) * num_bytes_per_element;
|
num_bytes_per_element * (slice.tile_max + 1) * 64u * NumSamples();
|
||||||
ASSERT(total_size > 0);
|
return slice_size;
|
||||||
return total_size;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TilingMode GetTilingMode() const {
|
TilingMode GetTilingMode() const {
|
||||||
@ -819,6 +832,17 @@ struct Liverpool {
|
|||||||
BitField<6, 1, u32> depth_compress_disable;
|
BitField<6, 1, u32> depth_compress_disable;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
union DepthView {
|
||||||
|
BitField<0, 11, u32> slice_start;
|
||||||
|
BitField<13, 11, u32> slice_max;
|
||||||
|
BitField<24, 1, u32> z_read_only;
|
||||||
|
BitField<25, 1, u32> stencil_read_only;
|
||||||
|
|
||||||
|
u32 NumSlices() const {
|
||||||
|
return slice_max + 1u;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
union AaConfig {
|
union AaConfig {
|
||||||
BitField<0, 3, u32> msaa_num_samples;
|
BitField<0, 3, u32> msaa_num_samples;
|
||||||
BitField<4, 1, u32> aa_mask_centroid_dtmn;
|
BitField<4, 1, u32> aa_mask_centroid_dtmn;
|
||||||
@ -849,7 +873,9 @@ struct Liverpool {
|
|||||||
ComputeProgram cs_program;
|
ComputeProgram cs_program;
|
||||||
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
|
INSERT_PADDING_WORDS(0xA008 - 0x2E00 - 80 - 3 - 5);
|
||||||
DepthRenderControl depth_render_control;
|
DepthRenderControl depth_render_control;
|
||||||
INSERT_PADDING_WORDS(4);
|
INSERT_PADDING_WORDS(1);
|
||||||
|
DepthView depth_view;
|
||||||
|
INSERT_PADDING_WORDS(2);
|
||||||
Address depth_htile_data_base;
|
Address depth_htile_data_base;
|
||||||
INSERT_PADDING_WORDS(2);
|
INSERT_PADDING_WORDS(2);
|
||||||
float depth_bounds_min;
|
float depth_bounds_min;
|
||||||
@ -1050,6 +1076,7 @@ static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03);
|
|||||||
static_assert(GFX6_3D_REG_INDEX(cs_program.address_lo) == 0x2E0C);
|
static_assert(GFX6_3D_REG_INDEX(cs_program.address_lo) == 0x2E0C);
|
||||||
static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
|
static_assert(GFX6_3D_REG_INDEX(cs_program.user_data) == 0x2E40);
|
||||||
static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000);
|
static_assert(GFX6_3D_REG_INDEX(depth_render_control) == 0xA000);
|
||||||
|
static_assert(GFX6_3D_REG_INDEX(depth_view) == 0xA002);
|
||||||
static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005);
|
static_assert(GFX6_3D_REG_INDEX(depth_htile_data_base) == 0xA005);
|
||||||
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
|
static_assert(GFX6_3D_REG_INDEX(screen_scissor) == 0xA00C);
|
||||||
static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010);
|
static_assert(GFX6_3D_REG_INDEX(depth_buffer.z_info) == 0xA010);
|
||||||
|
@ -36,6 +36,12 @@ struct Buffer {
|
|||||||
u32 element_size : 2;
|
u32 element_size : 2;
|
||||||
u32 index_stride : 2;
|
u32 index_stride : 2;
|
||||||
u32 add_tid_enable : 1;
|
u32 add_tid_enable : 1;
|
||||||
|
u32 : 6;
|
||||||
|
u32 type : 2; // overlaps with T# type, so should be 0 for buffer
|
||||||
|
|
||||||
|
bool Valid() const {
|
||||||
|
return type == 0u;
|
||||||
|
}
|
||||||
|
|
||||||
operator bool() const noexcept {
|
operator bool() const noexcept {
|
||||||
return base_address != 0;
|
return base_address != 0;
|
||||||
@ -149,7 +155,7 @@ struct Image {
|
|||||||
u64 pow2pad : 1;
|
u64 pow2pad : 1;
|
||||||
u64 mtype2 : 1;
|
u64 mtype2 : 1;
|
||||||
u64 atc : 1;
|
u64 atc : 1;
|
||||||
u64 type : 4;
|
u64 type : 4; // overlaps with V# type, so shouldn't be 0 for buffer
|
||||||
|
|
||||||
u64 depth : 13;
|
u64 depth : 13;
|
||||||
u64 pitch : 14;
|
u64 pitch : 14;
|
||||||
@ -162,6 +168,10 @@ struct Image {
|
|||||||
u64 lod_hw_cnt_en : 1;
|
u64 lod_hw_cnt_en : 1;
|
||||||
u64 : 43;
|
u64 : 43;
|
||||||
|
|
||||||
|
bool Valid() const {
|
||||||
|
return (type & 0x8u) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
VAddr Address() const {
|
VAddr Address() const {
|
||||||
return base_address << 8;
|
return base_address << 8;
|
||||||
}
|
}
|
||||||
@ -208,7 +218,7 @@ struct Image {
|
|||||||
return GetTilingMode() != TilingMode::Display_Linear;
|
return GetTilingMode() != TilingMode::Display_Linear;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t GetSizeAligned() const {
|
size_t GetSize() const {
|
||||||
// TODO: Derive this properly from tiling params
|
// TODO: Derive this properly from tiling params
|
||||||
return Pitch() * (height + 1) * NumComponents(GetDataFmt());
|
return Pitch() * (height + 1) * NumComponents(GetDataFmt());
|
||||||
}
|
}
|
||||||
|
@ -501,7 +501,7 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat
|
|||||||
stencil_format == DepthBuffer::StencilFormat::Stencil8) {
|
stencil_format == DepthBuffer::StencilFormat::Stencil8) {
|
||||||
return vk::Format::eD16UnormS8Uint;
|
return vk::Format::eD16UnormS8Uint;
|
||||||
}
|
}
|
||||||
if (z_format == DepthBuffer::ZFormat::Invald &&
|
if (z_format == DepthBuffer::ZFormat::Invalid &&
|
||||||
stencil_format == DepthBuffer::StencilFormat::Invalid) {
|
stencil_format == DepthBuffer::StencilFormat::Invalid) {
|
||||||
return vk::Format::eUndefined;
|
return vk::Format::eUndefined;
|
||||||
}
|
}
|
||||||
|
@ -130,7 +130,7 @@ void Rasterizer::BeginRendering() {
|
|||||||
texture_cache.TouchMeta(col_buf.CmaskAddress(), false);
|
texture_cache.TouchMeta(col_buf.CmaskAddress(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (regs.depth_buffer.z_info.format != Liverpool::DepthBuffer::ZFormat::Invald &&
|
if (regs.depth_buffer.z_info.format != Liverpool::DepthBuffer::ZFormat::Invalid &&
|
||||||
regs.depth_buffer.Address() != 0) {
|
regs.depth_buffer.Address() != 0) {
|
||||||
const auto htile_address = regs.depth_htile_data_base.GetAddress();
|
const auto htile_address = regs.depth_htile_data_base.GetAddress();
|
||||||
const bool is_clear = regs.depth_render_control.depth_clear_enable ||
|
const bool is_clear = regs.depth_render_control.depth_clear_enable ||
|
||||||
|
@ -152,7 +152,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
|
|||||||
size.height = hint.Valid() ? hint.height : buffer.Height();
|
size.height = hint.Valid() ? hint.height : buffer.Height();
|
||||||
size.depth = 1;
|
size.depth = 1;
|
||||||
pitch = size.width;
|
pitch = size.width;
|
||||||
guest_size_bytes = buffer.GetSizeAligned();
|
guest_size_bytes = buffer.GetColorSliceSize();
|
||||||
meta_info.cmask_addr = buffer.info.fast_clear ? buffer.CmaskAddress() : 0;
|
meta_info.cmask_addr = buffer.info.fast_clear ? buffer.CmaskAddress() : 0;
|
||||||
meta_info.fmask_addr = buffer.info.compression ? buffer.FmaskAddress() : 0;
|
meta_info.fmask_addr = buffer.info.compression ? buffer.FmaskAddress() : 0;
|
||||||
usage.render_target = true;
|
usage.render_target = true;
|
||||||
@ -168,7 +168,7 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, VAddr htile_a
|
|||||||
size.height = hint.Valid() ? hint.height : buffer.Height();
|
size.height = hint.Valid() ? hint.height : buffer.Height();
|
||||||
size.depth = 1;
|
size.depth = 1;
|
||||||
pitch = size.width;
|
pitch = size.width;
|
||||||
guest_size_bytes = buffer.GetSizeAligned();
|
guest_size_bytes = buffer.GetDepthSliceSize();
|
||||||
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
|
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
|
||||||
usage.depth_target = true;
|
usage.depth_target = true;
|
||||||
}
|
}
|
||||||
@ -184,7 +184,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
|
|||||||
pitch = image.Pitch();
|
pitch = image.Pitch();
|
||||||
resources.levels = image.NumLevels();
|
resources.levels = image.NumLevels();
|
||||||
resources.layers = image.NumLayers();
|
resources.layers = image.NumLayers();
|
||||||
guest_size_bytes = image.GetSizeAligned();
|
guest_size_bytes = image.GetSize();
|
||||||
usage.texture = true;
|
usage.texture = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user