core: Many things (#194)

* video_core: Add a few missed things

* libkernel: More proper memory mapped files

* memory: Fix tessellation buffer mapping

* Cuphead work

* sceKernelPollSema fix

* clang format

* fixed ngs2 lle loading and rtc lib

* draft pthreads keys implementation

* fixed return codes

* return error code if sceKernelLoadStartModule module is invalid

* re-enabled system modules and disable debug in libs.h

* Improve linux support

* fix windows build

* kernel: Rework keys

---------

Co-authored-by: georgemoralis <giorgosmrls@gmail.com>
This commit is contained in:
TheTurtle
2024-06-15 14:36:07 +03:00
committed by GitHub
parent 6a47f8ae50
commit c5d1d579b1
67 changed files with 1406 additions and 307 deletions

View File

@@ -22,8 +22,6 @@ Liverpool::Liverpool() {
Liverpool::~Liverpool() {
process_thread.request_stop();
num_submits = -1;
num_submits.notify_one();
process_thread.join();
}
@@ -31,8 +29,10 @@ void Liverpool::Process(std::stop_token stoken) {
Common::SetCurrentThreadName("GPU_CommandProcessor");
while (!stoken.stop_requested()) {
num_submits.wait(0);
{
std::unique_lock lk{submit_mutex};
submit_cv.wait(lk, stoken, [this] { return num_submits != 0; });
}
if (stoken.stop_requested()) {
break;
}
@@ -67,7 +67,8 @@ void Liverpool::Process(std::stop_token stoken) {
}
if (submit_done) {
num_submits.notify_all();
std::scoped_lock lk{submit_mutex};
submit_cv.notify_all();
submit_done = false;
}
}
@@ -76,9 +77,8 @@ void Liverpool::Process(std::stop_token stoken) {
void Liverpool::WaitGpuIdle() {
RENDERER_TRACE;
while (const auto old = num_submits.load()) {
num_submits.wait(old);
}
std::unique_lock lk{submit_mutex};
submit_cv.wait(lk, [this] { return num_submits == 0; });
}
Liverpool::Task Liverpool::ProcessCeUpdate(std::span<const u32> ccb) {
@@ -369,7 +369,6 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
UNREACHABLE_MSG("Unknown PM4 type 3 opcode {:#x} with count {}",
static_cast<u32>(opcode), count);
}
dcb = dcb.subspan(header->type3.NumWords() + 1);
}
@@ -415,8 +414,9 @@ void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
queue.submits.emplace(task.handle);
}
std::scoped_lock lk{submit_mutex};
++num_submits;
num_submits.notify_one();
submit_cv.notify_one();
}
void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
@@ -429,8 +429,9 @@ void Liverpool::SubmitAsc(u32 vqid, std::span<const u32> acb) {
queue.submits.emplace(task.handle);
}
std::scoped_lock lk{submit_mutex};
++num_submits;
num_submits.notify_one();
submit_cv.notify_one();
}
} // namespace AmdGpu

View File

@@ -10,6 +10,7 @@
#include "video_core/amdgpu/pixel_format.h"
#include <array>
#include <condition_variable>
#include <coroutine>
#include <mutex>
#include <span>
@@ -479,9 +480,9 @@ struct Liverpool {
};
struct Scissor {
union {
BitField<0, 16, s32> top_left_x;
BitField<16, 16, s32> top_left_y;
struct {
s16 top_left_x;
s16 top_left_y;
};
union {
BitField<0, 15, u32> bottom_right_x;
@@ -865,13 +866,15 @@ public:
void SubmitAsc(u32 vqid, std::span<const u32> acb);
void WaitGpuIdle();
/// Returns true when the `num_submits` counter is zero, false otherwise.
/// NOTE(review): this read does not take `submit_mutex`. If `num_submits` is the plain
/// `u32` member (not the atomic one), this is an unsynchronized read relative to the
/// writers that modify it under the lock — confirm this is acceptable for callers.
bool IsGpuIdle() const {
return num_submits == 0;
}
void NotifySubmitDone() {
std::scoped_lock lk{submit_mutex};
submit_done = true;
num_submits.notify_all();
submit_cv.notify_all();
}
void BindRasterizer(Vulkan::Rasterizer* rasterizer_) {
@@ -939,7 +942,9 @@ private:
Vulkan::Rasterizer* rasterizer{};
std::jthread process_thread{};
std::atomic<u32> num_submits{};
u32 num_submits{};
std::mutex submit_mutex;
std::condition_variable_any submit_cv;
std::atomic<bool> submit_done{};
};

View File

@@ -281,7 +281,8 @@ struct Sampler {
};
float LodBias() const noexcept {
return static_cast<float>(lod_bias);
return static_cast<float>(static_cast<int16_t>((lod_bias.Value() ^ 0x2000u) - 0x2000u)) /
256.0f;
}
float MinLod() const noexcept {

View File

@@ -347,6 +347,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu
if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eR8G8Unorm;
}
if (data_format == AmdGpu::DataFormat::FormatBc2 && num_format == AmdGpu::NumberFormat::Unorm) {
return vk::Format::eBc2UnormBlock;
}
UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format));
}
@@ -367,6 +370,10 @@ vk::Format DepthFormat(DepthBuffer::ZFormat z_format, DepthBuffer::StencilFormat
stencil_format == DepthBuffer::StencilFormat::Stencil8) {
return vk::Format::eD16UnormS8Uint;
}
if (z_format == DepthBuffer::ZFormat::Invald &&
stencil_format == DepthBuffer::StencilFormat::Invalid) {
return vk::Format::eUndefined;
}
UNREACHABLE();
}

View File

@@ -9,6 +9,7 @@
#include "sdl_window.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/texture_cache/image.h"
#include <vk_mem_alloc.h>
@@ -199,6 +200,11 @@ Frame* RendererVulkan::PrepareFrame(const Libraries::VideoOut::BufferAttributeGr
return PrepareFrameInternal(image);
}
/// Prepares a presentation frame from the texture cache image stored at
/// `VideoCore::NULL_IMAGE_ID`.
/// @return The frame produced by `PrepareFrameInternal` for that image.
/// NOTE(review): presumably the image in that slot is a placeholder/blank image —
/// confirm against the texture cache initialization.
Frame* RendererVulkan::PrepareBlankFrame() {
// Look up the image registered under the null image id and reuse the common frame path.
auto& image = texture_cache.GetImage(VideoCore::NULL_IMAGE_ID);
return PrepareFrameInternal(image);
}
Frame* RendererVulkan::PrepareFrameInternal(VideoCore::Image& image) {
// Request a free presentation frame.
Frame* frame = GetRenderFrame();

View File

@@ -39,6 +39,7 @@ public:
Frame* PrepareFrame(const Libraries::VideoOut::BufferAttributeGroup& attribute,
VAddr cpu_address);
Frame* PrepareBlankFrame();
bool ShowSplash(Frame* frame = nullptr);
void Present(Frame* frame);

View File

@@ -36,7 +36,8 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
for (const auto& image : info.images) {
bindings.push_back({
.binding = binding++,
.descriptorType = vk::DescriptorType::eSampledImage,
.descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage
: vk::DescriptorType::eSampledImage,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
});

View File

@@ -78,7 +78,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
.depthClampEnable = false,
.rasterizerDiscardEnable = false,
.polygonMode = LiverpoolToVK::PolygonMode(key.polygon_mode),
.cullMode = LiverpoolToVK::CullMode(key.cull_mode),
.cullMode = vk::CullModeFlagBits::eNone /*LiverpoolToVK::CullMode(key.cull_mode)*/,
.frontFace = key.front_face == Liverpool::FrontFace::Clockwise
? vk::FrontFace::eClockwise
: vk::FrontFace::eCounterClockwise,
@@ -289,7 +289,8 @@ void GraphicsPipeline::BuildDescSetLayout() {
for (const auto& image : stage.images) {
bindings.push_back({
.binding = binding++,
.descriptorType = vk::DescriptorType::eSampledImage,
.descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage
: vk::DescriptorType::eSampledImage,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
});
@@ -316,8 +317,8 @@ void GraphicsPipeline::BindResources(Core::MemoryManager* memory, StreamBuffer&
BindVertexBuffers(staging);
// Bind resource buffers and textures.
boost::container::static_vector<vk::DescriptorBufferInfo, 4> buffer_infos;
boost::container::static_vector<vk::DescriptorImageInfo, 8> image_infos;
boost::container::static_vector<vk::DescriptorBufferInfo, 16> buffer_infos;
boost::container::static_vector<vk::DescriptorImageInfo, 16> image_infos;
boost::container::small_vector<vk::WriteDescriptorSet, 16> set_writes;
u32 binding{};

View File

@@ -79,6 +79,10 @@ public:
return key.write_masks;
}
/// Returns the value of the `depth_enable` field stored in this pipeline's key
/// (`key.depth`), i.e. the depth-enable state the pipeline was keyed with.
[[nodiscard]] bool IsDepthEnabled() const {
return key.depth.depth_enable.Value();
}
private:
void BuildDescSetLayout();
void BindVertexBuffers(StreamBuffer& staging) const;

View File

@@ -6,6 +6,7 @@
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/vk_instance.h"
@@ -88,6 +89,8 @@ void PipelineCache::RefreshGraphicsKey() {
auto& key = graphics_key;
key.depth = regs.depth_control;
key.depth.depth_write_enable.Assign(regs.depth_control.depth_write_enable.Value() &&
!regs.depth_render_control.depth_clear_enable);
key.depth_bounds_min = regs.depth_bounds_min;
key.depth_bounds_max = regs.depth_bounds_max;
key.depth_bias_enable = regs.polygon_control.enable_polygon_offset_back ||
@@ -111,9 +114,10 @@ void PipelineCache::RefreshGraphicsKey() {
key.front_face = regs.polygon_control.front_face;
const auto& db = regs.depth_buffer;
key.depth_format = key.depth.depth_enable
? LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format)
: vk::Format::eUndefined;
if (key.depth.depth_enable) {
key.depth_format = LiverpoolToVK::DepthFormat(db.z_info.format, db.stencil_info.format);
key.depth.depth_enable.Assign(key.depth_format != vk::Format::eUndefined);
}
// `RenderingInfo` is assumed to be initialized with a contiguous array of valid color
// attachments. This might not be the case, as HW color buffers can be bound in an arbitrary
// order. We need to do some array compaction at this stage.
@@ -180,6 +184,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
inst_pool.ReleaseContents();
// Recompile shader to IR.
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#X}", stage, hash);
const Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));

View File

@@ -62,7 +62,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
.storeOp = vk::AttachmentStoreOp::eStore,
});
}
if (regs.depth_control.depth_enable && regs.depth_buffer.Address() != 0) {
if (pipeline->IsDepthEnabled() && regs.depth_buffer.Address() != 0) {
const bool is_clear = regs.depth_render_control.depth_clear_enable;
const auto& image_view =
texture_cache.DepthTarget(regs.depth_buffer, liverpool->last_db_extent);

View File

@@ -88,6 +88,8 @@ private:
vk::Image image{};
};
constexpr SlotId NULL_IMAGE_ID{0};
struct Image {
explicit Image(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
const ImageInfo& info, VAddr cpu_addr);

View File

@@ -46,17 +46,20 @@ vk::ComponentSwizzle ConvertComponentSwizzle(u32 dst_sel) {
}
}
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image) noexcept {
ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept
: is_storage{is_storage} {
type = ConvertImageViewType(image.type);
format = Vulkan::LiverpoolToVK::SurfaceFormat(image.GetDataFmt(), image.GetNumberFmt());
range.base.level = 0;
range.base.layer = 0;
range.extent.levels = image.NumLevels();
range.extent.layers = image.NumLayers();
mapping.r = ConvertComponentSwizzle(image.dst_sel_x);
mapping.g = ConvertComponentSwizzle(image.dst_sel_y);
mapping.b = ConvertComponentSwizzle(image.dst_sel_z);
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
if (!is_storage) {
mapping.r = ConvertComponentSwizzle(image.dst_sel_x);
mapping.g = ConvertComponentSwizzle(image.dst_sel_y);
mapping.b = ConvertComponentSwizzle(image.dst_sel_z);
mapping.a = ConvertComponentSwizzle(image.dst_sel_w);
}
}
ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info_, Image& image,
@@ -74,7 +77,7 @@ ImageView::ImageView(const Vulkan::Instance& instance, const ImageViewInfo& info
}
const vk::ImageViewCreateInfo image_view_ci = {
.pNext = usage_override.has_value() ? &usage_ci : nullptr,
.pNext = nullptr,
.image = image.image,
.viewType = info.type,
.format = format,

View File

@@ -18,12 +18,13 @@ namespace VideoCore {
struct ImageViewInfo {
explicit ImageViewInfo() = default;
explicit ImageViewInfo(const AmdGpu::Image& image) noexcept;
explicit ImageViewInfo(const AmdGpu::Image& image, bool is_storage) noexcept;
vk::ImageViewType type = vk::ImageViewType::e2D;
vk::Format format = vk::Format::eR8G8B8A8Unorm;
SubresourceRange range;
vk::ComponentMapping mapping{};
bool is_storage;
auto operator<=>(const ImageViewInfo&) const = default;
};

View File

@@ -169,14 +169,14 @@ ImageView& TextureCache::FindImageView(const AmdGpu::Image& desc, bool is_storag
image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits::eShaderRead);
}
const ImageViewInfo view_info{desc};
const ImageViewInfo view_info{desc, is_storage};
return RegisterImageView(image, view_info);
}
ImageView& TextureCache::RenderTarget(const AmdGpu::Liverpool::ColorBuffer& buffer,
const AmdGpu::Liverpool::CbDbExtent& hint) {
const ImageInfo info{buffer, hint};
auto& image = FindImage(info, buffer.Address());
auto& image = FindImage(info, buffer.Address(), false);
image.flags &= ~ImageFlagBits::CpuModified;
image.Transit(vk::ImageLayout::eColorAttachmentOptimal,

View File

@@ -55,6 +55,11 @@ public:
/// Retrieves the sampler that matches the provided S# descriptor.
[[nodiscard]] vk::Sampler GetSampler(const AmdGpu::Sampler& sampler);
/// Retrieves the image with the specified id.
/// Returns a mutable reference to the entry of `slot_images` indexed by `id`.
/// NOTE(review): no validity check on `id` is visible here — presumably callers must
/// pass an id that refers to a live slot; confirm against the slot container.
[[nodiscard]] Image& GetImage(ImageId id) {
return slot_images[id];
}
private:
ImageView& RegisterImageView(Image& image, const ImageViewInfo& view_info);